aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs')
-rw-r--r--fs/btrfs/Kconfig1
-rw-r--r--fs/btrfs/backref.c2
-rw-r--r--fs/btrfs/disk-io.c1
-rw-r--r--fs/btrfs/inode.c24
-rw-r--r--fs/btrfs/locking.c9
-rw-r--r--fs/btrfs/ordered-data.c11
-rw-r--r--fs/btrfs/send.c77
-rw-r--r--fs/btrfs/transaction.c32
-rw-r--r--fs/btrfs/transaction.h3
-rw-r--r--fs/btrfs/volumes.c10
10 files changed, 91 insertions, 79 deletions
diff --git a/fs/btrfs/Kconfig b/fs/btrfs/Kconfig
index 212b4a854f2c..38651fae7f21 100644
--- a/fs/btrfs/Kconfig
+++ b/fs/btrfs/Kconfig
@@ -4,6 +4,7 @@ config BTRFS_FS
tristate "Btrfs filesystem support"
select CRYPTO
select CRYPTO_CRC32C
+ select LIBCRC32C
select ZLIB_INFLATE
select ZLIB_DEFLATE
select LZO_COMPRESS
diff --git a/fs/btrfs/backref.c b/fs/btrfs/backref.c
index 89116afda7a2..e5d85311d5d5 100644
--- a/fs/btrfs/backref.c
+++ b/fs/btrfs/backref.c
@@ -1483,7 +1483,7 @@ int btrfs_check_shared(struct btrfs_root *root, u64 inum, u64 bytenr,
ulist_init(roots);
ulist_init(tmp);
- trans = btrfs_attach_transaction(root);
+ trans = btrfs_join_transaction_nostart(root);
if (IS_ERR(trans)) {
if (PTR_ERR(trans) != -ENOENT && PTR_ERR(trans) != -EROFS) {
ret = PTR_ERR(trans);
diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c
index 41a2bd2e0c56..5f7ee70b3d1a 100644
--- a/fs/btrfs/disk-io.c
+++ b/fs/btrfs/disk-io.c
@@ -4106,6 +4106,7 @@ void close_ctree(struct btrfs_fs_info *fs_info)
percpu_counter_destroy(&fs_info->dev_replace.bio_counter);
cleanup_srcu_struct(&fs_info->subvol_srcu);
+ btrfs_free_csum_hash(fs_info);
btrfs_free_stripe_hash_table(fs_info);
btrfs_free_ref_cache(fs_info);
}
diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c
index 1af069a9a0c7..ee582a36653d 100644
--- a/fs/btrfs/inode.c
+++ b/fs/btrfs/inode.c
@@ -395,10 +395,31 @@ static noinline int add_async_extent(struct async_chunk *cow,
return 0;
}
+/*
+ * Check if the inode has flags compatible with compression
+ */
+static inline bool inode_can_compress(struct inode *inode)
+{
+ if (BTRFS_I(inode)->flags & BTRFS_INODE_NODATACOW ||
+ BTRFS_I(inode)->flags & BTRFS_INODE_NODATASUM)
+ return false;
+ return true;
+}
+
+/*
+ * Check if the inode needs to be submitted to compression, based on mount
+ * options, defragmentation, properties or heuristics.
+ */
static inline int inode_need_compress(struct inode *inode, u64 start, u64 end)
{
struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ if (!inode_can_compress(inode)) {
+ WARN(IS_ENABLED(CONFIG_BTRFS_DEBUG),
+ KERN_ERR "BTRFS: unexpected compression for ino %llu\n",
+ btrfs_ino(BTRFS_I(inode)));
+ return 0;
+ }
/* force compress */
if (btrfs_test_opt(fs_info, FORCE_COMPRESS))
return 1;
@@ -1631,7 +1652,8 @@ int btrfs_run_delalloc_range(struct inode *inode, struct page *locked_page,
} else if (BTRFS_I(inode)->flags & BTRFS_INODE_PREALLOC && !force_cow) {
ret = run_delalloc_nocow(inode, locked_page, start, end,
page_started, 0, nr_written);
- } else if (!inode_need_compress(inode, start, end)) {
+ } else if (!inode_can_compress(inode) ||
+ !inode_need_compress(inode, start, end)) {
ret = cow_file_range(inode, locked_page, start, end, end,
page_started, nr_written, 1, NULL);
} else {
diff --git a/fs/btrfs/locking.c b/fs/btrfs/locking.c
index 98fccce4208c..393eceda57c8 100644
--- a/fs/btrfs/locking.c
+++ b/fs/btrfs/locking.c
@@ -346,9 +346,12 @@ void btrfs_tree_unlock(struct extent_buffer *eb)
if (blockers) {
btrfs_assert_no_spinning_writers(eb);
eb->blocking_writers--;
- /* Use the lighter barrier after atomic */
- smp_mb__after_atomic();
- cond_wake_up_nomb(&eb->write_lock_wq);
+ /*
+ * We need to order modifying blocking_writers above with
+ * actually waking up the sleepers to ensure they see the
+ * updated value of blocking_writers
+ */
+ cond_wake_up(&eb->write_lock_wq);
} else {
btrfs_assert_spinning_writers_put(eb);
write_unlock(&eb->lock);
diff --git a/fs/btrfs/ordered-data.c b/fs/btrfs/ordered-data.c
index 1744ba8b2754..ae7f64a8facb 100644
--- a/fs/btrfs/ordered-data.c
+++ b/fs/btrfs/ordered-data.c
@@ -985,13 +985,14 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
struct extent_state **cached_state)
{
struct btrfs_ordered_extent *ordered;
- struct extent_state *cachedp = NULL;
+ struct extent_state *cache = NULL;
+ struct extent_state **cachedp = &cache;
if (cached_state)
- cachedp = *cached_state;
+ cachedp = cached_state;
while (1) {
- lock_extent_bits(tree, start, end, &cachedp);
+ lock_extent_bits(tree, start, end, cachedp);
ordered = btrfs_lookup_ordered_range(inode, start,
end - start + 1);
if (!ordered) {
@@ -1001,10 +1002,10 @@ void btrfs_lock_and_flush_ordered_range(struct extent_io_tree *tree,
* aren't exposing it outside of this function
*/
if (!cached_state)
- refcount_dec(&cachedp->refs);
+ refcount_dec(&cache->refs);
break;
}
- unlock_extent_cached(tree, start, end, &cachedp);
+ unlock_extent_cached(tree, start, end, cachedp);
btrfs_start_ordered_extent(&inode->vfs_inode, ordered, 1);
btrfs_put_ordered_extent(ordered);
}
diff --git a/fs/btrfs/send.c b/fs/btrfs/send.c
index 69b59bf75882..c3c0c064c25d 100644
--- a/fs/btrfs/send.c
+++ b/fs/btrfs/send.c
@@ -6322,68 +6322,21 @@ static int changed_extent(struct send_ctx *sctx,
{
int ret = 0;
- if (sctx->cur_ino != sctx->cmp_key->objectid) {
-
- if (result == BTRFS_COMPARE_TREE_CHANGED) {
- struct extent_buffer *leaf_l;
- struct extent_buffer *leaf_r;
- struct btrfs_file_extent_item *ei_l;
- struct btrfs_file_extent_item *ei_r;
-
- leaf_l = sctx->left_path->nodes[0];
- leaf_r = sctx->right_path->nodes[0];
- ei_l = btrfs_item_ptr(leaf_l,
- sctx->left_path->slots[0],
- struct btrfs_file_extent_item);
- ei_r = btrfs_item_ptr(leaf_r,
- sctx->right_path->slots[0],
- struct btrfs_file_extent_item);
-
- /*
- * We may have found an extent item that has changed
- * only its disk_bytenr field and the corresponding
- * inode item was not updated. This case happens due to
- * very specific timings during relocation when a leaf
- * that contains file extent items is COWed while
- * relocation is ongoing and its in the stage where it
- * updates data pointers. So when this happens we can
- * safely ignore it since we know it's the same extent,
- * but just at different logical and physical locations
- * (when an extent is fully replaced with a new one, we
- * know the generation number must have changed too,
- * since snapshot creation implies committing the current
- * transaction, and the inode item must have been updated
- * as well).
- * This replacement of the disk_bytenr happens at
- * relocation.c:replace_file_extents() through
- * relocation.c:btrfs_reloc_cow_block().
- */
- if (btrfs_file_extent_generation(leaf_l, ei_l) ==
- btrfs_file_extent_generation(leaf_r, ei_r) &&
- btrfs_file_extent_ram_bytes(leaf_l, ei_l) ==
- btrfs_file_extent_ram_bytes(leaf_r, ei_r) &&
- btrfs_file_extent_compression(leaf_l, ei_l) ==
- btrfs_file_extent_compression(leaf_r, ei_r) &&
- btrfs_file_extent_encryption(leaf_l, ei_l) ==
- btrfs_file_extent_encryption(leaf_r, ei_r) &&
- btrfs_file_extent_other_encoding(leaf_l, ei_l) ==
- btrfs_file_extent_other_encoding(leaf_r, ei_r) &&
- btrfs_file_extent_type(leaf_l, ei_l) ==
- btrfs_file_extent_type(leaf_r, ei_r) &&
- btrfs_file_extent_disk_bytenr(leaf_l, ei_l) !=
- btrfs_file_extent_disk_bytenr(leaf_r, ei_r) &&
- btrfs_file_extent_disk_num_bytes(leaf_l, ei_l) ==
- btrfs_file_extent_disk_num_bytes(leaf_r, ei_r) &&
- btrfs_file_extent_offset(leaf_l, ei_l) ==
- btrfs_file_extent_offset(leaf_r, ei_r) &&
- btrfs_file_extent_num_bytes(leaf_l, ei_l) ==
- btrfs_file_extent_num_bytes(leaf_r, ei_r))
- return 0;
- }
-
- inconsistent_snapshot_error(sctx, result, "extent");
- return -EIO;
- }
+ /*
+ * We have found an extent item that changed without the inode item
+ * having changed. This can happen either after relocation (where the
+ * disk_bytenr of an extent item is replaced at
+ * relocation.c:replace_file_extents()) or after deduplication into a
+ * file in both the parent and send snapshots (where an extent item can
+ * get modified or replaced with a new one). Note that deduplication
+ * updates the inode item, but it only changes the iversion (sequence
+ * field in the inode item) of the inode, so if a file is deduplicated
+ * the same amount of times in both the parent and send snapshots, its
+ * iversion becames the same in both snapshots, whence the inode item is
+ * the same on both snapshots.
+ */
+ if (sctx->cur_ino != sctx->cmp_key->objectid)
+ return 0;
if (!sctx->cur_inode_new_gen && !sctx->cur_inode_deleted) {
if (result != BTRFS_COMPARE_TREE_DELETED)
diff --git a/fs/btrfs/transaction.c b/fs/btrfs/transaction.c
index 3b8ae1a8f02d..e3adb714c04b 100644
--- a/fs/btrfs/transaction.c
+++ b/fs/btrfs/transaction.c
@@ -28,15 +28,18 @@ static const unsigned int btrfs_blocked_trans_types[TRANS_STATE_MAX] = {
[TRANS_STATE_COMMIT_START] = (__TRANS_START | __TRANS_ATTACH),
[TRANS_STATE_COMMIT_DOING] = (__TRANS_START |
__TRANS_ATTACH |
- __TRANS_JOIN),
+ __TRANS_JOIN |
+ __TRANS_JOIN_NOSTART),
[TRANS_STATE_UNBLOCKED] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN |
- __TRANS_JOIN_NOLOCK),
+ __TRANS_JOIN_NOLOCK |
+ __TRANS_JOIN_NOSTART),
[TRANS_STATE_COMPLETED] = (__TRANS_START |
__TRANS_ATTACH |
__TRANS_JOIN |
- __TRANS_JOIN_NOLOCK),
+ __TRANS_JOIN_NOLOCK |
+ __TRANS_JOIN_NOSTART),
};
void btrfs_put_transaction(struct btrfs_transaction *transaction)
@@ -543,7 +546,8 @@ again:
ret = join_transaction(fs_info, type);
if (ret == -EBUSY) {
wait_current_trans(fs_info);
- if (unlikely(type == TRANS_ATTACH))
+ if (unlikely(type == TRANS_ATTACH ||
+ type == TRANS_JOIN_NOSTART))
ret = -ENOENT;
}
} while (ret == -EBUSY);
@@ -660,6 +664,16 @@ struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root
}
/*
+ * Similar to regular join but it never starts a transaction when none is
+ * running or after waiting for the current one to finish.
+ */
+struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root)
+{
+ return start_transaction(root, 0, TRANS_JOIN_NOSTART,
+ BTRFS_RESERVE_NO_FLUSH, true);
+}
+
+/*
* btrfs_attach_transaction() - catch the running transaction
*
* It is used when we want to commit the current the transaction, but
@@ -2037,6 +2051,16 @@ int btrfs_commit_transaction(struct btrfs_trans_handle *trans)
}
} else {
spin_unlock(&fs_info->trans_lock);
+ /*
+ * The previous transaction was aborted and was already removed
+ * from the list of transactions at fs_info->trans_list. So we
+ * abort to prevent writing a new superblock that reflects a
+ * corrupt state (pointing to trees with unwritten nodes/leafs).
+ */
+ if (test_bit(BTRFS_FS_STATE_TRANS_ABORTED, &fs_info->fs_state)) {
+ ret = -EROFS;
+ goto cleanup_transaction;
+ }
}
extwriter_counter_dec(cur_trans, trans->type);
diff --git a/fs/btrfs/transaction.h b/fs/btrfs/transaction.h
index 527ea94b57d9..2c5a6f6e5bb0 100644
--- a/fs/btrfs/transaction.h
+++ b/fs/btrfs/transaction.h
@@ -94,11 +94,13 @@ struct btrfs_transaction {
#define __TRANS_JOIN (1U << 11)
#define __TRANS_JOIN_NOLOCK (1U << 12)
#define __TRANS_DUMMY (1U << 13)
+#define __TRANS_JOIN_NOSTART (1U << 14)
#define TRANS_START (__TRANS_START | __TRANS_FREEZABLE)
#define TRANS_ATTACH (__TRANS_ATTACH)
#define TRANS_JOIN (__TRANS_JOIN | __TRANS_FREEZABLE)
#define TRANS_JOIN_NOLOCK (__TRANS_JOIN_NOLOCK)
+#define TRANS_JOIN_NOSTART (__TRANS_JOIN_NOSTART)
#define TRANS_EXTWRITERS (__TRANS_START | __TRANS_ATTACH)
@@ -183,6 +185,7 @@ struct btrfs_trans_handle *btrfs_start_transaction_fallback_global_rsv(
int min_factor);
struct btrfs_trans_handle *btrfs_join_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_join_transaction_nolock(struct btrfs_root *root);
+struct btrfs_trans_handle *btrfs_join_transaction_nostart(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction(struct btrfs_root *root);
struct btrfs_trans_handle *btrfs_attach_transaction_barrier(
struct btrfs_root *root);
diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c
index a13ddba1ebc3..d74b74ca07af 100644
--- a/fs/btrfs/volumes.c
+++ b/fs/btrfs/volumes.c
@@ -5941,6 +5941,7 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
u64 stripe_len;
u64 raid56_full_stripe_start = (u64)-1;
int data_stripes;
+ int ret = 0;
ASSERT(op != BTRFS_MAP_DISCARD);
@@ -5961,8 +5962,8 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
btrfs_crit(fs_info,
"stripe math has gone wrong, stripe_offset=%llu offset=%llu start=%llu logical=%llu stripe_len=%llu",
stripe_offset, offset, em->start, logical, stripe_len);
- free_extent_map(em);
- return -EINVAL;
+ ret = -EINVAL;
+ goto out;
}
/* stripe_offset is the offset of this block in its stripe */
@@ -6009,7 +6010,10 @@ int btrfs_get_io_geometry(struct btrfs_fs_info *fs_info, enum btrfs_map_op op,
io_geom->stripe_offset = stripe_offset;
io_geom->raid56_stripe_offset = raid56_full_stripe_start;
- return 0;
+out:
+ /* once for us */
+ free_extent_map(em);
+ return ret;
}
static int __btrfs_map_block(struct btrfs_fs_info *fs_info,