Diffstat (limited to 'fs/bcachefs/extents.c')
-rw-r--r-- | fs/bcachefs/extents.c | 503
1 file changed, 311 insertions, 192 deletions
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index 1b25f84e4b9c..1a331e539204 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -8,11 +8,13 @@
 #include "bcachefs.h"
 #include "bkey_methods.h"
+#include "btree_cache.h"
 #include "btree_gc.h"
 #include "btree_io.h"
 #include "btree_iter.h"
 #include "buckets.h"
 #include "checksum.h"
+#include "compress.h"
 #include "debug.h"
 #include "disk_groups.h"
 #include "error.h"
@@ -162,17 +164,19 @@ int bch2_bkey_pick_read_device(struct bch_fs *c, struct bkey_s_c k,
 
 /* KEY_TYPE_btree_ptr: */
 
-int bch2_btree_ptr_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_btree_ptr_invalid(struct bch_fs *c, struct bkey_s_c k,
 			   enum bkey_invalid_flags flags,
 			   struct printbuf *err)
 {
-	if (bkey_val_u64s(k.k) > BCH_REPLICAS_MAX) {
-		prt_printf(err, "value too big (%zu > %u)",
-			   bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
-		return -BCH_ERR_invalid_bkey;
-	}
+	int ret = 0;
+
+	bkey_fsck_err_on(bkey_val_u64s(k.k) > BCH_REPLICAS_MAX, c, err,
+			 btree_ptr_val_too_big,
+			 "value too big (%zu > %u)", bkey_val_u64s(k.k), BCH_REPLICAS_MAX);
 
-	return bch2_bkey_ptrs_invalid(c, k, flags, err);
+	ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
+fsck_err:
+	return ret;
 }
 
 void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
@@ -181,17 +185,25 @@ void bch2_btree_ptr_to_text(struct printbuf *out, struct bch_fs *c,
 	bch2_bkey_ptrs_to_text(out, c, k);
 }
 
-int bch2_btree_ptr_v2_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_btree_ptr_v2_invalid(struct bch_fs *c, struct bkey_s_c k,
 			      enum bkey_invalid_flags flags,
 			      struct printbuf *err)
 {
-	if (bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX) {
-		prt_printf(err, "value too big (%zu > %zu)",
-			   bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
-		return -BCH_ERR_invalid_bkey;
-	}
+	struct bkey_s_c_btree_ptr_v2 bp = bkey_s_c_to_btree_ptr_v2(k);
+	int ret = 0;
+
+	bkey_fsck_err_on(bkey_val_u64s(k.k) > BKEY_BTREE_PTR_VAL_U64s_MAX,
+			 c, err, btree_ptr_v2_val_too_big,
+			 "value too big (%zu > %zu)",
+			 bkey_val_u64s(k.k), BKEY_BTREE_PTR_VAL_U64s_MAX);
 
-	return bch2_bkey_ptrs_invalid(c, k, flags, err);
+	bkey_fsck_err_on(bpos_ge(bp.v->min_key, bp.k->p),
+			 c, err, btree_ptr_v2_min_key_bad,
+			 "min_key > key");
+
+	ret = bch2_bkey_ptrs_invalid(c, k, flags, err);
+fsck_err:
+	return ret;
 }
 
 void bch2_btree_ptr_v2_to_text(struct printbuf *out, struct bch_fs *c,
@@ -372,19 +384,18 @@ bool bch2_extent_merge(struct bch_fs *c, struct bkey_s l, struct bkey_s_c r)
 
 /* KEY_TYPE_reservation: */
 
-int bch2_reservation_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_reservation_invalid(struct bch_fs *c, struct bkey_s_c k,
 			     enum bkey_invalid_flags flags,
 			     struct printbuf *err)
 {
 	struct bkey_s_c_reservation r = bkey_s_c_to_reservation(k);
+	int ret = 0;
 
-	if (!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX) {
-		prt_printf(err, "invalid nr_replicas (%u)",
-			   r.v->nr_replicas);
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	return 0;
+	bkey_fsck_err_on(!r.v->nr_replicas || r.v->nr_replicas > BCH_REPLICAS_MAX, c, err,
+			 reservation_key_nr_replicas_invalid,
+			 "invalid nr_replicas (%u)", r.v->nr_replicas);
+fsck_err:
+	return ret;
 }
 
 void bch2_reservation_to_text(struct printbuf *out, struct bch_fs *c,
@@ -644,37 +655,31 @@ unsigned bch2_bkey_replicas(struct bch_fs *c, struct bkey_s_c k)
 	return replicas;
 }
 
-unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+static inline unsigned __extent_ptr_durability(struct bch_dev *ca, struct extent_ptr_decoded *p)
 {
-	struct bch_dev *ca;
-
 	if (p->ptr.cached)
 		return 0;
 
-	ca = bch_dev_bkey_exists(c, p->ptr.dev);
-
-	return ca->mi.durability +
-		(p->has_ec
-		 ? p->ec.redundancy
-		 : 0);
+	return p->has_ec
+		? p->ec.redundancy + 1
+		: ca->mi.durability;
 }
 
-unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+unsigned bch2_extent_ptr_desired_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
 {
-	struct bch_dev *ca;
+	struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
-	if (p->ptr.cached)
-		return 0;
+	return __extent_ptr_durability(ca, p);
+}
 
-	ca = bch_dev_bkey_exists(c, p->ptr.dev);
+unsigned bch2_extent_ptr_durability(struct bch_fs *c, struct extent_ptr_decoded *p)
+{
+	struct bch_dev *ca = bch_dev_bkey_exists(c, p->ptr.dev);
 
 	if (ca->mi.state == BCH_MEMBER_STATE_failed)
 		return 0;
 
-	return ca->mi.durability +
-		(p->has_ec
-		 ? p->ec.redundancy
-		 : 0);
+	return __extent_ptr_durability(ca, p);
 }
 
 unsigned bch2_bkey_durability(struct bch_fs *c, struct bkey_s_c k)
@@ -757,18 +762,6 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
 	return i;
 }
 
-static void extent_entry_drop(struct bkey_s k, union bch_extent_entry *entry)
-{
-	union bch_extent_entry *next = extent_entry_next(entry);
-
-	/* stripes have ptrs, but their layout doesn't work with this code */
-	BUG_ON(k.k->type == KEY_TYPE_stripe);
-
-	memmove_u64s_down(entry, next,
-			  (u64 *) bkey_val_end(k) - (u64 *) next);
-	k.k->u64s -= (u64 *) next - (u64 *) entry;
-}
-
 /*
  * Returns pointer to the next entry after the one being dropped:
  */
@@ -856,7 +849,6 @@ void bch2_bkey_drop_device_noerror(struct bkey_s k, unsigned dev)
 const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned dev)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const struct bch_extent_ptr *ptr;
 
 	bkey_for_each_ptr(ptrs, ptr)
 		if (ptr->dev == dev)
@@ -868,7 +860,6 @@ const struct bch_extent_ptr *bch2_bkey_has_device_c(struct bkey_s_c k, unsigned
 bool bch2_bkey_has_target(struct bch_fs *c, struct bkey_s_c k, unsigned target)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const struct bch_extent_ptr *ptr;
 
 	bkey_for_each_ptr(ptrs, ptr)
 		if (bch2_dev_in_target(c, ptr->dev, target) &&
@@ -987,15 +978,38 @@ bool bch2_extent_normalize(struct bch_fs *c, struct bkey_s k)
 	return bkey_deleted(k.k);
 }
 
+void bch2_extent_ptr_to_text(struct printbuf *out, struct bch_fs *c, const struct bch_extent_ptr *ptr)
+{
+	struct bch_dev *ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
+		? bch_dev_bkey_exists(c, ptr->dev)
+		: NULL;
+
+	if (!ca) {
+		prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
+			   (u64) ptr->offset, ptr->gen,
+			   ptr->cached ? " cached" : "");
+	} else {
+		u32 offset;
+		u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
+
+		prt_printf(out, "ptr: %u:%llu:%u gen %u",
+			   ptr->dev, b, offset, ptr->gen);
+		if (ptr->cached)
+			prt_str(out, " cached");
+		if (ptr->unwritten)
+			prt_str(out, " unwritten");
+		if (b >= ca->mi.first_bucket &&
+		    b <  ca->mi.nbuckets &&
+		    ptr_stale(ca, ptr))
+			prt_printf(out, " stale");
+	}
+}
+
 void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 			    struct bkey_s_c k)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
 	const union bch_extent_entry *entry;
-	struct bch_extent_crc_unpacked crc;
-	const struct bch_extent_ptr *ptr;
-	const struct bch_extent_stripe_ptr *ec;
-	struct bch_dev *ca;
 	bool first = true;
 
 	if (c)
@@ -1007,47 +1021,43 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 
 		switch (__extent_entry_type(entry)) {
 		case BCH_EXTENT_ENTRY_ptr:
-			ptr = entry_to_ptr(entry);
-			ca = c && ptr->dev < c->sb.nr_devices && c->devs[ptr->dev]
-				? bch_dev_bkey_exists(c, ptr->dev)
-				: NULL;
-
-			if (!ca) {
-				prt_printf(out, "ptr: %u:%llu gen %u%s", ptr->dev,
-					   (u64) ptr->offset, ptr->gen,
-					   ptr->cached ? " cached" : "");
-			} else {
-				u32 offset;
-				u64 b = sector_to_bucket_and_offset(ca, ptr->offset, &offset);
-
-				prt_printf(out, "ptr: %u:%llu:%u gen %u",
-					   ptr->dev, b, offset, ptr->gen);
-				if (ptr->cached)
-					prt_str(out, " cached");
-				if (ptr->unwritten)
-					prt_str(out, " unwritten");
-				if (ca && ptr_stale(ca, ptr))
-					prt_printf(out, " stale");
-			}
+			bch2_extent_ptr_to_text(out, c, entry_to_ptr(entry));
 			break;
+
 		case BCH_EXTENT_ENTRY_crc32:
 		case BCH_EXTENT_ENTRY_crc64:
-		case BCH_EXTENT_ENTRY_crc128:
-			crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
+		case BCH_EXTENT_ENTRY_crc128: {
+			struct bch_extent_crc_unpacked crc =
+				bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-			prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum %s compress %s",
+			prt_printf(out, "crc: c_size %u size %u offset %u nonce %u csum ",
 				   crc.compressed_size,
 				   crc.uncompressed_size,
-				   crc.offset, crc.nonce,
-				   bch2_csum_types[crc.csum_type],
-				   bch2_compression_types[crc.compression_type]);
+				   crc.offset, crc.nonce);
+			bch2_prt_csum_type(out, crc.csum_type);
+			prt_str(out, " compress ");
+			bch2_prt_compression_type(out, crc.compression_type);
 			break;
-		case BCH_EXTENT_ENTRY_stripe_ptr:
-			ec = &entry->stripe_ptr;
+		}
+		case BCH_EXTENT_ENTRY_stripe_ptr: {
+			const struct bch_extent_stripe_ptr *ec = &entry->stripe_ptr;
 
 			prt_printf(out, "ec: idx %llu block %u",
 				   (u64) ec->idx, ec->block);
 			break;
+		}
+		case BCH_EXTENT_ENTRY_rebalance: {
+			const struct bch_extent_rebalance *r = &entry->rebalance;
+
+			prt_str(out, "rebalance: target ");
+			if (c)
+				bch2_target_to_text(out, c, r->target);
+			else
+				prt_printf(out, "%u", r->target);
+			prt_str(out, " compression ");
+			bch2_compression_opt_to_text(out, r->compression);
+			break;
+		}
 		default:
 			prt_printf(out, "(invalid extent entry %.16llx)", *((u64 *) entry));
 			return;
@@ -1057,7 +1067,7 @@ void bch2_bkey_ptrs_to_text(struct printbuf *out, struct bch_fs *c,
 	}
 }
 
-static int extent_ptr_invalid(const struct bch_fs *c,
+static int extent_ptr_invalid(struct bch_fs *c,
 			      struct bkey_s_c k,
 			      enum bkey_invalid_flags flags,
 			      const struct bch_extent_ptr *ptr,
@@ -1066,10 +1076,10 @@ static int extent_ptr_invalid(const struct bch_fs *c,
 			      struct printbuf *err)
 {
 	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
-	const struct bch_extent_ptr *ptr2;
 	u64 bucket;
 	u32 bucket_offset;
 	struct bch_dev *ca;
+	int ret = 0;
 
 	if (!bch2_dev_exists2(c, ptr->dev)) {
 		/*
@@ -1080,41 +1090,33 @@ static int extent_ptr_invalid(const struct bch_fs *c,
 		if (flags & BKEY_INVALID_WRITE)
 			return 0;
 
-		prt_printf(err, "pointer to invalid device (%u)", ptr->dev);
-		return -BCH_ERR_invalid_bkey;
+		bkey_fsck_err(c, err, ptr_to_invalid_device,
+			      "pointer to invalid device (%u)", ptr->dev);
 	}
 
 	ca = bch_dev_bkey_exists(c, ptr->dev);
 	bkey_for_each_ptr(ptrs, ptr2)
-		if (ptr != ptr2 && ptr->dev == ptr2->dev) {
-			prt_printf(err, "multiple pointers to same device (%u)", ptr->dev);
-			return -BCH_ERR_invalid_bkey;
-		}
+		bkey_fsck_err_on(ptr != ptr2 && ptr->dev == ptr2->dev, c, err,
+				 ptr_to_duplicate_device,
+				 "multiple pointers to same device (%u)", ptr->dev);
 
 	bucket = sector_to_bucket_and_offset(ca, ptr->offset, &bucket_offset);
 
-	if (bucket >= ca->mi.nbuckets) {
-		prt_printf(err, "pointer past last bucket (%llu > %llu)",
-			   bucket, ca->mi.nbuckets);
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	if (ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket)) {
-		prt_printf(err, "pointer before first bucket (%llu < %u)",
-			   bucket, ca->mi.first_bucket);
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	if (bucket_offset + size_ondisk > ca->mi.bucket_size) {
-		prt_printf(err, "pointer spans multiple buckets (%u + %u > %u)",
+	bkey_fsck_err_on(bucket >= ca->mi.nbuckets, c, err,
+			 ptr_after_last_bucket,
+			 "pointer past last bucket (%llu > %llu)", bucket, ca->mi.nbuckets);
+	bkey_fsck_err_on(ptr->offset < bucket_to_sector(ca, ca->mi.first_bucket), c, err,
+			 ptr_before_first_bucket,
+			 "pointer before first bucket (%llu < %u)", bucket, ca->mi.first_bucket);
+	bkey_fsck_err_on(bucket_offset + size_ondisk > ca->mi.bucket_size, c, err,
+			 ptr_spans_multiple_buckets,
+			 "pointer spans multiple buckets (%u + %u > %u)",
 			   bucket_offset, size_ondisk, ca->mi.bucket_size);
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	return 0;
+fsck_err:
+	return ret;
 }
 
-int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
+int bch2_bkey_ptrs_invalid(struct bch_fs *c, struct bkey_s_c k,
 			   enum bkey_invalid_flags flags,
 			   struct printbuf *err)
 {
@@ -1124,24 +1126,22 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 	unsigned size_ondisk = k.k->size;
 	unsigned nonce = UINT_MAX;
 	unsigned nr_ptrs = 0;
-	bool unwritten = false, have_ec = false, crc_since_last_ptr = false;
-	int ret;
+	bool have_written = false, have_unwritten = false, have_ec = false, crc_since_last_ptr = false;
+	int ret = 0;
 
 	if (bkey_is_btree_ptr(k.k))
 		size_ondisk = btree_sectors(c);
 
 	bkey_extent_entry_for_each(ptrs, entry) {
-		if (__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX) {
-			prt_printf(err, "invalid extent entry type (got %u, max %u)",
-				   __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
-			return -BCH_ERR_invalid_bkey;
-		}
+		bkey_fsck_err_on(__extent_entry_type(entry) >= BCH_EXTENT_ENTRY_MAX, c, err,
+				 extent_ptrs_invalid_entry,
+				 "invalid extent entry type (got %u, max %u)",
+				 __extent_entry_type(entry), BCH_EXTENT_ENTRY_MAX);
 
-		if (bkey_is_btree_ptr(k.k) &&
-		    !extent_entry_is_ptr(entry)) {
-			prt_printf(err, "has non ptr field");
-			return -BCH_ERR_invalid_bkey;
-		}
+		bkey_fsck_err_on(bkey_is_btree_ptr(k.k) &&
+				 !extent_entry_is_ptr(entry), c, err,
+				 btree_ptr_has_non_ptr,
+				 "has non ptr field");
 
 		switch (extent_entry_type(entry)) {
 		case BCH_EXTENT_ENTRY_ptr:
@@ -1150,22 +1150,15 @@ int bch2_bkey_ptrs_invalid(const struct bch_fs *c, struct bkey_s_c k,
 			if (ret)
 				return ret;
 
-			if (nr_ptrs && unwritten != entry->ptr.unwritten) {
-				prt_printf(err, "extent with unwritten and written ptrs");
-				return -BCH_ERR_invalid_bkey;
-			}
-
-			if (k.k->type != KEY_TYPE_extent && entry->ptr.unwritten) {
-				prt_printf(err, "has unwritten ptrs");
-				return -BCH_ERR_invalid_bkey;
-			}
+			bkey_fsck_err_on(entry->ptr.cached && have_ec, c, err,
+					 ptr_cached_and_erasure_coded,
+					 "cached, erasure coded ptr");
 
-			if (entry->ptr.cached && have_ec) {
-				prt_printf(err, "cached, erasure coded ptr");
-				return -BCH_ERR_invalid_bkey;
-			}
+			if (!entry->ptr.unwritten)
+				have_written = true;
+			else
+				have_unwritten = true;
 
-			unwritten = entry->ptr.unwritten;
 			have_ec = false;
 			crc_since_last_ptr = false;
 			nr_ptrs++;
@@ -1175,72 +1168,77 @@
 		case BCH_EXTENT_ENTRY_crc128:
 			crc = bch2_extent_crc_unpack(k.k, entry_to_crc(entry));
 
-			if (crc.offset + crc.live_size >
-			    crc.uncompressed_size) {
-				prt_printf(err, "checksum offset + key size > uncompressed size");
-				return -BCH_ERR_invalid_bkey;
-			}
-
-			size_ondisk = crc.compressed_size;
-
-			if (!bch2_checksum_type_valid(c, crc.csum_type)) {
-				prt_printf(err, "invalid checksum type");
-				return -BCH_ERR_invalid_bkey;
-			}
-
-			if (crc.compression_type >= BCH_COMPRESSION_TYPE_NR) {
-				prt_printf(err, "invalid compression type");
-				return -BCH_ERR_invalid_bkey;
-			}
+			bkey_fsck_err_on(crc.offset + crc.live_size > crc.uncompressed_size, c, err,
+					 ptr_crc_uncompressed_size_too_small,
+					 "checksum offset + key size > uncompressed size");
+			bkey_fsck_err_on(!bch2_checksum_type_valid(c, crc.csum_type), c, err,
+					 ptr_crc_csum_type_unknown,
+					 "invalid checksum type");
+			bkey_fsck_err_on(crc.compression_type >= BCH_COMPRESSION_TYPE_NR, c, err,
+					 ptr_crc_compression_type_unknown,
+					 "invalid compression type");
 
 			if (bch2_csum_type_is_encryption(crc.csum_type)) {
 				if (nonce == UINT_MAX)
 					nonce = crc.offset + crc.nonce;
-				else if (nonce != crc.offset + crc.nonce) {
-					prt_printf(err, "incorrect nonce");
-					return -BCH_ERR_invalid_bkey;
-				}
+				else if (nonce != crc.offset + crc.nonce)
+					bkey_fsck_err(c, err, ptr_crc_nonce_mismatch,
						      "incorrect nonce");
 			}
 
-			if (crc_since_last_ptr) {
-				prt_printf(err, "redundant crc entry");
-				return -BCH_ERR_invalid_bkey;
-			}
+			bkey_fsck_err_on(crc_since_last_ptr, c, err,
+					 ptr_crc_redundant,
+					 "redundant crc entry");
 			crc_since_last_ptr = true;
+
+			bkey_fsck_err_on(crc_is_encoded(crc) &&
+					 (crc.uncompressed_size > c->opts.encoded_extent_max >> 9) &&
+					 (flags & (BKEY_INVALID_WRITE|BKEY_INVALID_COMMIT)), c, err,
+					 ptr_crc_uncompressed_size_too_big,
+					 "too large encoded extent");
+
+			size_ondisk = crc.compressed_size;
 			break;
 		case BCH_EXTENT_ENTRY_stripe_ptr:
-			if (have_ec) {
-				prt_printf(err, "redundant stripe entry");
-				return -BCH_ERR_invalid_bkey;
-			}
+			bkey_fsck_err_on(have_ec, c, err,
					 ptr_stripe_redundant,
+					 "redundant stripe entry");
 			have_ec = true;
 			break;
-		case BCH_EXTENT_ENTRY_rebalance:
+		case BCH_EXTENT_ENTRY_rebalance: {
+			const struct bch_extent_rebalance *r = &entry->rebalance;
+
+			if (!bch2_compression_opt_valid(r->compression)) {
+				struct bch_compression_opt opt = __bch2_compression_decode(r->compression);
+				prt_printf(err, "invalid compression opt %u:%u",
+					   opt.type, opt.level);
+				return -BCH_ERR_invalid_bkey;
+			}
 			break;
 		}
+		}
 	}
 
-	if (!nr_ptrs) {
-		prt_str(err, "no ptrs");
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	if (nr_ptrs >= BCH_BKEY_PTRS_MAX) {
-		prt_str(err, "too many ptrs");
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	if (crc_since_last_ptr) {
-		prt_printf(err, "redundant crc entry");
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	if (have_ec) {
-		prt_printf(err, "redundant stripe entry");
-		return -BCH_ERR_invalid_bkey;
-	}
-
-	return 0;
+	bkey_fsck_err_on(!nr_ptrs, c, err,
+			 extent_ptrs_no_ptrs,
+			 "no ptrs");
+	bkey_fsck_err_on(nr_ptrs > BCH_BKEY_PTRS_MAX, c, err,
+			 extent_ptrs_too_many_ptrs,
+			 "too many ptrs: %u > %u", nr_ptrs, BCH_BKEY_PTRS_MAX);
+	bkey_fsck_err_on(have_written && have_unwritten, c, err,
+			 extent_ptrs_written_and_unwritten,
+			 "extent with unwritten and written ptrs");
+	bkey_fsck_err_on(k.k->type != KEY_TYPE_extent && have_unwritten, c, err,
+			 extent_ptrs_unwritten,
+			 "has unwritten ptrs");
+	bkey_fsck_err_on(crc_since_last_ptr, c, err,
+			 extent_ptrs_redundant_crc,
+			 "redundant crc entry");
+	bkey_fsck_err_on(have_ec, c, err,
+			 extent_ptrs_redundant_stripe,
+			 "redundant stripe entry");
+fsck_err:
+	return ret;
 }
 
 void bch2_ptr_swab(struct bkey_s k)
@@ -1281,6 +1279,127 @@ void bch2_ptr_swab(struct bkey_s k)
 	}
 }
 
+const struct bch_extent_rebalance *bch2_bkey_rebalance_opts(struct bkey_s_c k)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	const union bch_extent_entry *entry;
+
+	bkey_extent_entry_for_each(ptrs, entry)
+		if (__extent_entry_type(entry) == BCH_EXTENT_ENTRY_rebalance)
+			return &entry->rebalance;
+
+	return NULL;
+}
+
+unsigned bch2_bkey_ptrs_need_rebalance(struct bch_fs *c, struct bkey_s_c k,
+				       unsigned target, unsigned compression)
+{
+	struct bkey_ptrs_c ptrs = bch2_bkey_ptrs_c(k);
+	unsigned rewrite_ptrs = 0;
+
+	if (compression) {
+		unsigned compression_type = bch2_compression_opt_to_type(compression);
+		const union bch_extent_entry *entry;
+		struct extent_ptr_decoded p;
+		unsigned i = 0;
+
+		bkey_for_each_ptr_decode(k.k, ptrs, p, entry) {
+			if (p.crc.compression_type == BCH_COMPRESSION_TYPE_incompressible ||
+			    p.ptr.unwritten) {
+				rewrite_ptrs = 0;
+				goto incompressible;
+			}
+
+			if (!p.ptr.cached && p.crc.compression_type != compression_type)
+				rewrite_ptrs |= 1U << i;
+			i++;
+		}
+	}
+incompressible:
+	if (target && bch2_target_accepts_data(c, BCH_DATA_user, target)) {
+		unsigned i = 0;
+
+		bkey_for_each_ptr(ptrs, ptr) {
+			if (!ptr->cached && !bch2_dev_in_target(c, ptr->dev, target))
+				rewrite_ptrs |= 1U << i;
+			i++;
+		}
+	}
+
+	return rewrite_ptrs;
+}
+
+bool bch2_bkey_needs_rebalance(struct bch_fs *c, struct bkey_s_c k)
+{
+	const struct bch_extent_rebalance *r = bch2_bkey_rebalance_opts(k);
+
+	/*
+	 * If it's an indirect extent, we don't delete the rebalance entry when
+	 * done so that we know what options were applied - check if it still
+	 * needs work done:
+	 */
+	if (r &&
+	    k.k->type == KEY_TYPE_reflink_v &&
+	    !bch2_bkey_ptrs_need_rebalance(c, k, r->target, r->compression))
+		r = NULL;
+
+	return r != NULL;
+}
+
+int bch2_bkey_set_needs_rebalance(struct bch_fs *c, struct bkey_i *_k,
+				  struct bch_io_opts *opts)
+{
+	struct bkey_s k = bkey_i_to_s(_k);
+	struct bch_extent_rebalance *r;
+	unsigned target = opts->background_target;
+	unsigned compression = background_compression(*opts);
+	bool needs_rebalance;
+
+	if (!bkey_extent_is_direct_data(k.k))
+		return 0;
+
+	/* get existing rebalance entry: */
+	r = (struct bch_extent_rebalance *) bch2_bkey_rebalance_opts(k.s_c);
+	if (r) {
+		if (k.k->type == KEY_TYPE_reflink_v) {
+			/*
+			 * indirect extents: existing options take precedence,
+			 * so that we don't move extents back and forth if
+			 * they're referenced by different inodes with different
+			 * options:
+			 */
+			if (r->target)
+				target = r->target;
+			if (r->compression)
+				compression = r->compression;
+		}
+
+		r->target	= target;
+		r->compression	= compression;
+	}
+
+	needs_rebalance = bch2_bkey_ptrs_need_rebalance(c, k.s_c, target, compression);
+
+	if (needs_rebalance && !r) {
+		union bch_extent_entry *new = bkey_val_end(k);
+
+		new->rebalance.type		= 1U << BCH_EXTENT_ENTRY_rebalance;
+		new->rebalance.compression	= compression;
+		new->rebalance.target		= target;
+		new->rebalance.unused		= 0;
+		k.k->u64s += extent_entry_u64s(new);
+	} else if (!needs_rebalance && r && k.k->type != KEY_TYPE_reflink_v) {
+		/*
+		 * For indirect extents, don't delete the rebalance entry when
+		 * we're finished so that we know we specifically moved it or
+		 * compressed it to its current location/compression type
+		 */
+		extent_entry_drop(k, (union bch_extent_entry *) r);
+	}
+
+	return 0;
+}
+
 /* Generic extent code: */
 
 int bch2_cut_front_s(struct bpos where, struct bkey_s k)
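The conversion in this diff replaces inline `return -BCH_ERR_invalid_bkey` with the bkey_fsck_err()/bkey_fsck_err_on() helpers: each check names an error ID (e.g. btree_ptr_val_too_big), formats its message into the printbuf, and jumps to the function's fsck_err label, which is why every converted validator gains an `int ret = 0;` declaration and a `fsck_err:` label. A minimal sketch of that control-flow shape follows; the expansion shown is an assumption for illustration, not the actual bcachefs macro, which also dispatches on the error ID's per-error fsck policy:

#define bkey_fsck_err_on(cond, c, err_buf, err_id, ...)			\
do {									\
	if (cond) {							\
		/* format the human-readable message into the printbuf */\
		prt_printf(err_buf, __VA_ARGS__);			\
		/* assumed simplification: always fail the key here;	\
		 * the real macro consults the fsck policy for err_id */\
		ret = -BCH_ERR_invalid_bkey;				\
		goto fsck_err;	/* label declared by the caller */	\
	}								\
} while (0)

This shape relies on the enclosing function declaring `int ret = 0;` and providing a `fsck_err:` label, exactly as the converted validators above do.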