aboutsummaryrefslogtreecommitdiff
path: root/fs/bcachefs
diff options
context:
space:
mode:
Diffstat (limited to 'fs/bcachefs')
-rw-r--r--fs/bcachefs/alloc_background.c24
-rw-r--r--fs/bcachefs/btree_journal_iter.c2
-rw-r--r--fs/bcachefs/buckets.c17
-rw-r--r--fs/bcachefs/data_update.c1
-rw-r--r--fs/bcachefs/ec.h4
-rw-r--r--fs/bcachefs/errcode.h1
-rw-r--r--fs/bcachefs/extents.c49
-rw-r--r--fs/bcachefs/extents.h23
-rw-r--r--fs/bcachefs/fs-io-buffered.c157
-rw-r--r--fs/bcachefs/fs-io-buffered.h6
-rw-r--r--fs/bcachefs/fs.c18
-rw-r--r--fs/bcachefs/fs.h7
-rw-r--r--fs/bcachefs/fsck.c18
-rw-r--r--fs/bcachefs/replicas.c5
-rw-r--r--fs/bcachefs/sb-errors_format.h10
-rw-r--r--fs/bcachefs/sb-members.c3
-rw-r--r--fs/bcachefs/sb-members_format.h5
-rw-r--r--fs/bcachefs/sysfs.c2
18 files changed, 169 insertions, 183 deletions
diff --git a/fs/bcachefs/alloc_background.c b/fs/bcachefs/alloc_background.c
index ba46f1c1d78a..dc3a4024aab6 100644
--- a/fs/bcachefs/alloc_background.c
+++ b/fs/bcachefs/alloc_background.c
@@ -1968,8 +1968,8 @@ static void bch2_do_discards_fast_work(struct work_struct *work)
break;
}
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
percpu_ref_put(&ca->io_ref);
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
@@ -1979,18 +1979,18 @@ static void bch2_discard_one_bucket_fast(struct bch_dev *ca, u64 bucket)
if (discard_in_flight_add(ca, bucket, false))
return;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_discard_fast))
- goto put_ioref;
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ goto put_ref;
if (queue_work(c->write_ref_wq, &ca->discard_fast_work))
return;
- bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
-put_ioref:
percpu_ref_put(&ca->io_ref);
+put_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_discard_fast);
}
static int invalidate_one_bucket(struct btree_trans *trans,
@@ -2132,26 +2132,26 @@ static void bch2_do_invalidates_work(struct work_struct *work)
bch2_trans_iter_exit(trans, &iter);
err:
bch2_trans_put(trans);
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
percpu_ref_put(&ca->io_ref);
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
void bch2_dev_do_invalidates(struct bch_dev *ca)
{
struct bch_fs *c = ca->fs;
- if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
return;
- if (!bch2_write_ref_tryget(c, BCH_WRITE_REF_invalidate))
- goto put_ioref;
+ if (!bch2_dev_get_ioref(c, ca->dev_idx, WRITE))
+ goto put_ref;
if (queue_work(c->write_ref_wq, &ca->invalidate_work))
return;
- bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
-put_ioref:
percpu_ref_put(&ca->io_ref);
+put_ref:
+ bch2_write_ref_put(c, BCH_WRITE_REF_invalidate);
}
void bch2_do_invalidates(struct bch_fs *c)
diff --git a/fs/bcachefs/btree_journal_iter.c b/fs/bcachefs/btree_journal_iter.c
index 74933490aaba..c1657182c275 100644
--- a/fs/bcachefs/btree_journal_iter.c
+++ b/fs/bcachefs/btree_journal_iter.c
@@ -530,6 +530,8 @@ static void __journal_keys_sort(struct journal_keys *keys)
{
sort(keys->data, keys->nr, sizeof(keys->data[0]), journal_sort_key_cmp, NULL);
+ cond_resched();
+
struct journal_key *dst = keys->data;
darray_for_each(*keys, src) {
diff --git a/fs/bcachefs/buckets.c b/fs/bcachefs/buckets.c
index a2274429e7f4..721bbe1dffc1 100644
--- a/fs/bcachefs/buckets.c
+++ b/fs/bcachefs/buckets.c
@@ -100,12 +100,13 @@ static int bch2_check_fix_ptr(struct btree_trans *trans,
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (!ca) {
- if (fsck_err(trans, ptr_to_invalid_device,
- "pointer to missing device %u\n"
- "while marking %s",
- p.ptr.dev,
- (printbuf_reset(&buf),
- bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
+ if (fsck_err_on(p.ptr.dev != BCH_SB_MEMBER_INVALID,
+ trans, ptr_to_invalid_device,
+ "pointer to missing device %u\n"
+ "while marking %s",
+ p.ptr.dev,
+ (printbuf_reset(&buf),
+ bch2_bkey_val_to_text(&buf, c, k), buf.buf)))
*do_update = true;
return 0;
}
@@ -562,7 +563,7 @@ static int bch2_trigger_pointer(struct btree_trans *trans,
struct bch_fs *c = trans->c;
struct bch_dev *ca = bch2_dev_tryget(c, p.ptr.dev);
if (unlikely(!ca)) {
- if (insert)
+ if (insert && p.ptr.dev != BCH_SB_MEMBER_INVALID)
ret = -EIO;
goto err;
}
@@ -876,7 +877,7 @@ int bch2_trigger_extent(struct btree_trans *trans,
need_rebalance_delta -= s != 0;
need_rebalance_sectors_delta -= s;
- s = bch2_bkey_sectors_need_rebalance(c, old);
+ s = bch2_bkey_sectors_need_rebalance(c, new.s_c);
need_rebalance_delta += s != 0;
need_rebalance_sectors_delta += s;
diff --git a/fs/bcachefs/data_update.c b/fs/bcachefs/data_update.c
index 65176d51b502..004894ad4147 100644
--- a/fs/bcachefs/data_update.c
+++ b/fs/bcachefs/data_update.c
@@ -337,6 +337,7 @@ restart_drop_extra_replicas:
printbuf_exit(&buf);
bch2_fatal_error(c);
+ ret = -EIO;
goto out;
}
diff --git a/fs/bcachefs/ec.h b/fs/bcachefs/ec.h
index 90962b3c0130..9baf3411a8f9 100644
--- a/fs/bcachefs/ec.h
+++ b/fs/bcachefs/ec.h
@@ -97,7 +97,9 @@ static inline bool __bch2_ptr_matches_stripe(const struct bch_extent_ptr *stripe
const struct bch_extent_ptr *data_ptr,
unsigned sectors)
{
- return data_ptr->dev == stripe_ptr->dev &&
+ return (data_ptr->dev == stripe_ptr->dev ||
+ data_ptr->dev == BCH_SB_MEMBER_INVALID ||
+ stripe_ptr->dev == BCH_SB_MEMBER_INVALID) &&
data_ptr->gen == stripe_ptr->gen &&
data_ptr->offset >= stripe_ptr->offset &&
data_ptr->offset < stripe_ptr->offset + sectors;
diff --git a/fs/bcachefs/errcode.h b/fs/bcachefs/errcode.h
index ab5a7adece10..742dcdd3e5d7 100644
--- a/fs/bcachefs/errcode.h
+++ b/fs/bcachefs/errcode.h
@@ -257,7 +257,6 @@
x(BCH_ERR_nopromote, nopromote_in_flight) \
x(BCH_ERR_nopromote, nopromote_no_writes) \
x(BCH_ERR_nopromote, nopromote_enomem) \
- x(0, need_inode_lock) \
x(0, invalid_snapshot_node) \
x(0, option_needs_open_fs)
diff --git a/fs/bcachefs/extents.c b/fs/bcachefs/extents.c
index e317df3644a1..324303bf4353 100644
--- a/fs/bcachefs/extents.c
+++ b/fs/bcachefs/extents.c
@@ -781,14 +781,17 @@ static union bch_extent_entry *extent_entry_prev(struct bkey_ptrs ptrs,
/*
* Returns pointer to the next entry after the one being dropped:
*/
-union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+void bch2_bkey_drop_ptr_noerror(struct bkey_s k, struct bch_extent_ptr *ptr)
{
struct bkey_ptrs ptrs = bch2_bkey_ptrs(k);
union bch_extent_entry *entry = to_entry(ptr), *next;
- union bch_extent_entry *ret = entry;
bool drop_crc = true;
+ if (k.k->type == KEY_TYPE_stripe) {
+ ptr->dev = BCH_SB_MEMBER_INVALID;
+ return;
+ }
+
EBUG_ON(ptr < &ptrs.start->ptr ||
ptr >= &ptrs.end->ptr);
EBUG_ON(ptr->type != 1 << BCH_EXTENT_ENTRY_ptr);
@@ -811,21 +814,16 @@ union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s k,
break;
if ((extent_entry_is_crc(entry) && drop_crc) ||
- extent_entry_is_stripe_ptr(entry)) {
- ret = (void *) ret - extent_entry_bytes(entry);
+ extent_entry_is_stripe_ptr(entry))
extent_entry_drop(k, entry);
- }
}
-
- return ret;
}
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
- struct bch_extent_ptr *ptr)
+void bch2_bkey_drop_ptr(struct bkey_s k, struct bch_extent_ptr *ptr)
{
bool have_dirty = bch2_bkey_dirty_devs(k.s_c).nr;
- union bch_extent_entry *ret =
- bch2_bkey_drop_ptr_noerror(k, ptr);
+
+ bch2_bkey_drop_ptr_noerror(k, ptr);
/*
* If we deleted all the dirty pointers and there's still cached
@@ -837,14 +835,10 @@ union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s k,
!bch2_bkey_dirty_devs(k.s_c).nr) {
k.k->type = KEY_TYPE_error;
set_bkey_val_u64s(k.k, 0);
- ret = NULL;
} else if (!bch2_bkey_nr_ptrs(k.s_c)) {
k.k->type = KEY_TYPE_deleted;
set_bkey_val_u64s(k.k, 0);
- ret = NULL;
}
-
- return ret;
}
void bch2_bkey_drop_device(struct bkey_s k, unsigned dev)
@@ -929,8 +923,29 @@ bool bch2_extents_match(struct bkey_s_c k1, struct bkey_s_c k2)
bkey_for_each_ptr_decode(k2.k, ptrs2, p2, entry2)
if (p1.ptr.dev == p2.ptr.dev &&
p1.ptr.gen == p2.ptr.gen &&
+
+ /*
+ * This checks that the two pointers point
+ * to the same region on disk - adjusting
+ * for the difference in where the extents
+ * start, since one may have been trimmed:
+ */
(s64) p1.ptr.offset + p1.crc.offset - bkey_start_offset(k1.k) ==
- (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k))
+ (s64) p2.ptr.offset + p2.crc.offset - bkey_start_offset(k2.k) &&
+
+ /*
+ * This additionally checks that the
+ * extents overlap on disk, since the
+ * previous check may trigger spuriously
+ * when one extent is immediately partially
+ * overwritten with another extent (so that
+ * on disk they are adjacent) and
+ * compression is in use:
+ */
+ ((p1.ptr.offset >= p2.ptr.offset &&
+ p1.ptr.offset < p2.ptr.offset + p2.crc.compressed_size) ||
+ (p2.ptr.offset >= p1.ptr.offset &&
+ p2.ptr.offset < p1.ptr.offset + p1.crc.compressed_size)))
return true;
return false;
diff --git a/fs/bcachefs/extents.h b/fs/bcachefs/extents.h
index 709dd83183be..42a7c6d820a0 100644
--- a/fs/bcachefs/extents.h
+++ b/fs/bcachefs/extents.h
@@ -649,26 +649,21 @@ static inline void bch2_bkey_append_ptr(struct bkey_i *k, struct bch_extent_ptr
void bch2_extent_ptr_decoded_append(struct bkey_i *,
struct extent_ptr_decoded *);
-union bch_extent_entry *bch2_bkey_drop_ptr_noerror(struct bkey_s,
- struct bch_extent_ptr *);
-union bch_extent_entry *bch2_bkey_drop_ptr(struct bkey_s,
- struct bch_extent_ptr *);
+void bch2_bkey_drop_ptr_noerror(struct bkey_s, struct bch_extent_ptr *);
+void bch2_bkey_drop_ptr(struct bkey_s, struct bch_extent_ptr *);
#define bch2_bkey_drop_ptrs(_k, _ptr, _cond) \
do { \
- struct bkey_ptrs _ptrs = bch2_bkey_ptrs(_k); \
+ __label__ _again; \
+ struct bkey_ptrs _ptrs; \
+_again: \
+ _ptrs = bch2_bkey_ptrs(_k); \
\
- struct bch_extent_ptr *_ptr = &_ptrs.start->ptr; \
- \
- while ((_ptr = bkey_ptr_next(_ptrs, _ptr))) { \
+ bkey_for_each_ptr(_ptrs, _ptr) \
if (_cond) { \
- _ptr = (void *) bch2_bkey_drop_ptr(_k, _ptr); \
- _ptrs = bch2_bkey_ptrs(_k); \
- continue; \
+ bch2_bkey_drop_ptr(_k, _ptr); \
+ goto _again; \
} \
- \
- (_ptr)++; \
- } \
} while (0)
bool bch2_bkey_matches_ptr(struct bch_fs *, struct bkey_s_c,
diff --git a/fs/bcachefs/fs-io-buffered.c b/fs/bcachefs/fs-io-buffered.c
index 184d03851676..ff60c041abe5 100644
--- a/fs/bcachefs/fs-io-buffered.c
+++ b/fs/bcachefs/fs-io-buffered.c
@@ -659,7 +659,7 @@ int bch2_writepages(struct address_space *mapping, struct writeback_control *wbc
int bch2_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
@@ -728,12 +728,11 @@ out:
goto err;
}
- *pagep = &folio->page;
+ *foliop = folio;
return 0;
err:
folio_unlock(folio);
folio_put(folio);
- *pagep = NULL;
err_unlock:
bch2_pagecache_add_put(inode);
kfree(res);
@@ -743,12 +742,11 @@ err_unlock:
int bch2_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *page, void *fsdata)
+ struct folio *folio, void *fsdata)
{
struct bch_inode_info *inode = to_bch_ei(mapping->host);
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation *res = fsdata;
- struct folio *folio = page_folio(page);
unsigned offset = pos - folio_pos(folio);
lockdep_assert_held(&inode->v.i_rwsem);
@@ -802,8 +800,7 @@ static noinline void folios_trunc(folios *fs, struct folio **fi)
static int __bch2_buffered_write(struct bch_inode_info *inode,
struct address_space *mapping,
struct iov_iter *iter,
- loff_t pos, unsigned len,
- bool inode_locked)
+ loff_t pos, unsigned len)
{
struct bch_fs *c = inode->v.i_sb->s_fs_info;
struct bch2_folio_reservation res;
@@ -827,15 +824,6 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
BUG_ON(!fs.nr);
- /*
- * If we're not using the inode lock, we need to lock all the folios for
- * atomiticity of writes vs. other writes:
- */
- if (!inode_locked && folio_end_pos(darray_last(fs)) < end) {
- ret = -BCH_ERR_need_inode_lock;
- goto out;
- }
-
f = darray_first(fs);
if (pos != folio_pos(f) && !folio_test_uptodate(f)) {
ret = bch2_read_single_folio(f, mapping);
@@ -932,10 +920,8 @@ static int __bch2_buffered_write(struct bch_inode_info *inode,
end = pos + copied;
spin_lock(&inode->v.i_lock);
- if (end > inode->v.i_size) {
- BUG_ON(!inode_locked);
+ if (end > inode->v.i_size)
i_size_write(&inode->v, end);
- }
spin_unlock(&inode->v.i_lock);
f_pos = pos;
@@ -979,68 +965,12 @@ static ssize_t bch2_buffered_write(struct kiocb *iocb, struct iov_iter *iter)
struct file *file = iocb->ki_filp;
struct address_space *mapping = file->f_mapping;
struct bch_inode_info *inode = file_bch_inode(file);
- loff_t pos;
- bool inode_locked = false;
- ssize_t written = 0, written2 = 0, ret = 0;
-
- /*
- * We don't take the inode lock unless i_size will be changing. Folio
- * locks provide exclusion with other writes, and the pagecache add lock
- * provides exclusion with truncate and hole punching.
- *
- * There is one nasty corner case where atomicity would be broken
- * without great care: when copying data from userspace to the page
- * cache, we do that with faults disable - a page fault would recurse
- * back into the filesystem, taking filesystem locks again, and
- * deadlock; so it's done with faults disabled, and we fault in the user
- * buffer when we aren't holding locks.
- *
- * If we do part of the write, but we then race and in the userspace
- * buffer have been evicted and are no longer resident, then we have to
- * drop our folio locks to re-fault them in, breaking write atomicity.
- *
- * To fix this, we restart the write from the start, if we weren't
- * holding the inode lock.
- *
- * There is another wrinkle after that; if we restart the write from the
- * start, and then get an unrecoverable error, we _cannot_ claim to
- * userspace that we did not write data we actually did - so we must
- * track (written2) the most we ever wrote.
- */
-
- if ((iocb->ki_flags & IOCB_APPEND) ||
- (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v))) {
- inode_lock(&inode->v);
- inode_locked = true;
- }
-
- ret = generic_write_checks(iocb, iter);
- if (ret <= 0)
- goto unlock;
-
- ret = file_remove_privs_flags(file, !inode_locked ? IOCB_NOWAIT : 0);
- if (ret) {
- if (!inode_locked) {
- inode_lock(&inode->v);
- inode_locked = true;
- ret = file_remove_privs_flags(file, 0);
- }
- if (ret)
- goto unlock;
- }
-
- ret = file_update_time(file);
- if (ret)
- goto unlock;
-
- pos = iocb->ki_pos;
+ loff_t pos = iocb->ki_pos;
+ ssize_t written = 0;
+ int ret = 0;
bch2_pagecache_add_get(inode);
- if (!inode_locked &&
- (iocb->ki_pos + iov_iter_count(iter) > i_size_read(&inode->v)))
- goto get_inode_lock;
-
do {
unsigned offset = pos & (PAGE_SIZE - 1);
unsigned bytes = iov_iter_count(iter);
@@ -1065,17 +995,12 @@ again:
}
}
- if (unlikely(bytes != iov_iter_count(iter) && !inode_locked))
- goto get_inode_lock;
-
if (unlikely(fatal_signal_pending(current))) {
ret = -EINTR;
break;
}
- ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes, inode_locked);
- if (ret == -BCH_ERR_need_inode_lock)
- goto get_inode_lock;
+ ret = __bch2_buffered_write(inode, mapping, iter, pos, bytes);
if (unlikely(ret < 0))
break;
@@ -1096,46 +1021,50 @@ again:
}
pos += ret;
written += ret;
- written2 = max(written, written2);
-
- if (ret != bytes && !inode_locked)
- goto get_inode_lock;
ret = 0;
balance_dirty_pages_ratelimited(mapping);
-
- if (0) {
-get_inode_lock:
- bch2_pagecache_add_put(inode);
- inode_lock(&inode->v);
- inode_locked = true;
- bch2_pagecache_add_get(inode);
-
- iov_iter_revert(iter, written);
- pos -= written;
- written = 0;
- ret = 0;
- }
} while (iov_iter_count(iter));
- bch2_pagecache_add_put(inode);
-unlock:
- if (inode_locked)
- inode_unlock(&inode->v);
- iocb->ki_pos += written;
+ bch2_pagecache_add_put(inode);
- ret = max(written, written2) ?: ret;
- if (ret > 0)
- ret = generic_write_sync(iocb, ret);
- return ret;
+ return written ? written : ret;
}
-ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *iter)
+ssize_t bch2_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
- ssize_t ret = iocb->ki_flags & IOCB_DIRECT
- ? bch2_direct_write(iocb, iter)
- : bch2_buffered_write(iocb, iter);
+ struct file *file = iocb->ki_filp;
+ struct bch_inode_info *inode = file_bch_inode(file);
+ ssize_t ret;
+
+ if (iocb->ki_flags & IOCB_DIRECT) {
+ ret = bch2_direct_write(iocb, from);
+ goto out;
+ }
+
+ inode_lock(&inode->v);
+
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0)
+ goto unlock;
+
+ ret = file_remove_privs(file);
+ if (ret)
+ goto unlock;
+
+ ret = file_update_time(file);
+ if (ret)
+ goto unlock;
+
+ ret = bch2_buffered_write(iocb, from);
+ if (likely(ret > 0))
+ iocb->ki_pos += ret;
+unlock:
+ inode_unlock(&inode->v);
+ if (ret > 0)
+ ret = generic_write_sync(iocb, ret);
+out:
return bch2_err_class(ret);
}
diff --git a/fs/bcachefs/fs-io-buffered.h b/fs/bcachefs/fs-io-buffered.h
index a6126ff790e6..3207ebbb4ab4 100644
--- a/fs/bcachefs/fs-io-buffered.h
+++ b/fs/bcachefs/fs-io-buffered.h
@@ -10,10 +10,10 @@ int bch2_read_folio(struct file *, struct folio *);
int bch2_writepages(struct address_space *, struct writeback_control *);
void bch2_readahead(struct readahead_control *);
-int bch2_write_begin(struct file *, struct address_space *, loff_t,
- unsigned, struct page **, void **);
+int bch2_write_begin(struct file *, struct address_space *, loff_t pos,
+ unsigned len, struct folio **, void **);
int bch2_write_end(struct file *, struct address_space *, loff_t,
- unsigned, unsigned, struct page *, void *);
+ unsigned len, unsigned copied, struct folio *, void *);
ssize_t bch2_write_iter(struct kiocb *, struct iov_iter *);
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 94c392abef65..011817afc3ad 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -177,6 +177,14 @@ static unsigned bch2_inode_hash(subvol_inum inum)
return jhash_3words(inum.subvol, inum.inum >> 32, inum.inum, JHASH_INITVAL);
}
+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+ return to_bch_ei(ilookup5_nowait(c->vfs_sb,
+ bch2_inode_hash(inum),
+ bch2_iget5_test,
+ &inum));
+}
+
static struct bch_inode_info *bch2_inode_insert(struct bch_fs *c, struct bch_inode_info *inode)
{
subvol_inum inum = inode_inum(inode);
@@ -1644,14 +1652,16 @@ again:
break;
}
} else if (clean_pass && this_pass_clean) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry,
+ TASK_UNINTERRUPTIBLE);
mutex_unlock(&c->vfs_inodes_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
goto again;
}
}
diff --git a/fs/bcachefs/fs.h b/fs/bcachefs/fs.h
index c3af7225ff69..990ec43e0365 100644
--- a/fs/bcachefs/fs.h
+++ b/fs/bcachefs/fs.h
@@ -56,6 +56,8 @@ static inline subvol_inum inode_inum(struct bch_inode_info *inode)
};
}
+struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *, subvol_inum);
+
/*
* Set if we've gotten a btree error for this inode, and thus the vfs inode and
* btree inode may be inconsistent:
@@ -194,6 +196,11 @@ int bch2_vfs_init(void);
#define bch2_inode_update_after_write(_trans, _inode, _inode_u, _fields) ({ do {} while (0); })
+static inline struct bch_inode_info *__bch2_inode_hash_find(struct bch_fs *c, subvol_inum inum)
+{
+ return NULL;
+}
+
static inline void bch2_evict_subvolume_inodes(struct bch_fs *c,
snapshot_id_list *s) {}
static inline void bch2_vfs_exit(void) {}
diff --git a/fs/bcachefs/fsck.c b/fs/bcachefs/fsck.c
index 83bd31b44aad..9b3470a97546 100644
--- a/fs/bcachefs/fsck.c
+++ b/fs/bcachefs/fsck.c
@@ -8,6 +8,7 @@
#include "darray.h"
#include "dirent.h"
#include "error.h"
+#include "fs.h"
#include "fs-common.h"
#include "fsck.h"
#include "inode.h"
@@ -962,6 +963,22 @@ fsck_err:
return ret;
}
+static bool bch2_inode_open(struct bch_fs *c, struct bpos p)
+{
+ subvol_inum inum = {
+ .subvol = snapshot_t(c, p.snapshot)->subvol,
+ .inum = p.offset,
+ };
+
+ /* snapshot tree corruption, can't safely delete */
+ if (!inum.subvol) {
+ bch_err_ratelimited(c, "%s(): snapshot %u has no subvol", __func__, p.snapshot);
+ return true;
+ }
+
+ return __bch2_inode_hash_find(c, inum) != NULL;
+}
+
static int check_inode(struct btree_trans *trans,
struct btree_iter *iter,
struct bkey_s_c k,
@@ -1040,6 +1057,7 @@ static int check_inode(struct btree_trans *trans,
}
if (u.bi_flags & BCH_INODE_unlinked &&
+ !bch2_inode_open(c, k.k->p) &&
(!c->sb.clean ||
fsck_err(trans, inode_unlinked_but_clean,
"filesystem marked clean, but inode %llu unlinked",
diff --git a/fs/bcachefs/replicas.c b/fs/bcachefs/replicas.c
index 12b1d28b7eb4..1f34c92a6d11 100644
--- a/fs/bcachefs/replicas.c
+++ b/fs/bcachefs/replicas.c
@@ -82,7 +82,8 @@ int bch2_replicas_entry_validate(struct bch_replicas_entry_v1 *r,
}
for (unsigned i = 0; i < r->nr_devs; i++)
- if (!bch2_member_exists(sb, r->devs[i])) {
+ if (r->devs[i] != BCH_SB_MEMBER_INVALID &&
+ !bch2_member_exists(sb, r->devs[i])) {
prt_printf(err, "invalid device %u in entry ", r->devs[i]);
goto bad;
}
@@ -795,7 +796,7 @@ bool bch2_have_enough_devs(struct bch_fs *c, struct bch_devs_mask devs,
nr_online += test_bit(e->devs[i], devs.d);
struct bch_dev *ca = bch2_dev_rcu(c, e->devs[i]);
- nr_failed += ca && ca->mi.state == BCH_MEMBER_STATE_failed;
+ nr_failed += !ca || ca->mi.state == BCH_MEMBER_STATE_failed;
}
rcu_read_unlock();
diff --git a/fs/bcachefs/sb-errors_format.h b/fs/bcachefs/sb-errors_format.h
index d3a498617303..f0c14702f9e6 100644
--- a/fs/bcachefs/sb-errors_format.h
+++ b/fs/bcachefs/sb-errors_format.h
@@ -23,7 +23,7 @@ enum bch_fsck_flags {
x(jset_past_bucket_end, 9, 0) \
x(jset_seq_blacklisted, 10, 0) \
x(journal_entries_missing, 11, 0) \
- x(journal_entry_replicas_not_marked, 12, 0) \
+ x(journal_entry_replicas_not_marked, 12, FSCK_AUTOFIX) \
x(journal_entry_past_jset_end, 13, 0) \
x(journal_entry_replicas_data_mismatch, 14, 0) \
x(journal_entry_bkey_u64s_0, 15, 0) \
@@ -288,10 +288,10 @@ enum bch_fsck_flags {
x(invalid_btree_id, 274, 0) \
x(alloc_key_io_time_bad, 275, 0) \
x(alloc_key_fragmentation_lru_wrong, 276, FSCK_AUTOFIX) \
- x(accounting_key_junk_at_end, 277, 0) \
- x(accounting_key_replicas_nr_devs_0, 278, 0) \
- x(accounting_key_replicas_nr_required_bad, 279, 0) \
- x(accounting_key_replicas_devs_unsorted, 280, 0) \
+ x(accounting_key_junk_at_end, 277, FSCK_AUTOFIX) \
+ x(accounting_key_replicas_nr_devs_0, 278, FSCK_AUTOFIX) \
+ x(accounting_key_replicas_nr_required_bad, 279, FSCK_AUTOFIX) \
+ x(accounting_key_replicas_devs_unsorted, 280, FSCK_AUTOFIX) \
enum bch_sb_error_id {
#define x(t, n, ...) BCH_FSCK_ERR_##t = n,
diff --git a/fs/bcachefs/sb-members.c b/fs/bcachefs/sb-members.c
index 39196f2a4197..4b765422dd77 100644
--- a/fs/bcachefs/sb-members.c
+++ b/fs/bcachefs/sb-members.c
@@ -11,7 +11,8 @@
void bch2_dev_missing(struct bch_fs *c, unsigned dev)
{
- bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
+ if (dev != BCH_SB_MEMBER_INVALID)
+ bch2_fs_inconsistent(c, "pointer to nonexistent device %u", dev);
}
void bch2_dev_bucket_missing(struct bch_fs *c, struct bpos bucket)
diff --git a/fs/bcachefs/sb-members_format.h b/fs/bcachefs/sb-members_format.h
index e2630548c0f6..d727d2dfda08 100644
--- a/fs/bcachefs/sb-members_format.h
+++ b/fs/bcachefs/sb-members_format.h
@@ -8,6 +8,11 @@
*/
#define BCH_SB_MEMBERS_MAX 64
+/*
+ * Sentinal value - indicates a device that does not exist
+ */
+#define BCH_SB_MEMBER_INVALID 255
+
#define BCH_MIN_NR_NBUCKETS (1 << 6)
#define BCH_IOPS_MEASUREMENTS() \
diff --git a/fs/bcachefs/sysfs.c b/fs/bcachefs/sysfs.c
index f393023a3ae2..33f2a64c14c9 100644
--- a/fs/bcachefs/sysfs.c
+++ b/fs/bcachefs/sysfs.c
@@ -461,7 +461,7 @@ STORE(bch2_fs)
sc.gfp_mask = GFP_KERNEL;
sc.nr_to_scan = strtoul_or_return(buf);
- c->btree_key_cache.shrink->scan_objects(c->btree_cache.shrink, &sc);
+ c->btree_key_cache.shrink->scan_objects(c->btree_key_cache.shrink, &sc);
}
if (attr == &sysfs_trigger_gc)