aboutsummaryrefslogtreecommitdiff
path: root/fs/btrfs/file.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/btrfs/file.c')
-rw-r--r--fs/btrfs/file.c403
1 files changed, 295 insertions, 108 deletions
diff --git a/fs/btrfs/file.c b/fs/btrfs/file.c
index f47731c45bb5..e764ac3f22e2 100644
--- a/fs/btrfs/file.c
+++ b/fs/btrfs/file.c
@@ -22,10 +22,8 @@
#include "disk-io.h"
#include "transaction.h"
#include "btrfs_inode.h"
-#include "print-tree.h"
#include "tree-log.h"
#include "locking.h"
-#include "volumes.h"
#include "qgroup.h"
#include "compression.h"
#include "delalloc-space.h"
@@ -111,8 +109,8 @@ static void btrfs_drop_pages(struct btrfs_fs_info *fs_info,
* accessed as prepare_pages should have marked them accessed
* in prepare_pages via find_or_create_page()
*/
- btrfs_page_clamp_clear_checked(fs_info, pages[i], block_start,
- block_len);
+ btrfs_folio_clamp_clear_checked(fs_info, page_folio(pages[i]),
+ block_start, block_len);
unlock_page(pages[i]);
put_page(pages[i]);
}
@@ -130,7 +128,7 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
struct extent_state **cached, bool noreserve)
{
struct btrfs_fs_info *fs_info = inode->root->fs_info;
- int err = 0;
+ int ret = 0;
int i;
u64 num_bytes;
u64 start_pos;
@@ -160,17 +158,20 @@ int btrfs_dirty_pages(struct btrfs_inode *inode, struct page **pages,
EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING | EXTENT_DEFRAG,
cached);
- err = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
+ ret = btrfs_set_extent_delalloc(inode, start_pos, end_of_last_block,
extra_bits, cached);
- if (err)
- return err;
+ if (ret)
+ return ret;
for (i = 0; i < num_pages; i++) {
struct page *p = pages[i];
- btrfs_page_clamp_set_uptodate(fs_info, p, start_pos, num_bytes);
- btrfs_page_clamp_clear_checked(fs_info, p, start_pos, num_bytes);
- btrfs_page_clamp_set_dirty(fs_info, p, start_pos, num_bytes);
+ btrfs_folio_clamp_set_uptodate(fs_info, page_folio(p),
+ start_pos, num_bytes);
+ btrfs_folio_clamp_clear_checked(fs_info, page_folio(p),
+ start_pos, num_bytes);
+ btrfs_folio_clamp_set_dirty(fs_info, page_folio(p),
+ start_pos, num_bytes);
}
/*
@@ -205,7 +206,6 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
struct btrfs_fs_info *fs_info = root->fs_info;
struct extent_buffer *leaf;
struct btrfs_file_extent_item *fi;
- struct btrfs_ref ref = { 0 };
struct btrfs_key key;
struct btrfs_key new_key;
u64 ino = btrfs_ino(inode);
@@ -245,7 +245,7 @@ int btrfs_drop_extents(struct btrfs_trans_handle *trans,
if (args->start >= inode->disk_i_size && !args->replace_extent)
modify_tree = 0;
- update_refs = (root->root_key.objectid != BTRFS_TREE_LOG_OBJECTID);
+ update_refs = (btrfs_root_id(root) != BTRFS_TREE_LOG_OBJECTID);
while (1) {
recow = 0;
ret = btrfs_lookup_file_extent(trans, root, path, ino,
@@ -372,15 +372,17 @@ next_slot:
btrfs_mark_buffer_dirty(trans, leaf);
if (update_refs && disk_bytenr > 0) {
- btrfs_init_generic_ref(&ref,
- BTRFS_ADD_DELAYED_REF,
- disk_bytenr, num_bytes, 0,
- root->root_key.objectid);
- btrfs_init_data_ref(&ref,
- root->root_key.objectid,
- new_key.objectid,
- args->start - extent_offset,
- 0, false);
+ struct btrfs_ref ref = {
+ .action = BTRFS_ADD_DELAYED_REF,
+ .bytenr = disk_bytenr,
+ .num_bytes = num_bytes,
+ .parent = 0,
+ .owning_root = btrfs_root_id(root),
+ .ref_root = btrfs_root_id(root),
+ };
+ btrfs_init_data_ref(&ref, new_key.objectid,
+ args->start - extent_offset,
+ 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -463,15 +465,17 @@ delete_extent_item:
extent_end = ALIGN(extent_end,
fs_info->sectorsize);
} else if (update_refs && disk_bytenr > 0) {
- btrfs_init_generic_ref(&ref,
- BTRFS_DROP_DELAYED_REF,
- disk_bytenr, num_bytes, 0,
- root->root_key.objectid);
- btrfs_init_data_ref(&ref,
- root->root_key.objectid,
- key.objectid,
- key.offset - extent_offset, 0,
- false);
+ struct btrfs_ref ref = {
+ .action = BTRFS_DROP_DELAYED_REF,
+ .bytenr = disk_bytenr,
+ .num_bytes = num_bytes,
+ .parent = 0,
+ .owning_root = btrfs_root_id(root),
+ .ref_root = btrfs_root_id(root),
+ };
+ btrfs_init_data_ref(&ref, key.objectid,
+ key.offset - extent_offset,
+ 0, false);
ret = btrfs_free_extent(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -747,10 +751,13 @@ again:
extent_end - split);
btrfs_mark_buffer_dirty(trans, leaf);
- btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF, bytenr,
- num_bytes, 0, root->root_key.objectid);
- btrfs_init_data_ref(&ref, root->root_key.objectid, ino,
- orig_offset, 0, false);
+ ref.action = BTRFS_ADD_DELAYED_REF;
+ ref.bytenr = bytenr;
+ ref.num_bytes = num_bytes;
+ ref.parent = 0;
+ ref.owning_root = btrfs_root_id(root);
+ ref.ref_root = btrfs_root_id(root);
+ btrfs_init_data_ref(&ref, ino, orig_offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
if (ret) {
btrfs_abort_transaction(trans, ret);
@@ -773,10 +780,14 @@ again:
other_start = end;
other_end = 0;
- btrfs_init_generic_ref(&ref, BTRFS_DROP_DELAYED_REF, bytenr,
- num_bytes, 0, root->root_key.objectid);
- btrfs_init_data_ref(&ref, root->root_key.objectid, ino, orig_offset,
- 0, false);
+
+ ref.action = BTRFS_DROP_DELAYED_REF;
+ ref.bytenr = bytenr;
+ ref.num_bytes = num_bytes;
+ ref.parent = 0;
+ ref.owning_root = btrfs_root_id(root);
+ ref.ref_root = btrfs_root_id(root);
+ btrfs_init_data_ref(&ref, ino, orig_offset, 0, false);
if (extent_mergeable(leaf, path->slots[0] + 1,
ino, bytenr, orig_offset,
&other_start, &other_end)) {
@@ -869,9 +880,9 @@ static int prepare_uptodate_page(struct inode *inode,
* released.
*
* The private flag check is essential for subpage as we need
- * to store extra bitmap using page->private.
+ * to store extra bitmap using folio private.
*/
- if (page->mapping != inode->i_mapping || !PagePrivate(page)) {
+ if (page->mapping != inode->i_mapping || !folio_test_private(folio)) {
unlock_page(page);
return -EAGAIN;
}
@@ -914,7 +925,7 @@ static noinline int prepare_pages(struct inode *inode, struct page **pages,
unsigned long index = pos >> PAGE_SHIFT;
gfp_t mask = get_prepare_gfp_flags(inode, nowait);
fgf_t fgp_flags = get_prepare_fgp_flags(nowait);
- int err = 0;
+ int ret = 0;
int faili;
for (i = 0; i < num_pages; i++) {
@@ -924,28 +935,28 @@ again:
if (!pages[i]) {
faili = i - 1;
if (nowait)
- err = -EAGAIN;
+ ret = -EAGAIN;
else
- err = -ENOMEM;
+ ret = -ENOMEM;
goto fail;
}
- err = set_page_extent_mapped(pages[i]);
- if (err < 0) {
+ ret = set_page_extent_mapped(pages[i]);
+ if (ret < 0) {
faili = i;
goto fail;
}
if (i == 0)
- err = prepare_uptodate_page(inode, pages[i], pos,
+ ret = prepare_uptodate_page(inode, pages[i], pos,
force_uptodate);
- if (!err && i == num_pages - 1)
- err = prepare_uptodate_page(inode, pages[i],
+ if (!ret && i == num_pages - 1)
+ ret = prepare_uptodate_page(inode, pages[i],
pos + write_bytes, false);
- if (err) {
+ if (ret) {
put_page(pages[i]);
- if (!nowait && err == -EAGAIN) {
- err = 0;
+ if (!nowait && ret == -EAGAIN) {
+ ret = 0;
goto again;
}
faili = i - 1;
@@ -961,7 +972,7 @@ fail:
put_page(pages[faili]);
faili--;
}
- return err;
+ return ret;
}
@@ -1134,7 +1145,7 @@ static int btrfs_write_check(struct kiocb *iocb, struct iov_iter *from,
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos = iocb->ki_pos;
int ret;
loff_t oldsize;
@@ -1182,7 +1193,7 @@ static noinline ssize_t btrfs_buffered_write(struct kiocb *iocb,
struct file *file = iocb->ki_filp;
loff_t pos;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct page **pages = NULL;
struct extent_changeset *data_reserved = NULL;
u64 release_bytes = 0;
@@ -1458,13 +1469,13 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
loff_t pos;
ssize_t written = 0;
ssize_t written_buffered;
size_t prev_left = 0;
loff_t endbyte;
- ssize_t err;
+ ssize_t ret;
unsigned int ilock_flags = 0;
struct iomap_dio *dio;
@@ -1481,9 +1492,9 @@ static ssize_t btrfs_direct_write(struct kiocb *iocb, struct iov_iter *from)
ilock_flags |= BTRFS_ILOCK_SHARED;
relock:
- err = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
- if (err < 0)
- return err;
+ ret = btrfs_inode_lock(BTRFS_I(inode), ilock_flags);
+ if (ret < 0)
+ return ret;
/* Shared lock cannot be used with security bits set. */
if ((ilock_flags & BTRFS_ILOCK_SHARED) && !IS_NOSEC(inode)) {
@@ -1492,14 +1503,14 @@ relock:
goto relock;
}
- err = generic_write_checks(iocb, from);
- if (err <= 0) {
+ ret = generic_write_checks(iocb, from);
+ if (ret <= 0) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
- return err;
+ return ret;
}
- err = btrfs_write_check(iocb, from, err);
- if (err < 0) {
+ ret = btrfs_write_check(iocb, from, ret);
+ if (ret < 0) {
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
goto out;
}
@@ -1551,15 +1562,15 @@ relock:
btrfs_inode_unlock(BTRFS_I(inode), ilock_flags);
if (IS_ERR_OR_NULL(dio))
- err = PTR_ERR_OR_ZERO(dio);
+ ret = PTR_ERR_OR_ZERO(dio);
else
- err = iomap_dio_complete(dio);
+ ret = iomap_dio_complete(dio);
/* No increment (+=) because iomap returns a cumulative value. */
- if (err > 0)
- written = err;
+ if (ret > 0)
+ written = ret;
- if (iov_iter_count(from) > 0 && (err == -EFAULT || err > 0)) {
+ if (iov_iter_count(from) > 0 && (ret == -EFAULT || ret > 0)) {
const size_t left = iov_iter_count(from);
/*
* We have more data left to write. Try to fault in as many as
@@ -1576,7 +1587,7 @@ relock:
* to buffered IO in case we haven't made any progress.
*/
if (left == prev_left) {
- err = -ENOTBLK;
+ ret = -ENOTBLK;
} else {
fault_in_iov_iter_readable(from, left);
prev_left = left;
@@ -1585,10 +1596,10 @@ relock:
}
/*
- * If 'err' is -ENOTBLK or we have not written all data, then it means
+ * If 'ret' is -ENOTBLK or we have not written all data, then it means
* we must fallback to buffered IO.
*/
- if ((err < 0 && err != -ENOTBLK) || !iov_iter_count(from))
+ if ((ret < 0 && ret != -ENOTBLK) || !iov_iter_count(from))
goto out;
buffered:
@@ -1599,14 +1610,14 @@ buffered:
* below, we will block when flushing and waiting for the IO.
*/
if (iocb->ki_flags & IOCB_NOWAIT) {
- err = -EAGAIN;
+ ret = -EAGAIN;
goto out;
}
pos = iocb->ki_pos;
written_buffered = btrfs_buffered_write(iocb, from);
if (written_buffered < 0) {
- err = written_buffered;
+ ret = written_buffered;
goto out;
}
/*
@@ -1614,18 +1625,18 @@ buffered:
* able to read what was just written.
*/
endbyte = pos + written_buffered - 1;
- err = btrfs_fdatawrite_range(inode, pos, endbyte);
- if (err)
+ ret = btrfs_fdatawrite_range(inode, pos, endbyte);
+ if (ret)
goto out;
- err = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
- if (err)
+ ret = filemap_fdatawait_range(inode->i_mapping, pos, endbyte);
+ if (ret)
goto out;
written += written_buffered;
iocb->ki_pos = pos + written_buffered;
invalidate_mapping_pages(file->f_mapping, pos >> PAGE_SHIFT,
endbyte >> PAGE_SHIFT);
out:
- return err < 0 ? err : written;
+ return ret < 0 ? ret : written;
}
static ssize_t btrfs_encoded_write(struct kiocb *iocb, struct iov_iter *from,
@@ -1784,7 +1795,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
{
struct dentry *dentry = file_dentry(file);
struct inode *inode = d_inode(dentry);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct btrfs_trans_handle *trans;
struct btrfs_log_ctx ctx;
@@ -1909,6 +1920,8 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
goto out_release_extents;
}
+ btrfs_init_log_ctx_scratch_eb(&ctx);
+
/*
* We use start here because we will need to wait on the IO to complete
* in btrfs_sync_log, which could require joining a transaction (for
@@ -1928,6 +1941,15 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
trans->in_fsync = true;
ret = btrfs_log_dentry_safe(trans, dentry, &ctx);
+ /*
+ * Scratch eb no longer needed, release before syncing log or commit
+ * transaction, to avoid holding unnecessary memory during such long
+ * operations.
+ */
+ if (ctx.scratch_eb) {
+ free_extent_buffer(ctx.scratch_eb);
+ ctx.scratch_eb = NULL;
+ }
btrfs_release_log_ctx_extents(&ctx);
if (ret < 0) {
/* Fallthrough and commit/free transaction. */
@@ -2003,6 +2025,7 @@ int btrfs_sync_file(struct file *file, loff_t start, loff_t end, int datasync)
ret = btrfs_commit_transaction(trans);
out:
+ free_extent_buffer(ctx.scratch_eb);
ASSERT(list_empty(&ctx.list));
ASSERT(list_empty(&ctx.conflict_inodes));
err = file_check_and_advance_wb_err(file);
@@ -2016,6 +2039,172 @@ out_release_extents:
goto out;
}
+/*
+ * btrfs_page_mkwrite() is not allowed to change the file size as it gets
+ * called from a page fault handler when a page is first dirtied. Hence we must
+ * be careful to check for EOF conditions here. We set the page up correctly
+ * for a written page which means we get ENOSPC checking when writing into
+ * holes and correct delalloc and unwritten extent mapping on filesystems that
+ * support these features.
+ *
+ * We are not allowed to take the i_mutex here so we have to play games to
+ * protect against truncate races as the page could now be beyond EOF. Because
+ * truncate_setsize() writes the inode size before removing pages, once we have
+ * the page lock we can determine safely if the page is beyond EOF. If it is not
+ * beyond EOF, then the page is guaranteed safe against truncation until we
+ * unlock the page.
+ */
+static vm_fault_t btrfs_page_mkwrite(struct vm_fault *vmf)
+{
+ struct page *page = vmf->page;
+ struct folio *folio = page_folio(page);
+ struct inode *inode = file_inode(vmf->vma->vm_file);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
+ struct extent_io_tree *io_tree = &BTRFS_I(inode)->io_tree;
+ struct btrfs_ordered_extent *ordered;
+ struct extent_state *cached_state = NULL;
+ struct extent_changeset *data_reserved = NULL;
+ unsigned long zero_start;
+ loff_t size;
+ vm_fault_t ret;
+ int ret2;
+ int reserved = 0;
+ u64 reserved_space;
+ u64 page_start;
+ u64 page_end;
+ u64 end;
+
+ ASSERT(folio_order(folio) == 0);
+
+ reserved_space = PAGE_SIZE;
+
+ sb_start_pagefault(inode->i_sb);
+ page_start = page_offset(page);
+ page_end = page_start + PAGE_SIZE - 1;
+ end = page_end;
+
+ /*
+ * Reserving delalloc space after obtaining the page lock can lead to
+ * deadlock. For example, if a dirty page is locked by this function
+ * and the call to btrfs_delalloc_reserve_space() ends up triggering
+ * dirty page write out, then the btrfs_writepages() function could
+ * end up waiting indefinitely to get a lock on the page currently
+ * being processed by btrfs_page_mkwrite() function.
+ */
+ ret2 = btrfs_delalloc_reserve_space(BTRFS_I(inode), &data_reserved,
+ page_start, reserved_space);
+ if (!ret2) {
+ ret2 = file_update_time(vmf->vma->vm_file);
+ reserved = 1;
+ }
+ if (ret2) {
+ ret = vmf_error(ret2);
+ if (reserved)
+ goto out;
+ goto out_noreserve;
+ }
+
+ /* Make the VM retry the fault. */
+ ret = VM_FAULT_NOPAGE;
+again:
+ down_read(&BTRFS_I(inode)->i_mmap_lock);
+ lock_page(page);
+ size = i_size_read(inode);
+
+ if ((page->mapping != inode->i_mapping) ||
+ (page_start >= size)) {
+ /* Page got truncated out from underneath us. */
+ goto out_unlock;
+ }
+ wait_on_page_writeback(page);
+
+ lock_extent(io_tree, page_start, page_end, &cached_state);
+ ret2 = set_page_extent_mapped(page);
+ if (ret2 < 0) {
+ ret = vmf_error(ret2);
+ unlock_extent(io_tree, page_start, page_end, &cached_state);
+ goto out_unlock;
+ }
+
+ /*
+ * We can't set the delalloc bits if there are pending ordered
+ * extents. Drop our locks and wait for them to finish.
+ */
+ ordered = btrfs_lookup_ordered_range(BTRFS_I(inode), page_start, PAGE_SIZE);
+ if (ordered) {
+ unlock_extent(io_tree, page_start, page_end, &cached_state);
+ unlock_page(page);
+ up_read(&BTRFS_I(inode)->i_mmap_lock);
+ btrfs_start_ordered_extent(ordered);
+ btrfs_put_ordered_extent(ordered);
+ goto again;
+ }
+
+ if (page->index == ((size - 1) >> PAGE_SHIFT)) {
+ reserved_space = round_up(size - page_start, fs_info->sectorsize);
+ if (reserved_space < PAGE_SIZE) {
+ end = page_start + reserved_space - 1;
+ btrfs_delalloc_release_space(BTRFS_I(inode),
+ data_reserved, page_start,
+ PAGE_SIZE - reserved_space, true);
+ }
+ }
+
+ /*
+ * page_mkwrite gets called when the page is firstly dirtied after it's
+ * faulted in, but write(2) could also dirty a page and set delalloc
+ * bits, thus in this case for space account reason, we still need to
+ * clear any delalloc bits within this page range since we have to
+ * reserve data&meta space before lock_page() (see above comments).
+ */
+ clear_extent_bit(&BTRFS_I(inode)->io_tree, page_start, end,
+ EXTENT_DELALLOC | EXTENT_DO_ACCOUNTING |
+ EXTENT_DEFRAG, &cached_state);
+
+ ret2 = btrfs_set_extent_delalloc(BTRFS_I(inode), page_start, end, 0,
+ &cached_state);
+ if (ret2) {
+ unlock_extent(io_tree, page_start, page_end, &cached_state);
+ ret = VM_FAULT_SIGBUS;
+ goto out_unlock;
+ }
+
+ /* Page is wholly or partially inside EOF. */
+ if (page_start + PAGE_SIZE > size)
+ zero_start = offset_in_page(size);
+ else
+ zero_start = PAGE_SIZE;
+
+ if (zero_start != PAGE_SIZE)
+ memzero_page(page, zero_start, PAGE_SIZE - zero_start);
+
+ btrfs_folio_clear_checked(fs_info, folio, page_start, PAGE_SIZE);
+ btrfs_folio_set_dirty(fs_info, folio, page_start, end + 1 - page_start);
+ btrfs_folio_set_uptodate(fs_info, folio, page_start, end + 1 - page_start);
+
+ btrfs_set_inode_last_sub_trans(BTRFS_I(inode));
+
+ unlock_extent(io_tree, page_start, page_end, &cached_state);
+ up_read(&BTRFS_I(inode)->i_mmap_lock);
+
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ sb_end_pagefault(inode->i_sb);
+ extent_changeset_free(data_reserved);
+ return VM_FAULT_LOCKED;
+
+out_unlock:
+ unlock_page(page);
+ up_read(&BTRFS_I(inode)->i_mmap_lock);
+out:
+ btrfs_delalloc_release_extents(BTRFS_I(inode), PAGE_SIZE);
+ btrfs_delalloc_release_space(BTRFS_I(inode), data_reserved, page_start,
+ reserved_space, (ret != 0));
+out_noreserve:
+ sb_end_pagefault(inode->i_sb);
+ extent_changeset_free(data_reserved);
+ return ret;
+}
+
static const struct vm_operations_struct btrfs_file_vm_ops = {
.fault = filemap_fault,
.map_pages = filemap_map_pages,
@@ -2150,7 +2339,6 @@ out:
hole_em->block_start = EXTENT_MAP_HOLE;
hole_em->block_len = 0;
hole_em->orig_block_len = 0;
- hole_em->compress_type = BTRFS_COMPRESS_NONE;
hole_em->generation = trans->transid;
ret = btrfs_replace_extent_map_range(inode, hole_em, true);
@@ -2174,7 +2362,7 @@ static int find_first_non_hole(struct btrfs_inode *inode, u64 *start, u64 *len)
struct extent_map *em;
int ret = 0;
- em = btrfs_get_extent(inode, NULL, 0,
+ em = btrfs_get_extent(inode, NULL,
round_down(*start, fs_info->sectorsize),
round_up(*len, fs_info->sectorsize));
if (IS_ERR(em))
@@ -2246,7 +2434,6 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
struct extent_buffer *leaf;
struct btrfs_key key;
int slot;
- struct btrfs_ref ref = { 0 };
int ret;
if (replace_len == 0)
@@ -2302,15 +2489,17 @@ static int btrfs_insert_replace_extent(struct btrfs_trans_handle *trans,
extent_info->qgroup_reserved,
&key);
} else {
+ struct btrfs_ref ref = {
+ .action = BTRFS_ADD_DELAYED_REF,
+ .bytenr = extent_info->disk_offset,
+ .num_bytes = extent_info->disk_len,
+ .owning_root = btrfs_root_id(root),
+ .ref_root = btrfs_root_id(root),
+ };
u64 ref_offset;
- btrfs_init_generic_ref(&ref, BTRFS_ADD_DELAYED_REF,
- extent_info->disk_offset,
- extent_info->disk_len, 0,
- root->root_key.objectid);
ref_offset = extent_info->file_offset - extent_info->data_offset;
- btrfs_init_data_ref(&ref, root->root_key.objectid,
- btrfs_ino(inode), ref_offset, 0, false);
+ btrfs_init_data_ref(&ref, btrfs_ino(inode), ref_offset, 0, false);
ret = btrfs_inc_extent_ref(trans, &ref);
}
@@ -2591,7 +2780,7 @@ out:
static int btrfs_punch_hole(struct file *file, loff_t offset, loff_t len)
{
struct inode *inode = file_inode(file);
- struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+ struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
struct btrfs_root *root = BTRFS_I(inode)->root;
struct extent_state *cached_state = NULL;
struct btrfs_path *path;
@@ -2833,13 +3022,13 @@ static int btrfs_zero_range_check_range_boundary(struct btrfs_inode *inode,
int ret;
offset = round_down(offset, sectorsize);
- em = btrfs_get_extent(inode, NULL, 0, offset, sectorsize);
+ em = btrfs_get_extent(inode, NULL, offset, sectorsize);
if (IS_ERR(em))
return PTR_ERR(em);
if (em->block_start == EXTENT_MAP_HOLE)
ret = RANGE_BOUNDARY_HOLE;
- else if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags))
+ else if (em->flags & EXTENT_FLAG_PREALLOC)
ret = RANGE_BOUNDARY_PREALLOC_EXTENT;
else
ret = RANGE_BOUNDARY_WRITTEN_EXTENT;
@@ -2864,7 +3053,7 @@ static int btrfs_zero_range(struct inode *inode,
u64 bytes_to_reserve = 0;
bool space_reserved = false;
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, alloc_start,
alloc_end - alloc_start);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -2879,8 +3068,7 @@ static int btrfs_zero_range(struct inode *inode,
* extents and holes, we drop all the existing extents and allocate a
* new prealloc extent, so that we get a larger contiguous disk extent.
*/
- if (em->start <= alloc_start &&
- test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ if (em->start <= alloc_start && (em->flags & EXTENT_FLAG_PREALLOC)) {
const u64 em_end = em->start + em->len;
if (em_end >= offset + len) {
@@ -2908,14 +3096,13 @@ static int btrfs_zero_range(struct inode *inode,
if (BTRFS_BYTES_TO_BLKS(fs_info, offset) ==
BTRFS_BYTES_TO_BLKS(fs_info, offset + len - 1)) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, alloc_start,
- sectorsize);
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, alloc_start, sectorsize);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
goto out;
}
- if (test_bit(EXTENT_FLAG_PREALLOC, &em->flags)) {
+ if (em->flags & EXTENT_FLAG_PREALLOC) {
free_extent_map(em);
ret = btrfs_fallocate_update_isize(inode, offset + len,
mode);
@@ -3004,7 +3191,7 @@ reserve_space:
}
ret = btrfs_prealloc_file_range(inode, mode, alloc_start,
alloc_end - alloc_start,
- i_blocksize(inode),
+ fs_info->sectorsize,
offset + len, &alloc_hint);
unlock_extent(&BTRFS_I(inode)->io_tree, lockstart, lockend,
&cached_state);
@@ -3048,7 +3235,7 @@ static long btrfs_fallocate(struct file *file, int mode,
int ret;
/* Do not allow fallocate in ZONED mode */
- if (btrfs_is_zoned(btrfs_sb(inode->i_sb)))
+ if (btrfs_is_zoned(inode_to_fs_info(inode)))
return -EOPNOTSUPP;
alloc_start = round_down(offset, blocksize);
@@ -3125,7 +3312,7 @@ static long btrfs_fallocate(struct file *file, int mode,
/* First, check if we exceed the qgroup limit */
while (cur_offset < alloc_end) {
- em = btrfs_get_extent(BTRFS_I(inode), NULL, 0, cur_offset,
+ em = btrfs_get_extent(BTRFS_I(inode), NULL, cur_offset,
alloc_end - cur_offset);
if (IS_ERR(em)) {
ret = PTR_ERR(em);
@@ -3136,7 +3323,7 @@ static long btrfs_fallocate(struct file *file, int mode,
last_byte = ALIGN(last_byte, blocksize);
if (em->block_start == EXTENT_MAP_HOLE ||
(cur_offset >= inode->i_size &&
- !test_bit(EXTENT_FLAG_PREALLOC, &em->flags))) {
+ !(em->flags & EXTENT_FLAG_PREALLOC))) {
const u64 range_len = last_byte - cur_offset;
ret = add_falloc_range(&reserve_list, cur_offset, range_len);
@@ -3176,7 +3363,7 @@ static long btrfs_fallocate(struct file *file, int mode,
if (!ret) {
ret = btrfs_prealloc_file_range(inode, mode,
range->start,
- range->len, i_blocksize(inode),
+ range->len, blocksize,
offset + len, &alloc_hint);
/*
* btrfs_prealloc_file_range() releases space even
@@ -3192,7 +3379,7 @@ static long btrfs_fallocate(struct file *file, int mode,
qgroup_reserved -= range->len;
} else if (qgroup_reserved > 0) {
btrfs_qgroup_free_data(BTRFS_I(inode), data_reserved,
- range->start, range->len);
+ range->start, range->len, NULL);
qgroup_reserved -= range->len;
}
list_del(&range->list);
@@ -3709,8 +3896,7 @@ static int btrfs_file_open(struct inode *inode, struct file *filp)
{
int ret;
- filp->f_mode |= FMODE_NOWAIT | FMODE_BUF_RASYNC | FMODE_BUF_WASYNC |
- FMODE_CAN_ODIRECT;
+ filp->f_mode |= FMODE_NOWAIT | FMODE_CAN_ODIRECT;
ret = fsverity_file_open(inode, filp);
if (ret)
@@ -3753,7 +3939,7 @@ static ssize_t btrfs_direct_read(struct kiocb *iocb, struct iov_iter *to)
if (fsverity_active(inode))
return 0;
- if (check_direct_read(btrfs_sb(inode->i_sb), to, iocb->ki_pos))
+ if (check_direct_read(inode_to_fs_info(inode), to, iocb->ki_pos))
return 0;
btrfs_inode_lock(BTRFS_I(inode), BTRFS_ILOCK_SHARED);
@@ -3840,6 +4026,7 @@ const struct file_operations btrfs_file_operations = {
.compat_ioctl = btrfs_compat_ioctl,
#endif
.remap_file_range = btrfs_remap_file_range,
+ .fop_flags = FOP_BUFFER_RASYNC | FOP_BUFFER_WASYNC,
};
int btrfs_fdatawrite_range(struct inode *inode, loff_t start, loff_t end)