diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 11:34:19 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2019-05-07 11:34:19 -0700 |
commit | 9f2e3a53f7ec9ef55e9d01bc29a6285d291c151e (patch) | |
tree | c25b0eb20dac1a39a6b55c521b2658dcceb7d532 /fs/btrfs/extent_io.h | |
parent | 78438ce18f26dbcaa8993bb45d20ffb0cec3bc3e (diff) | |
parent | b1c16ac978fd40ae636e629bb69a652df7eebdc2 (diff) |
Merge tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux
Pull btrfs updates from David Sterba:
"This time the majority of changes are cleanups, though there's still a
number of changes of user interest.
User visible changes:
- better read-time and write-time checks to catch errors early, before
writing data to disk (to catch potential memory corruption on data
that gets checksummed)
- qgroups + metadata relocation: last speed up patch in the series
to address the slowness, there should be no overhead when comparing
balance with and without qgroups
- FIEMAP ioctl does not start a transaction unnecessarily, this can
result in a speed up and less blocking due to IO
- LOGICAL_INO (v1, v2) does not start transaction unnecessarily, this
can speed up the mentioned ioctl and scrub as well
- fsync on files with many (but not too many) hardlinks is faster,
finer decision if the links should be fsynced individually or
completely
- send tries harder to find ranges to clone
- trim/discard will skip unallocated chunks that haven't been touched
since the last mount
Fixes:
- send flushes delayed allocation before start, otherwise it could
miss some changes in case of a very recent rw->ro switch of a
subvolume
- fix fallocate with qgroups that could lead to space accounting
underflow, reported as a warning
- trim/discard ioctl honours the requested range
- starting send and dedupe on a subvolume at the same time will let
only one of them succeed, this is to prevent changes that send
could miss due to dedupe; both operations are restartable
Core changes:
- more tree-checker validations, errors reported by fuzzing tools:
- device item
- inode item
- block group profiles
- tracepoints for extent buffer locking
- async cow preallocates memory to avoid errors happening too deep in
the call chain
- metadata reservations for delalloc reworked to better adapt in
many-writers/low-space scenarios
- improved space flushing logic for intense DIO vs buffered workloads
- lots of cleanups
- removed unused struct members
- redundant argument removal
- properties and xattrs
- extent buffer locking
- selftests
- use common file type conversions
- many-argument functions reduction"
* tag 'for-5.2-tag' of git://git.kernel.org/pub/scm/linux/kernel/git/kdave/linux: (227 commits)
btrfs: Use kvmalloc for allocating compressed path context
btrfs: Factor out common extent locking code in submit_compressed_extents
btrfs: Set io_tree only once in submit_compressed_extents
btrfs: Replace clear_extent_bit with unlock_extent
btrfs: Make compress_file_range take only struct async_chunk
btrfs: Remove fs_info from struct async_chunk
btrfs: Rename async_cow to async_chunk
btrfs: Preallocate chunks in cow_file_range_async
btrfs: reserve delalloc metadata differently
btrfs: track DIO bytes in flight
btrfs: merge calls of btrfs_setxattr and btrfs_setxattr_trans in btrfs_set_prop
btrfs: delete unused function btrfs_set_prop_trans
btrfs: start transaction in xattr_handler_set_prop
btrfs: drop local copy of inode i_mode
btrfs: drop old_fsflags in btrfs_ioctl_setflags
btrfs: modify local copy of btrfs_inode flags
btrfs: drop useless inode i_flags copy and restore
btrfs: start transaction in btrfs_ioctl_setflags()
btrfs: export btrfs_set_prop
btrfs: refactor btrfs_set_props to validate externally
...
Diffstat (limited to 'fs/btrfs/extent_io.h')
-rw-r--r-- | fs/btrfs/extent_io.h | 89 |
1 files changed, 57 insertions, 32 deletions
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 08749e0b9c32..aa18a16a6ed7 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -9,27 +9,34 @@ /* bits for the extent state */ #define EXTENT_DIRTY (1U << 0) -#define EXTENT_WRITEBACK (1U << 1) -#define EXTENT_UPTODATE (1U << 2) -#define EXTENT_LOCKED (1U << 3) -#define EXTENT_NEW (1U << 4) -#define EXTENT_DELALLOC (1U << 5) -#define EXTENT_DEFRAG (1U << 6) -#define EXTENT_BOUNDARY (1U << 9) -#define EXTENT_NODATASUM (1U << 10) -#define EXTENT_CLEAR_META_RESV (1U << 11) -#define EXTENT_NEED_WAIT (1U << 12) -#define EXTENT_DAMAGED (1U << 13) -#define EXTENT_NORESERVE (1U << 14) -#define EXTENT_QGROUP_RESERVED (1U << 15) -#define EXTENT_CLEAR_DATA_RESV (1U << 16) -#define EXTENT_DELALLOC_NEW (1U << 17) -#define EXTENT_IOBITS (EXTENT_LOCKED | EXTENT_WRITEBACK) +#define EXTENT_UPTODATE (1U << 1) +#define EXTENT_LOCKED (1U << 2) +#define EXTENT_NEW (1U << 3) +#define EXTENT_DELALLOC (1U << 4) +#define EXTENT_DEFRAG (1U << 5) +#define EXTENT_BOUNDARY (1U << 6) +#define EXTENT_NODATASUM (1U << 7) +#define EXTENT_CLEAR_META_RESV (1U << 8) +#define EXTENT_NEED_WAIT (1U << 9) +#define EXTENT_DAMAGED (1U << 10) +#define EXTENT_NORESERVE (1U << 11) +#define EXTENT_QGROUP_RESERVED (1U << 12) +#define EXTENT_CLEAR_DATA_RESV (1U << 13) +#define EXTENT_DELALLOC_NEW (1U << 14) #define EXTENT_DO_ACCOUNTING (EXTENT_CLEAR_META_RESV | \ EXTENT_CLEAR_DATA_RESV) #define EXTENT_CTLBITS (EXTENT_DO_ACCOUNTING) /* + * Redefined bits above which are used only in the device allocation tree, + * shouldn't be using EXTENT_LOCKED / EXTENT_BOUNDARY / EXTENT_CLEAR_META_RESV + * / EXTENT_CLEAR_DATA_RESV because they have special meaning to the bit + * manipulation functions + */ +#define CHUNK_ALLOCATED EXTENT_DIRTY +#define CHUNK_TRIMMED EXTENT_DEFRAG + +/* * flags for bio submission. 
The high bits indicate the compression * type for this bio */ @@ -88,9 +95,6 @@ struct btrfs_inode; struct btrfs_io_bio; struct io_failure_record; -typedef blk_status_t (extent_submit_bio_hook_t)(void *private_data, struct bio *bio, - int mirror_num, unsigned long bio_flags, - u64 bio_offset); typedef blk_status_t (extent_submit_bio_start_t)(void *private_data, struct bio *bio, u64 bio_offset); @@ -100,17 +104,34 @@ struct extent_io_ops { * The following callbacks must be always defined, the function * pointer will be called unconditionally. */ - extent_submit_bio_hook_t *submit_bio_hook; + blk_status_t (*submit_bio_hook)(struct inode *inode, struct bio *bio, + int mirror_num, unsigned long bio_flags); int (*readpage_end_io_hook)(struct btrfs_io_bio *io_bio, u64 phy_offset, struct page *page, u64 start, u64 end, int mirror); }; +enum { + IO_TREE_FS_INFO_FREED_EXTENTS0, + IO_TREE_FS_INFO_FREED_EXTENTS1, + IO_TREE_INODE_IO, + IO_TREE_INODE_IO_FAILURE, + IO_TREE_RELOC_BLOCKS, + IO_TREE_TRANS_DIRTY_PAGES, + IO_TREE_ROOT_DIRTY_LOG_PAGES, + IO_TREE_SELFTEST, +}; + struct extent_io_tree { struct rb_root state; + struct btrfs_fs_info *fs_info; void *private_data; u64 dirty_bytes; - int track_uptodate; + bool track_uptodate; + + /* Who owns this io tree, should be one of IO_TREE_* */ + u8 owner; + spinlock_t lock; const struct extent_io_ops *ops; }; @@ -146,14 +167,9 @@ struct extent_buffer { struct rcu_head rcu_head; pid_t lock_owner; - /* count of read lock holders on the extent buffer */ - atomic_t write_locks; - atomic_t read_locks; atomic_t blocking_writers; atomic_t blocking_readers; - atomic_t spinning_readers; - atomic_t spinning_writers; - short lock_nested; + bool lock_nested; /* >= 0 if eb belongs to a log tree, -1 otherwise */ short log_index; @@ -171,6 +187,10 @@ struct extent_buffer { wait_queue_head_t read_lock_wq; struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; #ifdef CONFIG_BTRFS_DEBUG + atomic_t spinning_writers; + atomic_t spinning_readers; + atomic_t 
read_locks; + atomic_t write_locks; struct list_head leak_list; #endif }; @@ -239,7 +259,10 @@ typedef struct extent_map *(get_extent_t)(struct btrfs_inode *inode, u64 start, u64 len, int create); -void extent_io_tree_init(struct extent_io_tree *tree, void *private_data); +void extent_io_tree_init(struct btrfs_fs_info *fs_info, + struct extent_io_tree *tree, unsigned int owner, + void *private_data); +void extent_io_tree_release(struct extent_io_tree *tree); int try_release_extent_mapping(struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); int lock_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, @@ -309,6 +332,8 @@ int set_record_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, int set_extent_bit(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits, u64 *failed_start, struct extent_state **cached_state, gfp_t mask); +int set_extent_bits_nowait(struct extent_io_tree *tree, u64 start, u64 end, + unsigned bits); static inline int set_extent_bits(struct extent_io_tree *tree, u64 start, u64 end, unsigned bits) @@ -376,6 +401,8 @@ static inline int set_extent_uptodate(struct extent_io_tree *tree, u64 start, int find_first_extent_bit(struct extent_io_tree *tree, u64 start, u64 *start_ret, u64 *end_ret, unsigned bits, struct extent_state **cached_state); +void find_first_clear_extent_bit(struct extent_io_tree *tree, u64 start, + u64 *start_ret, u64 *end_ret, unsigned bits); int extent_invalidatepage(struct extent_io_tree *tree, struct page *page, unsigned long offset); int extent_write_full_page(struct page *page, struct writeback_control *wbc); @@ -405,8 +432,7 @@ void free_extent_buffer_stale(struct extent_buffer *eb); #define WAIT_NONE 0 #define WAIT_COMPLETE 1 #define WAIT_PAGE_LOCK 2 -int read_extent_buffer_pages(struct extent_io_tree *tree, - struct extent_buffer *eb, int wait, +int read_extent_buffer_pages(struct extent_buffer *eb, int wait, int mirror_num); void wait_on_extent_buffer_writeback(struct 
extent_buffer *eb); @@ -487,8 +513,7 @@ int clean_io_failure(struct btrfs_fs_info *fs_info, struct extent_io_tree *io_tree, u64 start, struct page *page, u64 ino, unsigned int pg_offset); void end_extent_writepage(struct page *page, int err, u64 start, u64 end); -int repair_eb_io_failure(struct btrfs_fs_info *fs_info, - struct extent_buffer *eb, int mirror_num); +int btrfs_repair_eb_io_failure(struct extent_buffer *eb, int mirror_num); /* * When IO fails, either with EIO or csum verification fails, we |