diff options
Diffstat (limited to 'fs/btrfs/extent_io.h')
-rw-r--r-- | fs/btrfs/extent_io.h | 131 |
1 files changed, 100 insertions, 31 deletions
diff --git a/fs/btrfs/extent_io.h b/fs/btrfs/extent_io.h index 2171057a4477..dca6b12769ec 100644 --- a/fs/btrfs/extent_io.h +++ b/fs/btrfs/extent_io.h @@ -7,11 +7,33 @@ #include <linux/refcount.h> #include <linux/fiemap.h> #include <linux/btrfs_tree.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/rwsem.h> +#include <linux/list.h> +#include <linux/slab.h> #include "compression.h" +#include "messages.h" #include "ulist.h" #include "misc.h" +struct page; +struct file; +struct folio; +struct inode; +struct fiemap_extent_info; +struct readahead_control; +struct address_space; +struct writeback_control; +struct extent_io_tree; +struct extent_map_tree; +struct extent_state; +struct btrfs_block_group; +struct btrfs_fs_info; +struct btrfs_inode; +struct btrfs_root; struct btrfs_trans_handle; +struct btrfs_tree_parent_check; enum { EXTENT_BUFFER_UPTODATE, @@ -28,7 +50,8 @@ enum { EXTENT_BUFFER_IN_TREE, /* write IO error */ EXTENT_BUFFER_WRITE_ERR, - EXTENT_BUFFER_NO_CHECK, + /* Indicate the extent buffer is written zeroed out (for zoned) */ + EXTENT_BUFFER_ZONED_ZEROOUT, /* Indicate that extent buffer pages a being read */ EXTENT_BUFFER_READING, }; @@ -43,10 +66,10 @@ enum { }; /* - * page->private values. Every page that is controlled by the extent - * map has page->private set to one. + * Folio private values. Every page that is controlled by the extent map has + * folio private set to this value. */ -#define EXTENT_PAGE_PRIVATE 1 +#define EXTENT_FOLIO_PRIVATE 1 /* * The extent buffer bitmap operations are done with byte granularity instead of @@ -62,11 +85,6 @@ enum { #define BITMAP_LAST_BYTE_MASK(nbits) \ (BYTE_MASK >> (-(nbits) & (BITS_PER_BYTE - 1))) -struct btrfs_root; -struct btrfs_inode; -struct btrfs_fs_info; -struct extent_io_tree; -struct btrfs_tree_parent_check; int __init extent_buffer_init_cachep(void); void __cold extent_buffer_free_cachep(void); @@ -74,19 +92,33 @@ void __cold extent_buffer_free_cachep(void); #define INLINE_EXTENT_BUFFER_PAGES (BTRFS_MAX_METADATA_BLOCKSIZE / PAGE_SIZE) struct extent_buffer { u64 start; - unsigned long len; + u32 len; + u32 folio_size; unsigned long bflags; struct btrfs_fs_info *fs_info; + + /* + * The address where the eb can be accessed without any cross-page handling. + * This can be NULL if not possible. + */ + void *addr; + spinlock_t refs_lock; atomic_t refs; int read_mirror; /* >= 0 if eb belongs to a log tree, -1 otherwise */ s8 log_index; + u8 folio_shift; struct rcu_head rcu_head; struct rw_semaphore lock; - struct page *pages[INLINE_EXTENT_BUFFER_PAGES]; + /* + * Pointers to all the folios of the extent buffer. + * + * For now the folio is always order 0 (aka, a single page). + */ + struct folio *folios[INLINE_EXTENT_BUFFER_PAGES]; #ifdef CONFIG_BTRFS_DEBUG struct list_head leak_list; pid_t lock_owner; @@ -100,6 +132,13 @@ struct btrfs_eb_write_context { struct btrfs_block_group *zoned_bg; }; +static inline unsigned long offset_in_eb_folio(const struct extent_buffer *eb, + u64 start) +{ + ASSERT(eb->folio_size); + return start & (eb->folio_size - 1); +} + /* * Get the correct offset inside the page of extent buffer. * @@ -108,29 +147,43 @@ struct btrfs_eb_write_context { * * Will handle both sectorsize == PAGE_SIZE and sectorsize < PAGE_SIZE cases. */ -static inline size_t get_eb_offset_in_page(const struct extent_buffer *eb, - unsigned long offset) +static inline size_t get_eb_offset_in_folio(const struct extent_buffer *eb, + unsigned long offset) { /* - * For sectorsize == PAGE_SIZE case, eb->start will always be aligned - * to PAGE_SIZE, thus adding it won't cause any difference. + * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case + * 1.1) One large folio covering the whole eb + * The eb->start is aligned to folio size, thus adding it + * won't cause any difference. + * 1.2) Several page sized folios + * The eb->start is aligned to folio (page) size, thus + * adding it won't cause any difference. * - * For sectorsize < PAGE_SIZE, we must only read the data that belongs - * to the eb, thus we have to take the eb->start into consideration. + * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case + * In this case there would only be one page sized folio, and there + * may be several different extent buffers in the page/folio. + * We need to add eb->start to properly access the offset inside + * that eb. */ - return offset_in_page(offset + eb->start); + return offset_in_folio(eb->folios[0], offset + eb->start); } -static inline unsigned long get_eb_page_index(unsigned long offset) +static inline unsigned long get_eb_folio_index(const struct extent_buffer *eb, + unsigned long offset) { /* - * For sectorsize == PAGE_SIZE case, plain >> PAGE_SHIFT is enough. + * 1) sectorsize == PAGE_SIZE and nodesize >= PAGE_SIZE case + * 1.1) One large folio covering the whole eb. + * the folio_shift would be large enough to always make us + * return 0 as index. + * 1.2) Several page sized folios + * The folio_shift would be PAGE_SHIFT, giving us the correct + * index. * - * For sectorsize < PAGE_SIZE case, we only support 64K PAGE_SIZE, - * and have ensured that all tree blocks are contained in one page, - * thus we always get index == 0. + * 2) sectorsize < PAGE_SIZE and nodesize < PAGE_SIZE case + * The folio would only be page sized, and always give us 0 as index. */ - return offset >> PAGE_SHIFT; + return offset >> eb->folio_shift; } /* @@ -178,22 +231,20 @@ static inline void extent_changeset_free(struct extent_changeset *changeset) kfree(changeset); } -struct extent_map_tree; - -int try_release_extent_mapping(struct page *page, gfp_t mask); +bool try_release_extent_mapping(struct page *page, gfp_t mask); int try_release_extent_buffer(struct page *page); int btrfs_read_folio(struct file *file, struct folio *folio); void extent_write_locked_range(struct inode *inode, struct page *locked_page, u64 start, u64 end, struct writeback_control *wbc, bool pages_dirty); -int extent_writepages(struct address_space *mapping, - struct writeback_control *wbc); +int btrfs_writepages(struct address_space *mapping, struct writeback_control *wbc); int btree_write_cache_pages(struct address_space *mapping, struct writeback_control *wbc); -void extent_readahead(struct readahead_control *rac); +void btrfs_readahead(struct readahead_control *rac); int extent_fiemap(struct btrfs_inode *inode, struct fiemap_extent_info *fieinfo, u64 start, u64 len); +int set_folio_extent_mapped(struct folio *folio); int set_page_extent_mapped(struct page *page); void clear_page_extent_mapped(struct page *page); @@ -230,6 +281,20 @@ static inline int num_extent_pages(const struct extent_buffer *eb) return (eb->len >> PAGE_SHIFT) ?: 1; } +/* + * This can only be determined at runtime by checking eb::folios[0]. + * + * As we can have either one large folio covering the whole eb + * (either nodesize <= PAGE_SIZE, or high order folio), or multiple + * single-paged folios. + */ +static inline int num_extent_folios(const struct extent_buffer *eb) +{ + if (folio_order(eb->folios[0])) + return 1; + return num_extent_pages(eb); +} + static inline int extent_buffer_uptodate(const struct extent_buffer *eb) { return test_bit(EXTENT_BUFFER_UPTODATE, &eb->bflags); @@ -288,13 +353,17 @@ void clear_extent_buffer_uptodate(struct extent_buffer *eb); void extent_range_clear_dirty_for_io(struct inode *inode, u64 start, u64 end); void extent_clear_unlock_delalloc(struct btrfs_inode *inode, u64 start, u64 end, struct page *locked_page, + struct extent_state **cached, u32 bits_to_clear, unsigned long page_ops); int extent_invalidate_folio(struct extent_io_tree *tree, struct folio *folio, size_t offset); void btrfs_clear_buffer_dirty(struct btrfs_trans_handle *trans, struct extent_buffer *buf); -int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array); +int btrfs_alloc_page_array(unsigned int nr_pages, struct page **page_array, + gfp_t extra_gfp); +int btrfs_alloc_folio_array(unsigned int nr_folios, struct folio **folio_array, + gfp_t extra_gfp); #ifdef CONFIG_BTRFS_FS_RUN_SANITY_TESTS bool find_lock_delalloc_range(struct inode *inode, |