Diffstat (limited to 'fs/btrfs/compression.c')
 fs/btrfs/compression.c | 271 ++++++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 206 insertions(+), 65 deletions(-)
diff --git a/fs/btrfs/compression.c b/fs/btrfs/compression.c
index 19b22b4653c8..6441e47d8a5e 100644
--- a/fs/btrfs/compression.c
+++ b/fs/btrfs/compression.c
@@ -20,12 +20,11 @@
 #include <linux/slab.h>
 #include <linux/sched/mm.h>
 #include <linux/log2.h>
+#include <linux/shrinker.h>
 #include <crypto/hash.h>
 #include "misc.h"
 #include "ctree.h"
 #include "fs.h"
-#include "disk-io.h"
-#include "transaction.h"
 #include "btrfs_inode.h"
 #include "bio.h"
 #include "ordered-data.h"
@@ -33,8 +32,7 @@
 #include "extent_io.h"
 #include "extent_map.h"
 #include "subpage.h"
-#include "zoned.h"
-#include "file-item.h"
+#include "messages.h"
 #include "super.h"
 
 static struct bio_set btrfs_compressed_bioset;
@@ -92,20 +90,20 @@ bool btrfs_compress_is_valid_type(const char *str, size_t len)
 }
 
 static int compression_compress_pages(int type, struct list_head *ws,
-		struct address_space *mapping, u64 start, struct page **pages,
-		unsigned long *out_pages, unsigned long *total_in,
-		unsigned long *total_out)
+		struct address_space *mapping, u64 start,
+		struct folio **folios, unsigned long *out_folios,
+		unsigned long *total_in, unsigned long *total_out)
 {
 	switch (type) {
 	case BTRFS_COMPRESS_ZLIB:
-		return zlib_compress_pages(ws, mapping, start, pages,
-				out_pages, total_in, total_out);
+		return zlib_compress_folios(ws, mapping, start, folios,
+				out_folios, total_in, total_out);
 	case BTRFS_COMPRESS_LZO:
-		return lzo_compress_pages(ws, mapping, start, pages,
-				out_pages, total_in, total_out);
+		return lzo_compress_folios(ws, mapping, start, folios,
+				out_folios, total_in, total_out);
 	case BTRFS_COMPRESS_ZSTD:
-		return zstd_compress_pages(ws, mapping, start, pages,
-				out_pages, total_in, total_out);
+		return zstd_compress_folios(ws, mapping, start, folios,
+				out_folios, total_in, total_out);
 	case BTRFS_COMPRESS_NONE:
 	default:
 		/*
@@ -117,7 +115,7 @@ static int compression_compress_pages(int type, struct list_head *ws,
 		 * Not a big deal, just need to inform caller that we
 		 * haven't allocated any pages yet.
 		 */
-		*out_pages = 0;
+		*out_folios = 0;
 		return -E2BIG;
 	}
 }
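The BTRFS_COMPRESS_NONE/default branch encodes a soft-failure convention: returning -E2BIG with *out_folios set to 0 tells the caller that nothing was compressed and nothing needs to be freed, so the range can simply be written uncompressed. A minimal userspace model of that contract (model_compress() and its page accounting are hypothetical, not btrfs code):

#include <errno.h>
#include <stdio.h>

enum compress_type { COMPRESS_NONE, COMPRESS_ZLIB };

/* Userspace model of the -E2BIG convention: a compressor that cannot
 * shrink the input (or whose type is NONE) returns -E2BIG and reports
 * zero output pages, so the caller has nothing to free. */
static int model_compress(enum compress_type type, unsigned long *out_pages)
{
	if (type == COMPRESS_NONE) {
		*out_pages = 0;
		return -E2BIG;
	}
	*out_pages = 1;	/* pretend one page of compressed output */
	return 0;
}

int main(void)
{
	unsigned long out;

	if (model_compress(COMPRESS_NONE, &out) == -E2BIG)
		printf("falling back to an uncompressed write (out_pages=%lu)\n", out);
	return 0;
}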
@@ -140,16 +138,16 @@ static int compression_decompress_bio(struct list_head *ws,
 }
 
 static int compression_decompress(int type, struct list_head *ws,
-		const u8 *data_in, struct page *dest_page,
-		unsigned long start_byte, size_t srclen, size_t destlen)
+		const u8 *data_in, struct page *dest_page,
+		unsigned long dest_pgoff, size_t srclen, size_t destlen)
 {
 	switch (type) {
 	case BTRFS_COMPRESS_ZLIB:
 		return zlib_decompress(ws, data_in, dest_page,
-				start_byte, srclen, destlen);
+				dest_pgoff, srclen, destlen);
 	case BTRFS_COMPRESS_LZO:
 		return lzo_decompress(ws, data_in, dest_page,
-				start_byte, srclen, destlen);
+				dest_pgoff, srclen, destlen);
 	case BTRFS_COMPRESS_ZSTD:
 		return zstd_decompress(ws, data_in, dest_page,
-				start_byte, srclen, destlen);
+				dest_pgoff, srclen, destlen);
 	case BTRFS_COMPRESS_NONE:
 	default:
 		/*
@@ -160,16 +158,110 @@ static int compression_decompress(int type, struct list_head *ws,
 	}
 }
 
-static void btrfs_free_compressed_pages(struct compressed_bio *cb)
+static void btrfs_free_compressed_folios(struct compressed_bio *cb)
 {
-	for (unsigned int i = 0; i < cb->nr_pages; i++)
-		put_page(cb->compressed_pages[i]);
-	kfree(cb->compressed_pages);
+	for (unsigned int i = 0; i < cb->nr_folios; i++)
+		btrfs_free_compr_folio(cb->compressed_folios[i]);
+	kfree(cb->compressed_folios);
 }
 
 static int btrfs_decompress_bio(struct compressed_bio *cb);
 
-static void end_compressed_bio_read(struct btrfs_bio *bbio)
+/*
+ * Global cache of last unused pages for compression/decompression.
+ */
+static struct btrfs_compr_pool {
+	struct shrinker *shrinker;
+	spinlock_t lock;
+	struct list_head list;
+	int count;
+	int thresh;
+} compr_pool;
+
+static unsigned long btrfs_compr_pool_count(struct shrinker *sh, struct shrink_control *sc)
+{
+	int ret;
+
+	/*
+	 * We must not read the values more than once if 'ret' gets expanded in
+	 * the return statement so we don't accidentally return a negative
+	 * number, even if the first condition finds it positive.
+	 */
+	ret = READ_ONCE(compr_pool.count) - READ_ONCE(compr_pool.thresh);
+
+	return ret > 0 ? ret : 0;
+}
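The clamp in btrfs_compr_pool_count() matters because count_objects returns an unsigned long: if the subtraction were re-evaluated per read, or a negative result returned directly, a pool below its threshold could report a huge bogus reclaim count. A small userspace check of the arithmetic (pool_count() and the sample values are hypothetical):

#include <stdio.h>

/* Userspace model of btrfs_compr_pool_count(): both values are read
 * once into a signed temporary, then clamped, so a pool below its
 * threshold can never yield an enormous unsigned "reclaimable" count. */
static unsigned long pool_count(int count, int thresh)
{
	int ret = count - thresh;

	return ret > 0 ? ret : 0;
}

int main(void)
{
	printf("%lu\n", pool_count(5, 32));    /* below threshold -> 0 */
	printf("%lu\n", pool_count(300, 256)); /* above threshold -> 44 */
	return 0;
}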
+static unsigned long btrfs_compr_pool_scan(struct shrinker *sh, struct shrink_control *sc)
+{
+	struct list_head remove;
+	struct list_head *tmp, *next;
+	int freed;
+
+	if (compr_pool.count == 0)
+		return SHRINK_STOP;
+
+	INIT_LIST_HEAD(&remove);
+
+	/* For now, simply drain the whole list. */
+	spin_lock(&compr_pool.lock);
+	list_splice_init(&compr_pool.list, &remove);
+	freed = compr_pool.count;
+	compr_pool.count = 0;
+	spin_unlock(&compr_pool.lock);
+
+	list_for_each_safe(tmp, next, &remove) {
+		struct page *page = list_entry(tmp, struct page, lru);
+
+		ASSERT(page_ref_count(page) == 1);
+		put_page(page);
+	}
+
+	return freed;
+}
+
+/*
+ * Common wrappers for folio allocation and freeing, used by the compression
+ * code.
+ */
+struct folio *btrfs_alloc_compr_folio(void)
+{
+	struct folio *folio = NULL;
+
+	spin_lock(&compr_pool.lock);
+	if (compr_pool.count > 0) {
+		folio = list_first_entry(&compr_pool.list, struct folio, lru);
+		list_del_init(&folio->lru);
+		compr_pool.count--;
+	}
+	spin_unlock(&compr_pool.lock);
+
+	if (folio)
+		return folio;
+
+	return folio_alloc(GFP_NOFS, 0);
+}
+
+void btrfs_free_compr_folio(struct folio *folio)
+{
+	bool do_free = false;
+
+	spin_lock(&compr_pool.lock);
+	if (compr_pool.count > compr_pool.thresh) {
+		do_free = true;
+	} else {
+		list_add(&folio->lru, &compr_pool.list);
+		compr_pool.count++;
+	}
+	spin_unlock(&compr_pool.lock);
+
+	if (!do_free)
+		return;
+
+	ASSERT(folio_ref_count(folio) == 1);
+	folio_put(folio);
+}
+
+static void end_bbio_compressed_read(struct btrfs_bio *bbio)
 {
 	struct compressed_bio *cb = to_compressed_bio(bbio);
 	blk_status_t status = bbio->bio.bi_status;
@@ -177,7 +269,7 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio)
 	if (!status)
 		status = errno_to_blk_status(btrfs_decompress_bio(cb));
 
-	btrfs_free_compressed_pages(cb);
+	btrfs_free_compressed_folios(cb);
 	btrfs_bio_end_io(cb->orig_bbio, status);
 	bio_put(&bbio->bio);
 }
@@ -189,7 +281,7 @@ static void end_compressed_bio_read(struct btrfs_bio *bbio)
 static noinline void end_compressed_writeback(const struct compressed_bio *cb)
 {
 	struct inode *inode = &cb->bbio.inode->vfs_inode;
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
 	unsigned long index = cb->start >> PAGE_SHIFT;
 	unsigned long end_index = (cb->start + cb->len - 1) >> PAGE_SHIFT;
 	struct folio_batch fbatch;
@@ -211,8 +303,8 @@ static noinline void end_compressed_writeback(const struct compressed_bio *cb)
 		for (i = 0; i < ret; i++) {
 			struct folio *folio = fbatch.folios[i];
 
-			btrfs_page_clamp_clear_writeback(fs_info, &folio->page,
-							 cb->start, cb->len);
+			btrfs_folio_clamp_clear_writeback(fs_info, folio,
+							  cb->start, cb->len);
 		}
 		folio_batch_release(&fbatch);
 	}
@@ -231,7 +323,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
 	end_compressed_writeback(cb);
 	/* Note, our inode could be gone now */
 
-	btrfs_free_compressed_pages(cb);
+	btrfs_free_compressed_folios(cb);
 	bio_put(&cb->bbio.bio);
 }
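Taken together, btrfs_alloc_compr_folio() and btrfs_free_compr_folio() form a bounded free-list: allocation prefers a cached folio and falls back to folio_alloc(), while freeing caches the folio unless the pool already holds more than 'thresh' entries. A self-contained userspace model of that recycling behaviour (pool_alloc()/pool_free() and the threshold value are hypothetical):

#include <assert.h>
#include <stdio.h>
#include <stdlib.h>

struct node { struct node *next; };

static struct node *pool_head;
static int pool_count;
static const int pool_thresh = 4;	/* hypothetical, btrfs computes 256 */

/* Model of btrfs_alloc_compr_folio(): reuse a cached entry if any. */
static struct node *pool_alloc(void)
{
	if (pool_head) {
		struct node *n = pool_head;

		pool_head = n->next;
		pool_count--;
		return n;	/* recycled, no allocator round trip */
	}
	return malloc(sizeof(struct node));
}

/* Model of btrfs_free_compr_folio(): cache unless over threshold. */
static void pool_free(struct node *n)
{
	if (pool_count > pool_thresh) {
		free(n);	/* pool full: release it for real */
		return;
	}
	n->next = pool_head;
	pool_head = n;
	pool_count++;
}

int main(void)
{
	struct node *a = pool_alloc();

	pool_free(a);
	assert(pool_alloc() == a);	/* the freed node is recycled */
	printf("recycled OK, cached=%d\n", pool_count);
	free(a);
	return 0;
}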
@@ -242,7 +334,7 @@ static void btrfs_finish_compressed_write_work(struct work_struct *work)
  * This also calls the writeback end hooks for the file pages so that metadata
  * and checksums can be updated in the file.
  */
-static void end_compressed_bio_write(struct btrfs_bio *bbio)
+static void end_bbio_compressed_write(struct btrfs_bio *bbio)
 {
 	struct compressed_bio *cb = to_compressed_bio(bbio);
 	struct btrfs_fs_info *fs_info = bbio->inode->root->fs_info;
@@ -250,17 +342,19 @@ static void end_compressed_bio_write(struct btrfs_bio *bbio)
 	queue_work(fs_info->compressed_write_workers, &cb->write_end_work);
 }
 
-static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
+static void btrfs_add_compressed_bio_folios(struct compressed_bio *cb)
 {
 	struct bio *bio = &cb->bbio.bio;
 	u32 offset = 0;
 
 	while (offset < cb->compressed_len) {
+		int ret;
 		u32 len = min_t(u32, cb->compressed_len - offset, PAGE_SIZE);
 
 		/* Maximum compressed extent is smaller than bio size limit. */
-		__bio_add_page(bio, cb->compressed_pages[offset >> PAGE_SHIFT],
-			       len, 0);
+		ret = bio_add_folio(bio, cb->compressed_folios[offset >> PAGE_SHIFT],
+				    len, 0);
+		ASSERT(ret);
 		offset += len;
 	}
 }
@@ -275,8 +369,8 @@ static void btrfs_add_compressed_bio_pages(struct compressed_bio *cb)
  * the end io hooks.
  */
 void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
-				   struct page **compressed_pages,
-				   unsigned int nr_pages,
+				   struct folio **compressed_folios,
+				   unsigned int nr_folios,
 				   blk_opf_t write_flags,
 				   bool writeback)
 {
@@ -289,17 +383,17 @@ void btrfs_submit_compressed_write(struct btrfs_ordered_extent *ordered,
 
 	cb = alloc_compressed_bio(inode, ordered->file_offset,
 				  REQ_OP_WRITE | write_flags,
-				  end_compressed_bio_write);
+				  end_bbio_compressed_write);
 	cb->start = ordered->file_offset;
 	cb->len = ordered->num_bytes;
-	cb->compressed_pages = compressed_pages;
+	cb->compressed_folios = compressed_folios;
 	cb->compressed_len = ordered->disk_num_bytes;
 	cb->writeback = writeback;
 	INIT_WORK(&cb->write_end_work, btrfs_finish_compressed_write_work);
-	cb->nr_pages = nr_pages;
+	cb->nr_folios = nr_folios;
 	cb->bbio.bio.bi_iter.bi_sector = ordered->disk_bytenr >> SECTOR_SHIFT;
 	cb->bbio.ordered = ordered;
-	btrfs_add_compressed_bio_pages(cb);
+	btrfs_add_compressed_bio_folios(cb);
 
 	btrfs_submit_bio(&cb->bbio, 0);
 }
@@ -320,7 +414,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 				     struct compressed_bio *cb,
 				     int *memstall, unsigned long *pflags)
 {
-	struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb);
+	struct btrfs_fs_info *fs_info = inode_to_fs_info(inode);
 	unsigned long end_index;
 	struct bio *orig_bio = &cb->orig_bbio->bio;
 	u64 cur = cb->orig_bbio->file_offset + orig_bio->bi_iter.bi_size;
@@ -346,7 +440,7 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 	 * This makes readahead less effective, so here disable readahead for
 	 * subpage for now, until full compressed write is supported.
 	 */
-	if (btrfs_sb(inode->i_sb)->sectorsize < PAGE_SIZE)
+	if (fs_info->sectorsize < PAGE_SIZE)
 		return 0;
 
 	end_index = (i_size_read(inode) - 1) >> PAGE_SHIFT;
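In btrfs_add_compressed_bio_folios() above, the compressed length is walked in PAGE_SIZE steps and offset >> PAGE_SHIFT indexes the folio array; the ASSERT(ret) is safe because the maximum compressed extent (BTRFS_MAX_COMPRESSED, 128K) fits within the bio size limit. A small userspace model of the chunking, including the short final chunk (the sample length is hypothetical):

#include <stdio.h>

#define PAGE_SIZE 4096u

/* Model of btrfs_add_compressed_bio_folios() chunking: walk the
 * compressed length in PAGE_SIZE steps; each chunk corresponds to
 * one bio_add_folio() call on folio[offset >> PAGE_SHIFT]. */
int main(void)
{
	unsigned int compressed_len = 13000;	/* hypothetical, < 128K */
	unsigned int offset = 0, chunks = 0;

	while (offset < compressed_len) {
		unsigned int len = compressed_len - offset;

		if (len > PAGE_SIZE)
			len = PAGE_SIZE;
		chunks++;
		offset += len;
	}
	/* 13000 bytes -> 4 chunks: 4096 + 4096 + 4096 + 712. */
	printf("%u chunks\n", chunks);
	return 0;
}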
@@ -446,7 +540,8 @@ static noinline int add_ra_bio_pages(struct inode *inode,
 		 * subpage::readers and to unlock the page.
 		 */
 		if (fs_info->sectorsize < PAGE_SIZE)
-			btrfs_subpage_start_reader(fs_info, page, cur, add_size);
+			btrfs_subpage_start_reader(fs_info, page_folio(page),
+						   cur, add_size);
 		put_page(page);
 		cur += add_size;
 	}
@@ -489,11 +584,11 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
 		goto out;
 	}
 
-	ASSERT(em->compress_type != BTRFS_COMPRESS_NONE);
+	ASSERT(extent_map_is_compressed(em));
 	compressed_len = em->block_len;
 
 	cb = alloc_compressed_bio(inode, file_offset, REQ_OP_READ,
-				  end_compressed_bio_read);
+				  end_bbio_compressed_read);
 
 	cb->start = em->orig_start;
 	em_len = em->len;
@@ -501,19 +596,19 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
 
 	cb->len = bbio->bio.bi_iter.bi_size;
 	cb->compressed_len = compressed_len;
-	cb->compress_type = em->compress_type;
+	cb->compress_type = extent_map_compression(em);
 	cb->orig_bbio = bbio;
 
 	free_extent_map(em);
 
-	cb->nr_pages = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
-	cb->compressed_pages = kcalloc(cb->nr_pages, sizeof(struct page *), GFP_NOFS);
-	if (!cb->compressed_pages) {
+	cb->nr_folios = DIV_ROUND_UP(compressed_len, PAGE_SIZE);
+	cb->compressed_folios = kcalloc(cb->nr_folios, sizeof(struct folio *), GFP_NOFS);
+	if (!cb->compressed_folios) {
 		ret = BLK_STS_RESOURCE;
 		goto out_free_bio;
 	}
 
-	ret2 = btrfs_alloc_page_array(cb->nr_pages, cb->compressed_pages);
+	ret2 = btrfs_alloc_folio_array(cb->nr_folios, cb->compressed_folios, 0);
 	if (ret2) {
 		ret = BLK_STS_RESOURCE;
 		goto out_free_compressed_pages;
@@ -525,7 +620,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
 	/* include any pages we added in add_ra_bio_pages */
 	cb->len = bbio->bio.bi_iter.bi_size;
 	cb->bbio.bio.bi_iter.bi_sector = bbio->bio.bi_iter.bi_sector;
-	btrfs_add_compressed_bio_pages(cb);
+	btrfs_add_compressed_bio_folios(cb);
 
 	if (memstall)
 		psi_memstall_leave(&pflags);
@@ -534,7 +629,7 @@ void btrfs_submit_compressed_read(struct btrfs_bio *bbio)
 	return;
 
 out_free_compressed_pages:
-	kfree(cb->compressed_pages);
+	kfree(cb->compressed_folios);
 out_free_bio:
 	bio_put(&cb->bbio.bio);
 out:
@@ -881,6 +976,29 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
 	return level;
 }
 
+/* Wrapper around filemap_get_folio(), with extra error message. */
+int btrfs_compress_filemap_get_folio(struct address_space *mapping, u64 start,
+				     struct folio **in_folio_ret)
+{
+	struct folio *in_folio;
+
+	/*
+	 * The compressed write path should have the folio locked already, thus
+	 * we only need to grab one reference.
+	 */
+	in_folio = filemap_get_folio(mapping, start >> PAGE_SHIFT);
+	if (IS_ERR(in_folio)) {
+		struct btrfs_inode *inode = BTRFS_I(mapping->host);
+
+		btrfs_crit(inode->root->fs_info,
+			   "failed to get page cache, root %lld ino %llu file offset %llu",
+			   btrfs_root_id(inode->root), btrfs_ino(inode), start);
+		return -ENOENT;
+	}
+	*in_folio_ret = in_folio;
+	return 0;
+}
+
 /*
  * Given an address space and start and length, compress the bytes into @folios
  * that are allocated on demand.
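A hedged sketch of how a compression worker might consume the new helper (example_map_source() and its locals are hypothetical; the real callers live in the zlib/lzo/zstd code): the folio is expected to be locked by the write path, so the helper only takes a reference, and the caller is responsible for unmapping and dropping it.

/* Hypothetical helper: fetch and map one source folio for compression. */
static int example_map_source(struct address_space *mapping, u64 start,
			      void **data_ret, struct folio **folio_ret)
{
	struct folio *in_folio;
	int ret;

	ret = btrfs_compress_filemap_get_folio(mapping, start, &in_folio);
	if (ret < 0)
		return ret;	/* -ENOENT, already logged via btrfs_crit() */

	*data_ret = kmap_local_folio(in_folio, offset_in_folio(in_folio, start));
	*folio_ret = in_folio;	/* caller does kunmap_local() + folio_put() */
	return 0;
}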
@@ -901,11 +1019,9 @@ static unsigned int btrfs_compress_set_level(int type, unsigned level)
  * @total_out is an in/out parameter, must be set to the input length and will
  * be also used to return the total number of compressed bytes
  */
-int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
-			 u64 start, struct page **pages,
-			 unsigned long *out_pages,
-			 unsigned long *total_in,
-			 unsigned long *total_out)
+int btrfs_compress_folios(unsigned int type_level, struct address_space *mapping,
+			  u64 start, struct folio **folios, unsigned long *out_folios,
+			  unsigned long *total_in, unsigned long *total_out)
 {
 	int type = btrfs_compress_type(type_level);
 	int level = btrfs_compress_level(type_level);
@@ -914,8 +1030,8 @@ int btrfs_compress_pages(unsigned int type_level, struct address_space *mapping,
 
 	level = btrfs_compress_set_level(type, level);
 	workspace = get_workspace(type, level);
-	ret = compression_compress_pages(type, workspace, mapping, start, pages,
-					 out_pages, total_in, total_out);
+	ret = compression_compress_pages(type, workspace, mapping, start, folios,
+					 out_folios, total_in, total_out);
 	put_workspace(type, workspace);
 	return ret;
 }
@@ -941,14 +1057,23 @@ static int btrfs_decompress_bio(struct compressed_bio *cb)
  * dest_pgoff tells us the offset into the destination page we're interested in
  */
 int btrfs_decompress(int type, const u8 *data_in, struct page *dest_page,
-		     unsigned long start_byte, size_t srclen, size_t destlen)
+		     unsigned long dest_pgoff, size_t srclen, size_t destlen)
 {
+	struct btrfs_fs_info *fs_info = page_to_fs_info(dest_page);
 	struct list_head *workspace;
+	const u32 sectorsize = fs_info->sectorsize;
 	int ret;
 
+	/*
+	 * The full destination page range should not exceed the page size.
+	 * And the @destlen should not exceed sectorsize, as this is only called for
+	 * inline file extents, which should not exceed sectorsize.
+	 */
+	ASSERT(dest_pgoff + destlen <= PAGE_SIZE && destlen <= sectorsize);
+
 	workspace = get_workspace(type, 0);
 	ret = compression_decompress(type, workspace, data_in, dest_page,
-				     start_byte, srclen, destlen);
+				     dest_pgoff, srclen, destlen);
 	put_workspace(type, workspace);
 
 	return ret;
@@ -960,15 +1085,36 @@ int __init btrfs_init_compress(void)
 			offsetof(struct compressed_bio, bbio.bio),
 			BIOSET_NEED_BVECS))
 		return -ENOMEM;
+
+	compr_pool.shrinker = shrinker_alloc(SHRINKER_NONSLAB, "btrfs-compr-pages");
+	if (!compr_pool.shrinker)
+		return -ENOMEM;
+
 	btrfs_init_workspace_manager(BTRFS_COMPRESS_NONE);
 	btrfs_init_workspace_manager(BTRFS_COMPRESS_ZLIB);
 	btrfs_init_workspace_manager(BTRFS_COMPRESS_LZO);
 	zstd_init_workspace_manager();
+
+	spin_lock_init(&compr_pool.lock);
+	INIT_LIST_HEAD(&compr_pool.list);
+	compr_pool.count = 0;
+	/* 128K / 4K = 32, for 8 threads that is 256 pages. */
+	compr_pool.thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8;
+	compr_pool.shrinker->count_objects = btrfs_compr_pool_count;
+	compr_pool.shrinker->scan_objects = btrfs_compr_pool_scan;
+	compr_pool.shrinker->batch = 32;
+	compr_pool.shrinker->seeks = DEFAULT_SEEKS;
+	shrinker_register(compr_pool.shrinker);
+
 	return 0;
 }
 
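The threshold arithmetic in btrfs_init_compress() works out as follows: one maximum compressed extent is BTRFS_MAX_COMPRESSED / PAGE_SIZE = 128K / 4K = 32 pages, and sizing for 8 concurrent threads caps the cache at 256 pages, i.e. 1 MiB on 4K-page systems. A trivial userspace check of that computation (the constants are restated here for illustration):

#include <stdio.h>

#define PAGE_SIZE 4096u
#define BTRFS_MAX_COMPRESSED (128 * 1024u)

int main(void)
{
	/* One max compressed extent = 32 pages; times 8 threads = 256. */
	unsigned int thresh = BTRFS_MAX_COMPRESSED / PAGE_SIZE * 8;

	printf("thresh = %u pages (%u KiB)\n", thresh, thresh * PAGE_SIZE / 1024);
	return 0;
}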
 void __cold btrfs_exit_compress(void)
 {
+	/* For now scan drains all pages and does not touch the parameters. */
+	btrfs_compr_pool_scan(NULL, NULL);
+	shrinker_free(compr_pool.shrinker);
+
 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_NONE);
 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_ZLIB);
 	btrfs_cleanup_workspace_manager(BTRFS_COMPRESS_LZO);
@@ -1353,11 +1499,6 @@ static void heuristic_collect_sample(struct inode *inode, u64 start, u64 end,
 /*
  * Compression heuristic.
  *
- * For now is's a naive and optimistic 'return true', we'll extend the logic to
- * quickly (compared to direct compression) detect data characteristics
- * (compressible/incompressible) to avoid wasting CPU time on incompressible
- * data.
- *
  * The following types of analysis can be performed:
  * - detect mostly zero data
  * - detect data with low "byte set" size (text, etc)
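The analyses listed above are implemented later in this file (sampling, byte-set counting, entropy estimation). As an illustration of the "byte set size" idea only, here is a hedged userspace sketch; the buffer handling and the cutoff of 64 distinct byte values follow the spirit of the heuristic but are not btrfs's tuned implementation:

#include <stdbool.h>
#include <stddef.h>
#include <stdio.h>
#include <string.h>

/* Sketch of a "byte set size" heuristic: data using only a small set
 * of distinct byte values (text, zero-filled regions) is likely to
 * compress well. The cutoff of 64 distinct bytes is hypothetical. */
static bool looks_compressible(const unsigned char *buf, size_t len)
{
	bool seen[256] = { false };
	size_t distinct = 0;

	for (size_t i = 0; i < len; i++) {
		if (!seen[buf[i]]) {
			seen[buf[i]] = true;
			distinct++;
		}
	}
	return distinct <= 64;
}

int main(void)
{
	unsigned char text[4096];

	memset(text, 'a', sizeof(text));
	printf("%d\n", looks_compressible(text, sizeof(text)));	/* 1 */
	return 0;
}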