Diffstat (limited to 'fs/erofs/zdata.c')
-rw-r--r--   fs/erofs/zdata.c | 346
1 file changed, 177 insertions(+), 169 deletions(-)
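
For orientation before reading the hunks: the central data-structure change is that struct z_erofs_bvec stops carrying a page/folio union and keeps only a struct page pointer, so every consumer now derives the owning folio on demand with page_folio(). Taken directly from the first hunk below, the structure goes from

        struct z_erofs_bvec {
                union {
                        struct page *page;
                        struct folio *folio;
                };
                int offset;
                unsigned int end;
        };

to

        struct z_erofs_bvec {
                struct page *page;
                int offset;
                unsigned int end;
        };

Beyond that, the patch renames z_erofs_{init,exit}_zip_subsystem() to z_erofs_{init,exit}_subsystem() and folds decompressor setup/teardown into them, rewrites z_erofs_scan_folio() as a do/while loop that walks large folios at page granularity, and converts z_erofs_pcluster_readmore() to erofs_grab_folio_nowait().
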
diff --git a/fs/erofs/zdata.c b/fs/erofs/zdata.c
index d6fe002a4a71..424f656cd765 100644
--- a/fs/erofs/zdata.c
+++ b/fs/erofs/zdata.c
@@ -19,10 +19,7 @@ typedef void *z_erofs_next_pcluster_t;
 
 struct z_erofs_bvec {
-        union {
-                struct page *page;
-                struct folio *folio;
-        };
+        struct page *page;
         int offset;
         unsigned int end;
 };
@@ -449,44 +446,51 @@ static inline int erofs_cpu_hotplug_init(void) { return 0; }
 static inline void erofs_cpu_hotplug_destroy(void) {}
 #endif
 
-void z_erofs_exit_zip_subsystem(void)
+void z_erofs_exit_subsystem(void)
 {
         erofs_cpu_hotplug_destroy();
         erofs_destroy_percpu_workers();
         destroy_workqueue(z_erofs_workqueue);
         z_erofs_destroy_pcluster_pool();
+        z_erofs_exit_decompressor();
 }
 
-int __init z_erofs_init_zip_subsystem(void)
+int __init z_erofs_init_subsystem(void)
 {
-        int err = z_erofs_create_pcluster_pool();
+        int err = z_erofs_init_decompressor();
 
         if (err)
-                goto out_error_pcluster_pool;
+                goto err_decompressor;
+
+        err = z_erofs_create_pcluster_pool();
+        if (err)
+                goto err_pcluster_pool;
 
         z_erofs_workqueue = alloc_workqueue("erofs_worker",
                         WQ_UNBOUND | WQ_HIGHPRI, num_possible_cpus());
         if (!z_erofs_workqueue) {
                 err = -ENOMEM;
-                goto out_error_workqueue_init;
+                goto err_workqueue_init;
         }
 
         err = erofs_init_percpu_workers();
         if (err)
-                goto out_error_pcpu_worker;
+                goto err_pcpu_worker;
 
         err = erofs_cpu_hotplug_init();
         if (err < 0)
-                goto out_error_cpuhp_init;
+                goto err_cpuhp_init;
         return err;
 
-out_error_cpuhp_init:
+err_cpuhp_init:
         erofs_destroy_percpu_workers();
-out_error_pcpu_worker:
+err_pcpu_worker:
         destroy_workqueue(z_erofs_workqueue);
-out_error_workqueue_init:
+err_workqueue_init:
         z_erofs_destroy_pcluster_pool();
-out_error_pcluster_pool:
+err_pcluster_pool:
+        z_erofs_exit_decompressor();
+err_decompressor:
         return err;
 }
@@ -617,32 +621,31 @@ static void z_erofs_bind_cache(struct z_erofs_decompress_frontend *fe)
                 fe->mode = Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE;
 }
 
-/* called by erofs_shrinker to get rid of all cached compressed bvecs */
+/* (erofs_shrinker) disconnect cached encoded data with pclusters */
 int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
                                         struct erofs_workgroup *grp)
 {
         struct z_erofs_pcluster *const pcl =
                 container_of(grp, struct z_erofs_pcluster, obj);
         unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+        struct folio *folio;
         int i;
 
         DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-        /* There is no actice user since the pcluster is now freezed */
+        /* Each cached folio contains one page unless bs > ps is supported */
         for (i = 0; i < pclusterpages; ++i) {
-                struct folio *folio = pcl->compressed_bvecs[i].folio;
-
-                if (!folio)
-                        continue;
+                if (pcl->compressed_bvecs[i].page) {
+                        folio = page_folio(pcl->compressed_bvecs[i].page);
+                        /* Avoid reclaiming or migrating this folio */
+                        if (!folio_trylock(folio))
+                                return -EBUSY;
 
-                /* Avoid reclaiming or migrating this folio */
-                if (!folio_trylock(folio))
-                        return -EBUSY;
-
-                if (!erofs_folio_is_managed(sbi, folio))
-                        continue;
-                pcl->compressed_bvecs[i].folio = NULL;
-                folio_detach_private(folio);
-                folio_unlock(folio);
+                        if (!erofs_folio_is_managed(sbi, folio))
+                                continue;
+                        pcl->compressed_bvecs[i].page = NULL;
+                        folio_detach_private(folio);
+                        folio_unlock(folio);
+                }
         }
         return 0;
 }
@@ -650,9 +653,9 @@ int erofs_try_to_free_all_cached_folios(struct erofs_sb_info *sbi,
 static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
 {
         struct z_erofs_pcluster *pcl = folio_get_private(folio);
-        unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
+        struct z_erofs_bvec *bvec = pcl->compressed_bvecs;
+        struct z_erofs_bvec *end = bvec + z_erofs_pclusterpages(pcl);
         bool ret;
-        int i;
 
         if (!folio_test_private(folio))
                 return true;
@@ -661,9 +664,9 @@ static bool z_erofs_cache_release_folio(struct folio *folio, gfp_t gfp)
         spin_lock(&pcl->obj.lockref.lock);
         if (pcl->obj.lockref.count <= 0) {
                 DBG_BUGON(z_erofs_is_inline_pcluster(pcl));
-                for (i = 0; i < pclusterpages; ++i) {
-                        if (pcl->compressed_bvecs[i].folio == folio) {
-                                pcl->compressed_bvecs[i].folio = NULL;
+                for (; bvec < end; ++bvec) {
+                        if (bvec->page && page_folio(bvec->page) == folio) {
+                                bvec->page = NULL;
                                 folio_detach_private(folio);
                                 ret = true;
                                 break;
@@ -925,7 +928,7 @@ static void z_erofs_pcluster_end(struct z_erofs_decompress_frontend *fe)
         fe->pcl = NULL;
 }
 
-static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
+static int z_erofs_read_fragment(struct super_block *sb, struct folio *folio,
                         unsigned int cur, unsigned int end, erofs_off_t pos)
 {
         struct inode *packed_inode = EROFS_SB(sb)->packed_inode;
@@ -938,113 +941,109 @@ static int z_erofs_read_fragment(struct super_block *sb, struct page *page,
         buf.mapping = packed_inode->i_mapping;
         for (; cur < end; cur += cnt, pos += cnt) {
-                cnt = min_t(unsigned int, end - cur,
-                            sb->s_blocksize - erofs_blkoff(sb, pos));
+                cnt = min(end - cur, sb->s_blocksize - erofs_blkoff(sb, pos));
                 src = erofs_bread(&buf, pos, EROFS_KMAP);
                 if (IS_ERR(src)) {
                         erofs_put_metabuf(&buf);
                         return PTR_ERR(src);
                 }
-                memcpy_to_page(page, cur, src, cnt);
+                memcpy_to_folio(folio, cur, src, cnt);
         }
         erofs_put_metabuf(&buf);
         return 0;
 }
 
-static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *fe,
+static int z_erofs_scan_folio(struct z_erofs_decompress_frontend *f,
                               struct folio *folio, bool ra)
 {
-        struct inode *const inode = fe->inode;
-        struct erofs_map_blocks *const map = &fe->map;
+        struct inode *const inode = f->inode;
+        struct erofs_map_blocks *const map = &f->map;
         const loff_t offset = folio_pos(folio);
-        const unsigned int bs = i_blocksize(inode), fs = folio_size(folio);
-        bool tight = true, exclusive;
-        unsigned int cur, end, len, split;
+        const unsigned int bs = i_blocksize(inode);
+        unsigned int end = folio_size(folio), split = 0, cur, pgs;
+        bool tight, excl;
         int err = 0;
 
+        tight = (bs == PAGE_SIZE);
         z_erofs_onlinefolio_init(folio);
-        split = 0;
-        end = fs;
-repeat:
-        if (offset + end - 1 < map->m_la ||
-            offset + end - 1 >= map->m_la + map->m_llen) {
-                z_erofs_pcluster_end(fe);
-                map->m_la = offset + end - 1;
-                map->m_llen = 0;
-                err = z_erofs_map_blocks_iter(inode, map, 0);
-                if (err)
-                        goto out;
-        }
-
-        cur = offset > map->m_la ? 0 : map->m_la - offset;
-        /* bump split parts first to avoid several separate cases */
-        ++split;
-
-        if (!(map->m_flags & EROFS_MAP_MAPPED)) {
-                folio_zero_segment(folio, cur, end);
-                tight = false;
-                goto next_part;
-        }
-
-        if (map->m_flags & EROFS_MAP_FRAGMENT) {
-                erofs_off_t fpos = offset + cur - map->m_la;
+        do {
+                if (offset + end - 1 < map->m_la ||
+                    offset + end - 1 >= map->m_la + map->m_llen) {
+                        z_erofs_pcluster_end(f);
+                        map->m_la = offset + end - 1;
+                        map->m_llen = 0;
+                        err = z_erofs_map_blocks_iter(inode, map, 0);
+                        if (err)
+                                break;
+                }
 
-                len = min_t(unsigned int, map->m_llen - fpos, end - cur);
-                err = z_erofs_read_fragment(inode->i_sb, &folio->page, cur,
-                                cur + len, EROFS_I(inode)->z_fragmentoff + fpos);
-                if (err)
-                        goto out;
-                tight = false;
-                goto next_part;
-        }
+                cur = offset > map->m_la ? 0 : map->m_la - offset;
+                pgs = round_down(cur, PAGE_SIZE);
+                /* bump split parts first to avoid several separate cases */
+                ++split;
+
+                if (!(map->m_flags & EROFS_MAP_MAPPED)) {
+                        folio_zero_segment(folio, cur, end);
+                        tight = false;
+                } else if (map->m_flags & EROFS_MAP_FRAGMENT) {
+                        erofs_off_t fpos = offset + cur - map->m_la;
+
+                        err = z_erofs_read_fragment(inode->i_sb, folio, cur,
+                                        cur + min(map->m_llen - fpos, end - cur),
+                                        EROFS_I(inode)->z_fragmentoff + fpos);
+                        if (err)
+                                break;
+                        tight = false;
+                } else {
+                        if (!f->pcl) {
+                                err = z_erofs_pcluster_begin(f);
+                                if (err)
+                                        break;
+                                f->pcl->besteffort |= !ra;
+                        }
 
-        if (!fe->pcl) {
-                err = z_erofs_pcluster_begin(fe);
-                if (err)
-                        goto out;
-                fe->pcl->besteffort |= !ra;
-        }
+                        pgs = round_down(end - 1, PAGE_SIZE);
+                        /*
+                         * Ensure this partial page belongs to this submit chain
+                         * rather than other concurrent submit chains or
+                         * noio(bypass) chains since those chains are handled
+                         * asynchronously thus it cannot be used for inplace I/O
+                         * or bvpage (should be processed in the strict order.)
+                         */
+                        tight &= (f->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
+                        excl = false;
+                        if (cur <= pgs) {
+                                excl = (split <= 1) || tight;
+                                cur = pgs;
+                        }
 
-        /*
-         * Ensure the current partial folio belongs to this submit chain rather
-         * than other concurrent submit chains or the noio(bypass) chain since
-         * those chains are handled asynchronously thus the folio cannot be used
-         * for inplace I/O or bvpage (should be processed in a strict order.)
-         */
-        tight &= (fe->mode > Z_EROFS_PCLUSTER_FOLLOWED_NOINPLACE);
-        exclusive = (!cur && ((split <= 1) || (tight && bs == fs)));
-        if (cur)
-                tight &= (fe->mode >= Z_EROFS_PCLUSTER_FOLLOWED);
-
-        err = z_erofs_attach_page(fe, &((struct z_erofs_bvec) {
-                                        .page = &folio->page,
-                                        .offset = offset - map->m_la,
-                                        .end = end,
-                                  }), exclusive);
-        if (err)
-                goto out;
-
-        z_erofs_onlinefolio_split(folio);
-        if (fe->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
-                fe->pcl->multibases = true;
-        if (fe->pcl->length < offset + end - map->m_la) {
-                fe->pcl->length = offset + end - map->m_la;
-                fe->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
-        }
-        if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
-            !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
-            fe->pcl->length == map->m_llen)
-                fe->pcl->partial = false;
-next_part:
-        /* shorten the remaining extent to update progress */
-        map->m_llen = offset + cur - map->m_la;
-        map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
-
-        end = cur;
-        if (end > 0)
-                goto repeat;
+                        err = z_erofs_attach_page(f, &((struct z_erofs_bvec) {
+                                        .page = folio_page(folio, pgs >> PAGE_SHIFT),
+                                        .offset = offset + pgs - map->m_la,
+                                        .end = end - pgs, }), excl);
+                        if (err)
+                                break;
 
-out:
+                        z_erofs_onlinefolio_split(folio);
+                        if (f->pcl->pageofs_out != (map->m_la & ~PAGE_MASK))
+                                f->pcl->multibases = true;
+                        if (f->pcl->length < offset + end - map->m_la) {
+                                f->pcl->length = offset + end - map->m_la;
+                                f->pcl->pageofs_out = map->m_la & ~PAGE_MASK;
+                        }
+                        if ((map->m_flags & EROFS_MAP_FULL_MAPPED) &&
+                            !(map->m_flags & EROFS_MAP_PARTIAL_REF) &&
+                            f->pcl->length == map->m_llen)
+                                f->pcl->partial = false;
+                }
+                /* shorten the remaining extent to update progress */
+                map->m_llen = offset + cur - map->m_la;
+                map->m_flags &= ~EROFS_MAP_FULL_MAPPED;
+                if (cur <= pgs) {
+                        split = cur < pgs;
+                        tight = (bs == PAGE_SIZE);
+                }
+        } while ((end = cur) > 0);
         z_erofs_onlinefolio_end(folio, err);
         return err;
 }
@@ -1066,7 +1065,7 @@ static bool z_erofs_is_sync_decompress(struct erofs_sb_info *sbi,
 
 static bool z_erofs_page_is_invalidated(struct page *page)
 {
-        return !page->mapping && !z_erofs_is_shortlived_page(page);
+        return !page_folio(page)->mapping && !z_erofs_is_shortlived_page(page);
 }
 
 struct z_erofs_decompress_backend {
@@ -1221,8 +1220,8 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
         struct z_erofs_pcluster *pcl = be->pcl;
         unsigned int pclusterpages = z_erofs_pclusterpages(pcl);
         const struct z_erofs_decompressor *decomp =
-                        &erofs_decompressors[pcl->algorithmformat];
-        int i, err2;
+                        z_erofs_decomp[pcl->algorithmformat];
+        int i, j, jtop, err2;
         struct page *page;
         bool overlapped;
@@ -1280,10 +1279,9 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
                 WRITE_ONCE(pcl->compressed_bvecs[0].page, NULL);
                 put_page(page);
         } else {
+                /* managed folios are still left in compressed_bvecs[] */
                 for (i = 0; i < pclusterpages; ++i) {
-                        /* consider shortlived pages added when decompressing */
                         page = be->compressed_pages[i];
-
                         if (!page ||
                             erofs_folio_is_managed(sbi, page_folio(page)))
                                 continue;
@@ -1294,21 +1292,31 @@ static int z_erofs_decompress_pcluster(struct z_erofs_decompress_backend *be,
         if (be->compressed_pages < be->onstack_pages ||
             be->compressed_pages >= be->onstack_pages + Z_EROFS_ONSTACK_PAGES)
                 kvfree(be->compressed_pages);
-        z_erofs_fill_other_copies(be, err);
 
+        jtop = 0;
+        z_erofs_fill_other_copies(be, err);
         for (i = 0; i < be->nr_pages; ++i) {
                 page = be->decompressed_pages[i];
                 if (!page)
                         continue;
 
                 DBG_BUGON(z_erofs_page_is_invalidated(page));
-
-                /* recycle all individual short-lived pages */
-                if (z_erofs_put_shortlivedpage(be->pagepool, page))
+                if (!z_erofs_is_shortlived_page(page)) {
+                        z_erofs_onlinefolio_end(page_folio(page), err);
                         continue;
-                z_erofs_onlinefolio_end(page_folio(page), err);
+                }
+                if (pcl->algorithmformat != Z_EROFS_COMPRESSION_LZ4) {
+                        erofs_pagepool_add(be->pagepool, page);
+                        continue;
+                }
+                for (j = 0; j < jtop && be->decompressed_pages[j] != page; ++j)
+                        ;
+                if (j >= jtop)  /* this bounce page is newly detected */
+                        be->decompressed_pages[jtop++] = page;
         }
-
+        while (jtop)
+                erofs_pagepool_add(be->pagepool,
+                                   be->decompressed_pages[--jtop]);
         if (be->decompressed_pages != be->onstack_pages)
                 kvfree(be->decompressed_pages);
@@ -1419,7 +1427,7 @@ static void z_erofs_fill_bio_vec(struct bio_vec *bvec,
         bool tocache = false;
         struct z_erofs_bvec zbv;
         struct address_space *mapping;
-        struct page *page;
+        struct folio *folio;
         int bs = i_blocksize(f->inode);
 
         /* Except for inplace folios, the entire folio can be used for I/Os */
@@ -1429,23 +1437,25 @@ repeat:
         spin_lock(&pcl->obj.lockref.lock);
         zbv = pcl->compressed_bvecs[nr];
         spin_unlock(&pcl->obj.lockref.lock);
-        if (!zbv.folio)
+        if (!zbv.page)
                 goto out_allocfolio;
 
-        bvec->bv_page = &zbv.folio->page;
+        bvec->bv_page = zbv.page;
         DBG_BUGON(z_erofs_is_shortlived_page(bvec->bv_page));
+
+        folio = page_folio(zbv.page);
         /*
         * Handle preallocated cached folios. We tried to allocate such folios
         * without triggering direct reclaim. If allocation failed, inplace
         * file-backed folios will be used instead.
         */
-        if (zbv.folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
-                zbv.folio->private = 0;
+        if (folio->private == (void *)Z_EROFS_PREALLOCATED_PAGE) {
+                folio->private = 0;
                 tocache = true;
                 goto out_tocache;
         }
 
-        mapping = READ_ONCE(zbv.folio->mapping);
+        mapping = READ_ONCE(folio->mapping);
         /*
         * File-backed folios for inplace I/Os are all locked steady,
         * therefore it is impossible for `mapping` to be NULL.
@@ -1457,21 +1467,21 @@ repeat:
                 return;
         }
 
-        folio_lock(zbv.folio);
-        if (zbv.folio->mapping == mc) {
+        folio_lock(folio);
+        if (folio->mapping == mc) {
                 /*
                 * The cached folio is still in managed cache but without
                 * a valid `->private` pcluster hint. Let's reconnect them.
                 */
-                if (!folio_test_private(zbv.folio)) {
-                        folio_attach_private(zbv.folio, pcl);
+                if (!folio_test_private(folio)) {
+                        folio_attach_private(folio, pcl);
                         /* compressed_bvecs[] already takes a ref before */
-                        folio_put(zbv.folio);
+                        folio_put(folio);
                 }
 
                 /* no need to submit if it is already up-to-date */
-                if (folio_test_uptodate(zbv.folio)) {
-                        folio_unlock(zbv.folio);
+                if (folio_test_uptodate(folio)) {
+                        folio_unlock(folio);
                         bvec->bv_page = NULL;
                 }
                 return;
@@ -1481,32 +1491,31 @@ repeat:
         * It has been truncated, so it's unsafe to reuse this one. Let's
         * allocate a new page for compressed data.
         */
-        DBG_BUGON(zbv.folio->mapping);
+        DBG_BUGON(folio->mapping);
         tocache = true;
-        folio_unlock(zbv.folio);
-        folio_put(zbv.folio);
+        folio_unlock(folio);
+        folio_put(folio);
 out_allocfolio:
-        page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
+        zbv.page = erofs_allocpage(&f->pagepool, gfp | __GFP_NOFAIL);
         spin_lock(&pcl->obj.lockref.lock);
-        if (pcl->compressed_bvecs[nr].folio) {
-                erofs_pagepool_add(&f->pagepool, page);
+        if (pcl->compressed_bvecs[nr].page) {
+                erofs_pagepool_add(&f->pagepool, zbv.page);
                 spin_unlock(&pcl->obj.lockref.lock);
                 cond_resched();
                 goto repeat;
         }
-        pcl->compressed_bvecs[nr].folio = zbv.folio = page_folio(page);
+        bvec->bv_page = pcl->compressed_bvecs[nr].page = zbv.page;
+        folio = page_folio(zbv.page);
+        /* first mark it as a temporary shortlived folio (now 1 ref) */
+        folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
         spin_unlock(&pcl->obj.lockref.lock);
-        bvec->bv_page = page;
 out_tocache:
         if (!tocache || bs != PAGE_SIZE ||
-            filemap_add_folio(mc, zbv.folio, pcl->obj.index + nr, gfp)) {
-                /* turn into a temporary shortlived folio (1 ref) */
-                zbv.folio->private = (void *)Z_EROFS_SHORTLIVED_PAGE;
+            filemap_add_folio(mc, folio, pcl->obj.index + nr, gfp))
                 return;
-        }
-        folio_attach_private(zbv.folio, pcl);
+        folio_attach_private(folio, pcl);
         /* drop a refcount added by allocpage (then 2 refs in total here) */
-        folio_put(zbv.folio);
+        folio_put(folio);
 }
 
 static struct z_erofs_decompressqueue *jobqueue_init(struct super_block *sb,
@@ -1767,7 +1776,6 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
                 end = round_up(end, PAGE_SIZE);
         } else {
                 end = round_up(map->m_la, PAGE_SIZE);
-
                 if (!map->m_llen)
                         return;
         }
@@ -1775,15 +1783,15 @@ static void z_erofs_pcluster_readmore(struct z_erofs_decompress_frontend *f,
         cur = map->m_la + map->m_llen - 1;
         while ((cur >= end) && (cur < i_size_read(inode))) {
                 pgoff_t index = cur >> PAGE_SHIFT;
-                struct page *page;
+                struct folio *folio;
 
-                page = erofs_grab_cache_page_nowait(inode->i_mapping, index);
-                if (page) {
-                        if (PageUptodate(page))
-                                unlock_page(page);
+                folio = erofs_grab_folio_nowait(inode->i_mapping, index);
+                if (!IS_ERR_OR_NULL(folio)) {
+                        if (folio_test_uptodate(folio))
+                                folio_unlock(folio);
                        else
-                                z_erofs_scan_folio(f, page_folio(page), !!rac);
-                        put_page(page);
+                                z_erofs_scan_folio(f, folio, !!rac);
+                        folio_put(folio);
                 }
 
                 if (cur < PAGE_SIZE)
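
For readers less familiar with the folio API this patch leans on, below is a minimal sketch of the lookup-by-page pattern it standardizes: cache slots store struct page pointers and the owning folio is derived with page_folio() only where locking or private-data operations need it. This is an illustrative, simplified example and not the kernel code itself; the names demo_bvec and demo_drop_cached_pages() are made up for the sketch, and the managed-cache check from the real erofs_try_to_free_all_cached_folios() is omitted.

        /* kernel-context sketch; builds only inside the kernel tree */
        #include <linux/mm.h>           /* page_folio() */
        #include <linux/pagemap.h>      /* folio_trylock(), folio_detach_private() */

        struct demo_bvec {
                struct page *page;      /* slot keeps a page, never a folio */
        };

        /* Drop every cached page tracked by bvecs[0..n), mirroring the shrinker path. */
        static int demo_drop_cached_pages(struct demo_bvec *bvecs, unsigned int n)
        {
                struct folio *folio;
                unsigned int i;

                for (i = 0; i < n; ++i) {
                        if (!bvecs[i].page)
                                continue;
                        folio = page_folio(bvecs[i].page);      /* derive the folio on demand */
                        if (!folio_trylock(folio))              /* avoid reclaim/migration races */
                                return -EBUSY;
                        bvecs[i].page = NULL;
                        folio_detach_private(folio);    /* drop the pcluster back-pointer */
                        folio_unlock(folio);
                }
                return 0;
        }

The same derive-on-demand idiom appears in z_erofs_cache_release_folio() (page_folio(bvec->page) == folio) and in z_erofs_fill_bio_vec() (folio = page_folio(zbv.page)), which is what allows the folio member of the old union to be dropped without losing folio-level locking.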