author    | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:14:04 -0800
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-12 20:14:04 -0800
commit    | 4a6bff1187409f2c2ba1b17234541d314f0680fc (patch)
tree      | 8dcc29cbfde92d6fdc04cb4ff2af7bb1a1dda9fc /fs/erofs/fscache.c
parent    | ad0d9da164cb52e62637e427517b2060dc956a2d (diff)
parent    | c505feba4c0d76084e56ec498ce819f02a7043ae (diff)
Merge tag 'erofs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs
Pull erofs updates from Gao Xiang:
"In this cycle, large folios are now enabled in the iomap/fscache mode
for uncompressed files first. In order to do that, we've also cleaned
up better interfaces between erofs and fscache, which are acked by
fscache/netfs folks and included in this pull request.
Other than that, there are random fixes around erofs over fscache and
crafted images by syzbot, minor cleanups and documentation updates.
Summary:
- Enable large folios for iomap/fscache mode
- Avoid sysfs warning due to mounting twice with the same fsid and
domain_id in fscache mode
- Refine fscache interface among erofs, fscache, and cachefiles
- Use kmap_local_page() only for metabuf
- Fixes around crafted images found by syzbot
- Minor cleanups and documentation updates"
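
The interface refinement boils down to replacing the borrowed netfs_io_request/netfs_io_subrequest machinery with a small erofs-private request object. A condensed sketch, lifted from the fs/erofs/fscache.c diff further down (field comments are paraphrased; this is not the complete implementation):

    struct erofs_fscache_request {
            struct erofs_fscache_request *primary;  /* NULL on the primary request */
            struct netfs_cache_resources cache_resources;
            struct address_space *mapping;          /* page cache being filled */
            loff_t start;                           /* start position */
            size_t len;                             /* total length of the request */
            size_t submitted;                       /* bytes submitted so far */
            short error;                            /* 0 or first error hit */
            refcount_t ref;
    };

Each extent slice either reuses the primary request for its first submission or chains a secondary request that pins the primary; folios are marked uptodate and unlocked only when the primary's last reference is dropped, which is what lets both .read_folio() and .readahead() hand folio unlocking over to request completion.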
* tag 'erofs-for-6.2-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/xiang/erofs:
erofs: validate the extent length for uncompressed pclusters
erofs: fix missing unmap if z_erofs_get_extent_compressedlen() fails
erofs: Fix pcluster memleak when its block address is zero
erofs: use kmap_local_page() only for erofs_bread()
erofs: enable large folios for fscache mode
erofs: support large folios for fscache mode
erofs: switch to prepare_ondemand_read() in fscache mode
fscache,cachefiles: add prepare_ondemand_read() callback
erofs: clean up cached I/O strategies
erofs: update documentation
erofs: check the uniqueness of fsid in shared domain in advance
erofs: enable large folios for iomap mode
Diffstat (limited to 'fs/erofs/fscache.c')
-rw-r--r-- | fs/erofs/fscache.c | 408
1 file changed, 185 insertions, 223 deletions
diff --git a/fs/erofs/fscache.c b/fs/erofs/fscache.c
index 4c837be3b6e3..014e20962376 100644
--- a/fs/erofs/fscache.c
+++ b/fs/erofs/fscache.c
@@ -11,265 +11,201 @@ static DEFINE_MUTEX(erofs_domain_cookies_lock);
 static LIST_HEAD(erofs_domain_list);
 static struct vfsmount *erofs_pseudo_mnt;
 
-static struct netfs_io_request *erofs_fscache_alloc_request(struct address_space *mapping,
+struct erofs_fscache_request {
+        struct erofs_fscache_request *primary;
+        struct netfs_cache_resources cache_resources;
+        struct address_space *mapping;  /* The mapping being accessed */
+        loff_t start;                   /* Start position */
+        size_t len;                     /* Length of the request */
+        size_t submitted;               /* Length of submitted */
+        short error;                    /* 0 or error that occurred */
+        refcount_t ref;
+};
+
+static struct erofs_fscache_request *erofs_fscache_req_alloc(struct address_space *mapping,
                                              loff_t start, size_t len)
 {
-        struct netfs_io_request *rreq;
+        struct erofs_fscache_request *req;
 
-        rreq = kzalloc(sizeof(struct netfs_io_request), GFP_KERNEL);
-        if (!rreq)
+        req = kzalloc(sizeof(struct erofs_fscache_request), GFP_KERNEL);
+        if (!req)
                 return ERR_PTR(-ENOMEM);
 
-        rreq->start = start;
-        rreq->len = len;
-        rreq->mapping = mapping;
-        rreq->inode = mapping->host;
-        INIT_LIST_HEAD(&rreq->subrequests);
-        refcount_set(&rreq->ref, 1);
-        return rreq;
-}
+        req->mapping = mapping;
+        req->start = start;
+        req->len = len;
+        refcount_set(&req->ref, 1);
 
-static void erofs_fscache_put_request(struct netfs_io_request *rreq)
-{
-        if (!refcount_dec_and_test(&rreq->ref))
-                return;
-        if (rreq->cache_resources.ops)
-                rreq->cache_resources.ops->end_operation(&rreq->cache_resources);
-        kfree(rreq);
+        return req;
 }
 
-static void erofs_fscache_put_subrequest(struct netfs_io_subrequest *subreq)
+static struct erofs_fscache_request *erofs_fscache_req_chain(struct erofs_fscache_request *primary,
+                                                             size_t len)
 {
-        if (!refcount_dec_and_test(&subreq->ref))
-                return;
-        erofs_fscache_put_request(subreq->rreq);
-        kfree(subreq);
-}
+        struct erofs_fscache_request *req;
 
-static void erofs_fscache_clear_subrequests(struct netfs_io_request *rreq)
-{
-        struct netfs_io_subrequest *subreq;
+        /* use primary request for the first submission */
+        if (!primary->submitted) {
+                refcount_inc(&primary->ref);
+                return primary;
+        }
 
-        while (!list_empty(&rreq->subrequests)) {
-                subreq = list_first_entry(&rreq->subrequests,
-                                struct netfs_io_subrequest, rreq_link);
-                list_del(&subreq->rreq_link);
-                erofs_fscache_put_subrequest(subreq);
+        req = erofs_fscache_req_alloc(primary->mapping,
+                        primary->start + primary->submitted, len);
+        if (!IS_ERR(req)) {
+                req->primary = primary;
+                refcount_inc(&primary->ref);
         }
+        return req;
 }
 
-static void erofs_fscache_rreq_unlock_folios(struct netfs_io_request *rreq)
+static void erofs_fscache_req_complete(struct erofs_fscache_request *req)
 {
-        struct netfs_io_subrequest *subreq;
         struct folio *folio;
-        unsigned int iopos = 0;
-        pgoff_t start_page = rreq->start / PAGE_SIZE;
-        pgoff_t last_page = ((rreq->start + rreq->len) / PAGE_SIZE) - 1;
-        bool subreq_failed = false;
+        bool failed = req->error;
+        pgoff_t start_page = req->start / PAGE_SIZE;
+        pgoff_t last_page = ((req->start + req->len) / PAGE_SIZE) - 1;
 
-        XA_STATE(xas, &rreq->mapping->i_pages, start_page);
-
-        subreq = list_first_entry(&rreq->subrequests,
-                                  struct netfs_io_subrequest, rreq_link);
-        subreq_failed = (subreq->error < 0);
+        XA_STATE(xas, &req->mapping->i_pages, start_page);
 
         rcu_read_lock();
         xas_for_each(&xas, folio, last_page) {
-                unsigned int pgpos, pgend;
-                bool pg_failed = false;
-
                 if (xas_retry(&xas, folio))
                         continue;
-
-                pgpos = (folio_index(folio) - start_page) * PAGE_SIZE;
-                pgend = pgpos + folio_size(folio);
-
-                for (;;) {
-                        if (!subreq) {
-                                pg_failed = true;
-                                break;
-                        }
-
-                        pg_failed |= subreq_failed;
-                        if (pgend < iopos + subreq->len)
-                                break;
-
-                        iopos += subreq->len;
-                        if (!list_is_last(&subreq->rreq_link,
-                                          &rreq->subrequests)) {
-                                subreq = list_next_entry(subreq, rreq_link);
-                                subreq_failed = (subreq->error < 0);
-                        } else {
-                                subreq = NULL;
-                                subreq_failed = false;
-                        }
-                        if (pgend == iopos)
-                                break;
-                }
-
-                if (!pg_failed)
+                if (!failed)
                         folio_mark_uptodate(folio);
-
                 folio_unlock(folio);
         }
         rcu_read_unlock();
 }
 
-static void erofs_fscache_rreq_complete(struct netfs_io_request *rreq)
+static void erofs_fscache_req_put(struct erofs_fscache_request *req)
 {
-        erofs_fscache_rreq_unlock_folios(rreq);
-        erofs_fscache_clear_subrequests(rreq);
-        erofs_fscache_put_request(rreq);
+        if (refcount_dec_and_test(&req->ref)) {
+                if (req->cache_resources.ops)
+                        req->cache_resources.ops->end_operation(&req->cache_resources);
+                if (!req->primary)
+                        erofs_fscache_req_complete(req);
+                else
+                        erofs_fscache_req_put(req->primary);
+                kfree(req);
+        }
 }
 
-static void erofc_fscache_subreq_complete(void *priv,
+static void erofs_fscache_subreq_complete(void *priv,
                 ssize_t transferred_or_error, bool was_async)
 {
-        struct netfs_io_subrequest *subreq = priv;
-        struct netfs_io_request *rreq = subreq->rreq;
-
-        if (IS_ERR_VALUE(transferred_or_error))
-                subreq->error = transferred_or_error;
+        struct erofs_fscache_request *req = priv;
 
-        if (atomic_dec_and_test(&rreq->nr_outstanding))
-                erofs_fscache_rreq_complete(rreq);
-
-        erofs_fscache_put_subrequest(subreq);
+        if (IS_ERR_VALUE(transferred_or_error)) {
+                if (req->primary)
+                        req->primary->error = transferred_or_error;
+                else
+                        req->error = transferred_or_error;
+        }
+        erofs_fscache_req_put(req);
 }
 
 /*
- * Read data from fscache and fill the read data into page cache described by
- * @rreq, which shall be both aligned with PAGE_SIZE. @pstart describes
- * the start physical address in the cache file.
+ * Read data from fscache (cookie, pstart, len), and fill the read data into
+ * page cache described by (req->mapping, lstart, len). @pstart describeis the
+ * start physical address in the cache file.
  */
 static int erofs_fscache_read_folios_async(struct fscache_cookie *cookie,
-                                           struct netfs_io_request *rreq, loff_t pstart)
+                struct erofs_fscache_request *req, loff_t pstart, size_t len)
 {
         enum netfs_io_source source;
-        struct super_block *sb = rreq->mapping->host->i_sb;
-        struct netfs_io_subrequest *subreq;
-        struct netfs_cache_resources *cres = &rreq->cache_resources;
+        struct super_block *sb = req->mapping->host->i_sb;
+        struct netfs_cache_resources *cres = &req->cache_resources;
         struct iov_iter iter;
-        loff_t start = rreq->start;
-        size_t len = rreq->len;
+        loff_t lstart = req->start + req->submitted;
         size_t done = 0;
         int ret;
 
-        atomic_set(&rreq->nr_outstanding, 1);
+        DBG_BUGON(len > req->len - req->submitted);
 
         ret = fscache_begin_read_operation(cres, cookie);
         if (ret)
-                goto out;
+                return ret;
 
         while (done < len) {
-                subreq = kzalloc(sizeof(struct netfs_io_subrequest),
-                                 GFP_KERNEL);
-                if (subreq) {
-                        INIT_LIST_HEAD(&subreq->rreq_link);
-                        refcount_set(&subreq->ref, 2);
-                        subreq->rreq = rreq;
-                        refcount_inc(&rreq->ref);
-                } else {
-                        ret = -ENOMEM;
-                        goto out;
-                }
-
-                subreq->start = pstart + done;
-                subreq->len = len - done;
-                subreq->flags = 1 << NETFS_SREQ_ONDEMAND;
+                loff_t sstart = pstart + done;
+                size_t slen = len - done;
+                unsigned long flags = 1 << NETFS_SREQ_ONDEMAND;
 
-                list_add_tail(&subreq->rreq_link, &rreq->subrequests);
-
-                source = cres->ops->prepare_read(subreq, LLONG_MAX);
-                if (WARN_ON(subreq->len == 0))
+                source = cres->ops->prepare_ondemand_read(cres,
+                                sstart, &slen, LLONG_MAX, &flags, 0);
+                if (WARN_ON(slen == 0))
                         source = NETFS_INVALID_READ;
                 if (source != NETFS_READ_FROM_CACHE) {
-                        erofs_err(sb, "failed to fscache prepare_read (source %d)",
-                                  source);
-                        ret = -EIO;
-                        subreq->error = ret;
-                        erofs_fscache_put_subrequest(subreq);
-                        goto out;
+                        erofs_err(sb, "failed to fscache prepare_read (source %d)", source);
+                        return -EIO;
                 }
 
-                atomic_inc(&rreq->nr_outstanding);
+                refcount_inc(&req->ref);
+                iov_iter_xarray(&iter, ITER_DEST, &req->mapping->i_pages,
+                                lstart + done, slen);
 
-                iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages,
-                                start + done, subreq->len);
-
-                ret = fscache_read(cres, subreq->start, &iter,
-                                   NETFS_READ_HOLE_FAIL,
-                                   erofc_fscache_subreq_complete, subreq);
+                ret = fscache_read(cres, sstart, &iter, NETFS_READ_HOLE_FAIL,
+                                   erofs_fscache_subreq_complete, req);
                 if (ret == -EIOCBQUEUED)
                         ret = 0;
                 if (ret) {
                         erofs_err(sb, "failed to fscache_read (ret %d)", ret);
-                        goto out;
+                        return ret;
                 }
 
-                done += subreq->len;
+                done += slen;
         }
-out:
-        if (atomic_dec_and_test(&rreq->nr_outstanding))
-                erofs_fscache_rreq_complete(rreq);
-
-        return ret;
+        DBG_BUGON(done != len);
+        return 0;
 }
 
 static int erofs_fscache_meta_read_folio(struct file *data, struct folio *folio)
 {
         int ret;
         struct super_block *sb = folio_mapping(folio)->host->i_sb;
-        struct netfs_io_request *rreq;
+        struct erofs_fscache_request *req;
         struct erofs_map_dev mdev = {
                 .m_deviceid = 0,
                 .m_pa = folio_pos(folio),
         };
 
         ret = erofs_map_dev(sb, &mdev);
-        if (ret)
-                goto out;
+        if (ret) {
+                folio_unlock(folio);
+                return ret;
+        }
 
-        rreq = erofs_fscache_alloc_request(folio_mapping(folio),
+        req = erofs_fscache_req_alloc(folio_mapping(folio),
                                 folio_pos(folio), folio_size(folio));
-        if (IS_ERR(rreq)) {
-                ret = PTR_ERR(rreq);
-                goto out;
+        if (IS_ERR(req)) {
+                folio_unlock(folio);
+                return PTR_ERR(req);
         }
 
-        return erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
-                                rreq, mdev.m_pa);
-out:
-        folio_unlock(folio);
+        ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
                                req, mdev.m_pa, folio_size(folio));
+        if (ret)
+                req->error = ret;
+
+        erofs_fscache_req_put(req);
         return ret;
 }
 
-/*
- * Read into page cache in the range described by (@pos, @len).
- *
- * On return, the caller is responsible for page unlocking if the output @unlock
- * is true, or the callee will take this responsibility through netfs_io_request
- * interface.
- *
- * The return value is the number of bytes successfully handled, or negative
- * error code on failure. The only exception is that, the length of the range
- * instead of the error code is returned on failure after netfs_io_request is
- * allocated, so that .readahead() could advance rac accordingly.
- */
-static int erofs_fscache_data_read(struct address_space *mapping,
-                                   loff_t pos, size_t len, bool *unlock)
+static int erofs_fscache_data_read_slice(struct erofs_fscache_request *primary)
 {
+        struct address_space *mapping = primary->mapping;
         struct inode *inode = mapping->host;
         struct super_block *sb = inode->i_sb;
-        struct netfs_io_request *rreq;
+        struct erofs_fscache_request *req;
         struct erofs_map_blocks map;
         struct erofs_map_dev mdev;
         struct iov_iter iter;
+        loff_t pos = primary->start + primary->submitted;
         size_t count;
         int ret;
 
-        *unlock = true;
-
         map.m_la = pos;
         ret = erofs_map_blocks(inode, &map, EROFS_GET_BLOCKS_RAW);
         if (ret)
@@ -297,17 +233,19 @@ static int erofs_fscache_data_read(struct address_space *mapping,
                 }
                 iov_iter_zero(PAGE_SIZE - size, &iter);
                 erofs_put_metabuf(&buf);
-                return PAGE_SIZE;
+                primary->submitted += PAGE_SIZE;
+                return 0;
         }
 
+        count = primary->len - primary->submitted;
         if (!(map.m_flags & EROFS_MAP_MAPPED)) {
-                count = len;
                 iov_iter_xarray(&iter, ITER_DEST, &mapping->i_pages, pos, count);
                 iov_iter_zero(count, &iter);
-                return count;
+                primary->submitted += count;
+                return 0;
         }
 
-        count = min_t(size_t, map.m_llen - (pos - map.m_la), len);
+        count = min_t(size_t, map.m_llen - (pos - map.m_la), count);
         DBG_BUGON(!count || count % PAGE_SIZE);
 
         mdev = (struct erofs_map_dev) {
@@ -318,64 +256,65 @@ static int erofs_fscache_data_read(struct address_space *mapping,
         if (ret)
                 return ret;
 
-        rreq = erofs_fscache_alloc_request(mapping, pos, count);
-        if (IS_ERR(rreq))
-                return PTR_ERR(rreq);
+        req = erofs_fscache_req_chain(primary, count);
+        if (IS_ERR(req))
+                return PTR_ERR(req);
 
-        *unlock = false;
-        erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
-                        rreq, mdev.m_pa + (pos - map.m_la));
-        return count;
+        ret = erofs_fscache_read_folios_async(mdev.m_fscache->cookie,
                        req, mdev.m_pa + (pos - map.m_la), count);
+        erofs_fscache_req_put(req);
+        primary->submitted += count;
+        return ret;
 }
 
-static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
+static int erofs_fscache_data_read(struct erofs_fscache_request *req)
 {
-        bool unlock;
         int ret;
 
-        DBG_BUGON(folio_size(folio) != EROFS_BLKSIZ);
+        do {
+                ret = erofs_fscache_data_read_slice(req);
+                if (ret)
+                        req->error = ret;
+        } while (!ret && req->submitted < req->len);
 
-        ret = erofs_fscache_data_read(folio_mapping(folio), folio_pos(folio),
-                                      folio_size(folio), &unlock);
-        if (unlock) {
-                if (ret > 0)
-                        folio_mark_uptodate(folio);
+        return ret;
+}
+
+static int erofs_fscache_read_folio(struct file *file, struct folio *folio)
+{
+        struct erofs_fscache_request *req;
+        int ret;
+
+        req = erofs_fscache_req_alloc(folio_mapping(folio),
+                        folio_pos(folio), folio_size(folio));
+        if (IS_ERR(req)) {
                 folio_unlock(folio);
+                return PTR_ERR(req);
         }
-        return ret < 0 ? ret : 0;
+
+        ret = erofs_fscache_data_read(req);
+        erofs_fscache_req_put(req);
+        return ret;
 }
 
 static void erofs_fscache_readahead(struct readahead_control *rac)
 {
-        struct folio *folio;
-        size_t len, done = 0;
-        loff_t start, pos;
-        bool unlock;
-        int ret, size;
+        struct erofs_fscache_request *req;
 
         if (!readahead_count(rac))
                 return;
 
-        start = readahead_pos(rac);
-        len = readahead_length(rac);
+        req = erofs_fscache_req_alloc(rac->mapping,
                        readahead_pos(rac), readahead_length(rac));
+        if (IS_ERR(req))
+                return;
 
-        do {
-                pos = start + done;
-                ret = erofs_fscache_data_read(rac->mapping, pos,
-                                              len - done, &unlock);
-                if (ret <= 0)
-                        return;
+        /* The request completion will drop refs on the folios. */
+        while (readahead_folio(rac))
+                ;
 
-                size = ret;
-                while (size) {
-                        folio = readahead_folio(rac);
-                        size -= folio_size(folio);
-                        if (unlock) {
-                                folio_mark_uptodate(folio);
-                                folio_unlock(folio);
-                        }
-                }
-        } while ((done += ret) < len);
+        erofs_fscache_data_read(req);
+        erofs_fscache_req_put(req);
 }
 
 static const struct address_space_operations erofs_fscache_meta_aops = {
@@ -494,7 +433,8 @@ static int erofs_fscache_register_domain(struct super_block *sb)
 
 static
 struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
-                                                   char *name, bool need_inode)
+                                                   char *name,
+                                                   unsigned int flags)
 {
         struct fscache_volume *volume = EROFS_SB(sb)->volume;
         struct erofs_fscache *ctx;
@@ -516,7 +456,7 @@ struct erofs_fscache *erofs_fscache_acquire_cookie(struct super_block *sb,
         fscache_use_cookie(cookie, false);
         ctx->cookie = cookie;
 
-        if (need_inode) {
+        if (flags & EROFS_REG_COOKIE_NEED_INODE) {
                 struct inode *const inode = new_inode(sb);
 
                 if (!inode) {
@@ -554,14 +494,15 @@ static void erofs_fscache_relinquish_cookie(struct erofs_fscache *ctx)
 
 static
 struct erofs_fscache *erofs_fscache_domain_init_cookie(struct super_block *sb,
-                                                       char *name, bool need_inode)
+                                                       char *name,
+                                                       unsigned int flags)
 {
         int err;
         struct inode *inode;
         struct erofs_fscache *ctx;
         struct erofs_domain *domain = EROFS_SB(sb)->domain;
 
-        ctx = erofs_fscache_acquire_cookie(sb, name, need_inode);
+        ctx = erofs_fscache_acquire_cookie(sb, name, flags);
         if (IS_ERR(ctx))
                 return ctx;
@@ -589,7 +530,8 @@ out:
 
 static
 struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
-                                                   char *name, bool need_inode)
+                                                   char *name,
+                                                   unsigned int flags)
 {
         struct inode *inode;
         struct erofs_fscache *ctx;
@@ -602,23 +544,30 @@ struct erofs_fscache *erofs_domain_register_cookie(struct super_block *sb,
                 ctx = inode->i_private;
                 if (!ctx || ctx->domain != domain || strcmp(ctx->name, name))
                         continue;
-                igrab(inode);
+                if (!(flags & EROFS_REG_COOKIE_NEED_NOEXIST)) {
+                        igrab(inode);
+                } else {
+                        erofs_err(sb, "%s already exists in domain %s", name,
+                                  domain->domain_id);
+                        ctx = ERR_PTR(-EEXIST);
+                }
                 spin_unlock(&psb->s_inode_list_lock);
                 mutex_unlock(&erofs_domain_cookies_lock);
                 return ctx;
         }
         spin_unlock(&psb->s_inode_list_lock);
-        ctx = erofs_fscache_domain_init_cookie(sb, name, need_inode);
+        ctx = erofs_fscache_domain_init_cookie(sb, name, flags);
         mutex_unlock(&erofs_domain_cookies_lock);
         return ctx;
 }
 
 struct erofs_fscache *erofs_fscache_register_cookie(struct super_block *sb,
-                                                    char *name, bool need_inode)
+                                                    char *name,
+                                                    unsigned int flags)
 {
         if (EROFS_SB(sb)->domain_id)
-                return erofs_domain_register_cookie(sb, name, need_inode);
-        return erofs_fscache_acquire_cookie(sb, name, need_inode);
+                return erofs_domain_register_cookie(sb, name, flags);
+        return erofs_fscache_acquire_cookie(sb, name, flags);
 }
 
 void erofs_fscache_unregister_cookie(struct erofs_fscache *ctx)
@@ -647,6 +596,7 @@ int erofs_fscache_register_fs(struct super_block *sb)
         int ret;
         struct erofs_sb_info *sbi = EROFS_SB(sb);
         struct erofs_fscache *fscache;
+        unsigned int flags;
 
         if (sbi->domain_id)
                 ret = erofs_fscache_register_domain(sb);
@@ -655,8 +605,20 @@ int erofs_fscache_register_fs(struct super_block *sb)
         if (ret)
                 return ret;
 
-        /* acquired domain/volume will be relinquished in kill_sb() on error */
-        fscache = erofs_fscache_register_cookie(sb, sbi->fsid, true);
+        /*
+         * When shared domain is enabled, using NEED_NOEXIST to guarantee
+         * the primary data blob (aka fsid) is unique in the shared domain.
+         *
+         * For non-shared-domain case, fscache_acquire_volume() invoked by
+         * erofs_fscache_register_volume() has already guaranteed
+         * the uniqueness of primary data blob.
+         *
+         * Acquired domain/volume will be relinquished in kill_sb() on error.
+         */
+        flags = EROFS_REG_COOKIE_NEED_INODE;
+        if (sbi->domain_id)
+                flags |= EROFS_REG_COOKIE_NEED_NOEXIST;
+        fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
         if (IS_ERR(fscache))
                 return PTR_ERR(fscache);
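
For the "check the uniqueness of fsid in shared domain in advance" item above, the registration path at the tail of the diff reduces to the following pattern (condensed from erofs_fscache_register_fs(); error unwinding omitted):

        unsigned int flags = EROFS_REG_COOKIE_NEED_INODE;

        /* In a shared domain, refuse a second mount that reuses the same fsid. */
        if (sbi->domain_id)
                flags |= EROFS_REG_COOKIE_NEED_NOEXIST;

        fscache = erofs_fscache_register_cookie(sb, sbi->fsid, flags);
        if (IS_ERR(fscache))
                return PTR_ERR(fscache);        /* -EEXIST if the fsid already exists */

With NEED_NOEXIST set, erofs_domain_register_cookie() reports the duplicate via erofs_err() and returns ERR_PTR(-EEXIST) up front, instead of tripping the sysfs warning that a second registration of the same fsid/domain_id used to cause.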