-rw-r--r--   fs/9p/vfs_addr.c               |   60
-rw-r--r--   fs/afs/file.c                  |    8
-rw-r--r--   fs/afs/internal.h              |    6
-rw-r--r--   fs/afs/validation.c            |    4
-rw-r--r--   fs/afs/write.c                 |  189
-rw-r--r--   fs/cachefiles/io.c             |   76
-rw-r--r--   fs/ceph/addr.c                 |   24
-rw-r--r--   fs/ceph/inode.c                |    2
-rw-r--r--   fs/netfs/Makefile              |    3
-rw-r--r--   fs/netfs/buffered_read.c       |   40
-rw-r--r--   fs/netfs/buffered_write.c      |  823
-rw-r--r--   fs/netfs/direct_write.c        |   56
-rw-r--r--   fs/netfs/fscache_io.c          |   14
-rw-r--r--   fs/netfs/internal.h            |   55
-rw-r--r--   fs/netfs/io.c                  |  155
-rw-r--r--   fs/netfs/main.c                |   55
-rw-r--r--   fs/netfs/misc.c                |   10
-rw-r--r--   fs/netfs/objects.c             |   81
-rw-r--r--   fs/netfs/output.c              |  478
-rw-r--r--   fs/netfs/stats.c               |   17
-rw-r--r--   fs/netfs/write_collect.c       |  808
-rw-r--r--   fs/netfs/write_issue.c         |  684
-rw-r--r--   fs/nfs/file.c                  |    8
-rw-r--r--   fs/nfs/fscache.h               |    6
-rw-r--r--   fs/nfs/write.c                 |    4
-rw-r--r--   fs/smb/client/file.c           |   16
-rw-r--r--   include/linux/fscache.h        |   22
-rw-r--r--   include/linux/netfs.h          |  196
-rw-r--r--   include/linux/pagemap.h        |    2
-rw-r--r--   include/net/9p/client.h        |    2
-rw-r--r--   include/trace/events/netfs.h   |  249
-rw-r--r--   mm/filemap.c                   |   60
-rw-r--r--   mm/page-writeback.c            |    1
-rw-r--r--   net/9p/Kconfig                 |    1
-rw-r--r--   net/9p/client.c                |   49
35 files changed, 2509 insertions, 1755 deletions
diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c
index 047855033d32..a97ceb105cd8 100644
--- a/fs/9p/vfs_addr.c
+++ b/fs/9p/vfs_addr.c
@@ -26,36 +26,38 @@
#include "cache.h"
#include "fid.h"
-static void v9fs_upload_to_server(struct netfs_io_subrequest *subreq)
+/*
+ * Writeback calls this when it finds a folio that needs uploading. This isn't
+ * called if writeback only has copy-to-cache to deal with.
+ */
+static void v9fs_begin_writeback(struct netfs_io_request *wreq)
{
- struct p9_fid *fid = subreq->rreq->netfs_priv;
- int err, len;
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
- netfs_write_subrequest_terminated(subreq, len ?: err, false);
-}
+ struct p9_fid *fid;
-static void v9fs_upload_to_server_worker(struct work_struct *work)
-{
- struct netfs_io_subrequest *subreq =
- container_of(work, struct netfs_io_subrequest, work);
+ fid = v9fs_fid_find_inode(wreq->inode, true, INVALID_UID, true);
+ if (!fid) {
+ WARN_ONCE(1, "folio expected an open fid inode->i_ino=%lx\n",
+ wreq->inode->i_ino);
+ return;
+ }
- v9fs_upload_to_server(subreq);
+ wreq->wsize = fid->clnt->msize - P9_IOHDRSZ;
+ if (fid->iounit)
+ wreq->wsize = min(wreq->wsize, fid->iounit);
+ wreq->netfs_priv = fid;
+ wreq->io_streams[0].avail = true;
}
/*
- * Set up write requests for a writeback slice. We need to add a write request
- * for each write we want to make.
+ * Issue a subrequest to write to the server.
*/
-static void v9fs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
+static void v9fs_issue_write(struct netfs_io_subrequest *subreq)
{
- struct netfs_io_subrequest *subreq;
+ struct p9_fid *fid = subreq->rreq->netfs_priv;
+ int err, len;
- subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
- start, len, v9fs_upload_to_server_worker);
- if (subreq)
- netfs_queue_write_request(subreq);
+ len = p9_client_write(fid, subreq->start, &subreq->io_iter, &err);
+ netfs_write_subrequest_terminated(subreq, len ?: err, false);
}
/**
@@ -87,12 +89,16 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
{
struct p9_fid *fid;
bool writing = (rreq->origin == NETFS_READ_FOR_WRITE ||
- rreq->origin == NETFS_WRITEBACK ||
rreq->origin == NETFS_WRITETHROUGH ||
- rreq->origin == NETFS_LAUNDER_WRITE ||
rreq->origin == NETFS_UNBUFFERED_WRITE ||
rreq->origin == NETFS_DIO_WRITE);
+ if (rreq->origin == NETFS_WRITEBACK)
+ return 0; /* We don't get the write handle until we find we
+ * have actually dirty data and not just
+ * copy-to-cache data.
+ */
+
if (file) {
fid = file->private_data;
if (!fid)
@@ -104,6 +110,10 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file)
goto no_fid;
}
+ rreq->wsize = fid->clnt->msize - P9_IOHDRSZ;
+ if (fid->iounit)
+ rreq->wsize = min(rreq->wsize, fid->iounit);
+
/* we might need to read from a fid that was opened write-only
* for read-modify-write of page cache, use the writeback fid
* for that */
@@ -132,7 +142,8 @@ const struct netfs_request_ops v9fs_req_ops = {
.init_request = v9fs_init_request,
.free_request = v9fs_free_request,
.issue_read = v9fs_issue_read,
- .create_write_requests = v9fs_create_write_requests,
+ .begin_writeback = v9fs_begin_writeback,
+ .issue_write = v9fs_issue_write,
};
const struct address_space_operations v9fs_addr_operations = {
@@ -141,7 +152,6 @@ const struct address_space_operations v9fs_addr_operations = {
.dirty_folio = netfs_dirty_folio,
.release_folio = netfs_release_folio,
.invalidate_folio = netfs_invalidate_folio,
- .launder_folio = netfs_launder_folio,
.direct_IO = noop_direct_IO,
.writepages = netfs_writepages,
};
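
The conversion above is the shape every filesystem in this series moves to: ->begin_writeback() runs only once writeback finds genuinely dirty data (not just copy-to-cache data), sets the per-request write size and enables the upload stream, and ->issue_write() sends one subrequest and reports the outcome through netfs_write_subrequest_terminated(). A minimal sketch of that shape for a hypothetical client (the examplefs_* names and examplefs_send() are invented for illustration):

static void examplefs_begin_writeback(struct netfs_io_request *wreq)
{
	wreq->wsize = 256 * 1024;		/* server-imposed maximum I/O size */
	wreq->io_streams[0].avail = true;	/* enable the upload stream */
}

static void examplefs_issue_write(struct netfs_io_subrequest *subreq)
{
	ssize_t ret;

	/* Send subreq->io_iter to the server at offset subreq->start
	 * (examplefs_send() is hypothetical).
	 */
	ret = examplefs_send(subreq->rreq->netfs_priv, subreq->start, &subreq->io_iter);

	/* Report bytes written or a negative error. */
	netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false);
}

static const struct netfs_request_ops examplefs_req_ops = {
	.begin_writeback	= examplefs_begin_writeback,
	.issue_write		= examplefs_issue_write,
};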
diff --git a/fs/afs/file.c b/fs/afs/file.c
index ef2cc8f565d2..c3f0c45ae9a9 100644
--- a/fs/afs/file.c
+++ b/fs/afs/file.c
@@ -54,7 +54,6 @@ const struct address_space_operations afs_file_aops = {
.read_folio = netfs_read_folio,
.readahead = netfs_readahead,
.dirty_folio = netfs_dirty_folio,
- .launder_folio = netfs_launder_folio,
.release_folio = netfs_release_folio,
.invalidate_folio = netfs_invalidate_folio,
.migrate_folio = filemap_migrate_folio,
@@ -354,7 +353,7 @@ static int afs_init_request(struct netfs_io_request *rreq, struct file *file)
if (file)
rreq->netfs_priv = key_get(afs_file_key(file));
rreq->rsize = 256 * 1024;
- rreq->wsize = 256 * 1024;
+ rreq->wsize = 256 * 1024 * 1024;
return 0;
}
@@ -369,6 +368,7 @@ static int afs_check_write_begin(struct file *file, loff_t pos, unsigned len,
static void afs_free_request(struct netfs_io_request *rreq)
{
key_put(rreq->netfs_priv);
+ afs_put_wb_key(rreq->netfs_priv2);
}
static void afs_update_i_size(struct inode *inode, loff_t new_i_size)
@@ -400,7 +400,9 @@ const struct netfs_request_ops afs_req_ops = {
.issue_read = afs_issue_read,
.update_i_size = afs_update_i_size,
.invalidate_cache = afs_netfs_invalidate_cache,
- .create_write_requests = afs_create_write_requests,
+ .begin_writeback = afs_begin_writeback,
+ .prepare_write = afs_prepare_write,
+ .issue_write = afs_issue_write,
};
static void afs_add_open_mmap(struct afs_vnode *vnode)
diff --git a/fs/afs/internal.h b/fs/afs/internal.h
index 6ce5a612937c..6e1d3c4daf72 100644
--- a/fs/afs/internal.h
+++ b/fs/afs/internal.h
@@ -916,7 +916,6 @@ struct afs_operation {
loff_t pos;
loff_t size;
loff_t i_size;
- bool laundering; /* Laundering page, PG_writeback not set */
} store;
struct {
struct iattr *attr;
@@ -1599,11 +1598,14 @@ extern int afs_check_volume_status(struct afs_volume *, struct afs_operation *);
/*
* write.c
*/
+void afs_prepare_write(struct netfs_io_subrequest *subreq);
+void afs_issue_write(struct netfs_io_subrequest *subreq);
+void afs_begin_writeback(struct netfs_io_request *wreq);
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream);
extern int afs_writepages(struct address_space *, struct writeback_control *);
extern int afs_fsync(struct file *, loff_t, loff_t, int);
extern vm_fault_t afs_page_mkwrite(struct vm_fault *vmf);
extern void afs_prune_wb_keys(struct afs_vnode *);
-void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len);
/*
* xattr.c
diff --git a/fs/afs/validation.c b/fs/afs/validation.c
index 32a53fc8dfb2..bef8af12ebe2 100644
--- a/fs/afs/validation.c
+++ b/fs/afs/validation.c
@@ -365,9 +365,9 @@ static void afs_zap_data(struct afs_vnode *vnode)
* written back in a regular file and completely discard the pages in a
* directory or symlink */
if (S_ISREG(vnode->netfs.inode.i_mode))
- invalidate_remote_inode(&vnode->netfs.inode);
+ filemap_invalidate_inode(&vnode->netfs.inode, true, 0, LLONG_MAX);
else
- invalidate_inode_pages2(vnode->netfs.inode.i_mapping);
+ filemap_invalidate_inode(&vnode->netfs.inode, false, 0, LLONG_MAX);
}
/*
diff --git a/fs/afs/write.c b/fs/afs/write.c
index 74402d95a884..e959640694c2 100644
--- a/fs/afs/write.c
+++ b/fs/afs/write.c
@@ -29,43 +29,39 @@ static void afs_pages_written_back(struct afs_vnode *vnode, loff_t start, unsign
/*
* Find a key to use for the writeback. We cached the keys used to author the
- * writes on the vnode. *_wbk will contain the last writeback key used or NULL
- * and we need to start from there if it's set.
+ * writes on the vnode. wreq->netfs_priv2 will contain the last writeback key
+ * record used or NULL and we need to start from there if it's set.
+ * wreq->netfs_priv will be set to the key itself or NULL.
*/
-static int afs_get_writeback_key(struct afs_vnode *vnode,
- struct afs_wb_key **_wbk)
+static void afs_get_writeback_key(struct netfs_io_request *wreq)
{
- struct afs_wb_key *wbk = NULL;
- struct list_head *p;
- int ret = -ENOKEY, ret2;
+ struct afs_wb_key *wbk, *old = wreq->netfs_priv2;
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+
+ key_put(wreq->netfs_priv);
+ wreq->netfs_priv = NULL;
+ wreq->netfs_priv2 = NULL;
spin_lock(&vnode->wb_lock);
- if (*_wbk)
- p = (*_wbk)->vnode_link.next;
+ if (old)
+ wbk = list_next_entry(old, vnode_link);
else
- p = vnode->wb_keys.next;
+ wbk = list_first_entry(&vnode->wb_keys, struct afs_wb_key, vnode_link);
- while (p != &vnode->wb_keys) {
- wbk = list_entry(p, struct afs_wb_key, vnode_link);
+ list_for_each_entry_from(wbk, &vnode->wb_keys, vnode_link) {
_debug("wbk %u", key_serial(wbk->key));
- ret2 = key_validate(wbk->key);
- if (ret2 == 0) {
+ if (key_validate(wbk->key) == 0) {
refcount_inc(&wbk->usage);
+ wreq->netfs_priv = key_get(wbk->key);
+ wreq->netfs_priv2 = wbk;
_debug("USE WB KEY %u", key_serial(wbk->key));
break;
}
-
- wbk = NULL;
- if (ret == -ENOKEY)
- ret = ret2;
- p = p->next;
}
spin_unlock(&vnode->wb_lock);
- if (*_wbk)
- afs_put_wb_key(*_wbk);
- *_wbk = wbk;
- return 0;
+
+ afs_put_wb_key(old);
}
static void afs_store_data_success(struct afs_operation *op)
@@ -75,8 +71,7 @@ static void afs_store_data_success(struct afs_operation *op)
op->ctime = op->file[0].scb.status.mtime_client;
afs_vnode_commit_status(op, &op->file[0]);
if (!afs_op_error(op)) {
- if (!op->store.laundering)
- afs_pages_written_back(vnode, op->store.pos, op->store.size);
+ afs_pages_written_back(vnode, op->store.pos, op->store.size);
afs_stat_v(vnode, n_stores);
atomic_long_add(op->store.size, &afs_v2net(vnode)->n_store_bytes);
}
@@ -89,113 +84,125 @@ static const struct afs_operation_ops afs_store_data_operation = {
};
/*
- * write to a file
+ * Prepare a subrequest to write to the server. This sets the max_len
+ * parameter.
+ */
+void afs_prepare_write(struct netfs_io_subrequest *subreq)
+{
+ //if (test_bit(NETFS_SREQ_RETRYING, &subreq->flags))
+ // subreq->max_len = 512 * 1024;
+ //else
+ subreq->max_len = 256 * 1024 * 1024;
+}
+
+/*
+ * Issue a subrequest to write to the server.
*/
-static int afs_store_data(struct afs_vnode *vnode, struct iov_iter *iter, loff_t pos,
- bool laundering)
+static void afs_issue_write_worker(struct work_struct *work)
{
+ struct netfs_io_subrequest *subreq = container_of(work, struct netfs_io_subrequest, work);
+ struct netfs_io_request *wreq = subreq->rreq;
struct afs_operation *op;
- struct afs_wb_key *wbk = NULL;
- loff_t size = iov_iter_count(iter);
+ struct afs_vnode *vnode = AFS_FS_I(wreq->inode);
+ unsigned long long pos = subreq->start + subreq->transferred;
+ size_t len = subreq->len - subreq->transferred;
int ret = -ENOKEY;
- _enter("%s{%llx:%llu.%u},%llx,%llx",
+ _enter("R=%x[%x],%s{%llx:%llu.%u},%llx,%zx",
+ wreq->debug_id, subreq->debug_index,
vnode->volume->name,
vnode->fid.vid,
vnode->fid.vnode,
vnode->fid.unique,
- size, pos);
+ pos, len);
- ret = afs_get_writeback_key(vnode, &wbk);
- if (ret) {
- _leave(" = %d [no keys]", ret);
- return ret;
- }
+#if 0 // Error injection
+ if (subreq->debug_index == 3)
+ return netfs_write_subrequest_terminated(subreq, -ENOANO, false);
- op = afs_alloc_operation(wbk->key, vnode->volume);
- if (IS_ERR(op)) {
- afs_put_wb_key(wbk);
- return -ENOMEM;
+ if (!test_bit(NETFS_SREQ_RETRYING, &subreq->flags)) {
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
}
+#endif
+
+ op = afs_alloc_operation(wreq->netfs_priv, vnode->volume);
+ if (IS_ERR(op))
+ return netfs_write_subrequest_terminated(subreq, -EAGAIN, false);
afs_op_set_vnode(op, 0, vnode);
- op->file[0].dv_delta = 1;
+ op->file[0].dv_delta = 1;
op->file[0].modification = true;
- op->store.pos = pos;
- op->store.size = size;
- op->store.laundering = laundering;
- op->flags |= AFS_OPERATION_UNINTR;
- op->ops = &afs_store_data_operation;
+ op->store.pos = pos;
+ op->store.size = len;
+ op->flags |= AFS_OPERATION_UNINTR;
+ op->ops = &afs_store_data_operation;
-try_next_key:
afs_begin_vnode_operation(op);
- op->store.write_iter = iter;
- op->store.i_size = max(pos + size, vnode->netfs.remote_i_size);
- op->mtime = inode_get_mtime(&vnode->netfs.inode);
+ op->store.write_iter = &subreq->io_iter;
+ op->store.i_size = umax(pos + len, vnode->netfs.remote_i_size);
+ op->mtime = inode_get_mtime(&vnode->netfs.inode);
afs_wait_for_operation(op);
-
- switch (afs_op_error(op)) {
+ ret = afs_put_operation(op);
+ switch (ret) {
case -EACCES:
case -EPERM:
case -ENOKEY:
case -EKEYEXPIRED:
case -EKEYREJECTED:
case -EKEYREVOKED:
- _debug("next");
-
- ret = afs_get_writeback_key(vnode, &wbk);
- if (ret == 0) {
- key_put(op->key);
- op->key = key_get(wbk->key);
- goto try_next_key;
- }
+ /* If there are more keys we can try, use the retry algorithm
+ * to rotate the keys.
+ */
+ if (wreq->netfs_priv2)
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
break;
}
- afs_put_wb_key(wbk);
- _leave(" = %d", afs_op_error(op));
- return afs_put_operation(op);
+ netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len, false);
}
-static void afs_upload_to_server(struct netfs_io_subrequest *subreq)
+void afs_issue_write(struct netfs_io_subrequest *subreq)
{
- struct afs_vnode *vnode = AFS_FS_I(subreq->rreq->inode);
- ssize_t ret;
-
- _enter("%x[%x],%zx",
- subreq->rreq->debug_id, subreq->debug_index, subreq->io_iter.count);
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
- ret = afs_store_data(vnode, &subreq->io_iter, subreq->start,
- subreq->rreq->origin == NETFS_LAUNDER_WRITE);
- netfs_write_subrequest_terminated(subreq, ret < 0 ? ret : subreq->len,
- false);
+ subreq->work.func = afs_issue_write_worker;
+ if (!queue_work(system_unbound_wq, &subreq->work))
+ WARN_ON_ONCE(1);
}
-static void afs_upload_to_server_worker(struct work_struct *work)
+/*
+ * Writeback calls this when it finds a folio that needs uploading. This isn't
+ * called if writeback only has copy-to-cache to deal with.
+ */
+void afs_begin_writeback(struct netfs_io_request *wreq)
{
- struct netfs_io_subrequest *subreq =
- container_of(work, struct netfs_io_subrequest, work);
-
- afs_upload_to_server(subreq);
+ afs_get_writeback_key(wreq);
+ wreq->io_streams[0].avail = true;
}
/*
- * Set up write requests for a writeback slice. We need to add a write request
- * for each write we want to make.
+ * Prepare to retry the writes in request. Use this to try rotating the
+ * available writeback keys.
*/
-void afs_create_write_requests(struct netfs_io_request *wreq, loff_t start, size_t len)
+void afs_retry_request(struct netfs_io_request *wreq, struct netfs_io_stream *stream)
{
- struct netfs_io_subrequest *subreq;
-
- _enter("%x,%llx-%llx", wreq->debug_id, start, start + len);
+ struct netfs_io_subrequest *subreq =
+ list_first_entry(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
- subreq = netfs_create_write_request(wreq, NETFS_UPLOAD_TO_SERVER,
- start, len, afs_upload_to_server_worker);
- if (subreq)
- netfs_queue_write_request(subreq);
+ switch (subreq->error) {
+ case -EACCES:
+ case -EPERM:
+ case -ENOKEY:
+ case -EKEYEXPIRED:
+ case -EKEYREJECTED:
+ case -EKEYREVOKED:
+ afs_get_writeback_key(wreq);
+ if (!wreq->netfs_priv)
+ stream->failed = true;
+ break;
+ }
}
/*
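
The rewritten afs_get_writeback_key() above walks vnode->wb_keys either from the head or from just after the previously used key record and stops at the first key that still validates; afs_retry_request() then rotates to the next key when a permission-class error comes back. A standalone sketch of that resumable-cursor list walk (struct item and item_is_usable() are invented for illustration):

struct item {
	struct list_head link;
};

/* Return the first usable item following @old (or from the head of @list if
 * @old is NULL).  list_for_each_entry_from() stops cleanly if the computed
 * starting point turns out to be the list head itself.
 */
static struct item *pick_next(struct list_head *list, struct item *old)
{
	struct item *it;

	if (old)
		it = list_next_entry(old, link);
	else
		it = list_first_entry(list, struct item, link);

	list_for_each_entry_from(it, list, link) {
		if (item_is_usable(it))		/* hypothetical predicate */
			return it;
	}
	return NULL;
}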
diff --git a/fs/cachefiles/io.c b/fs/cachefiles/io.c
index 1d685357e67f..e667dbcd20e8 100644
--- a/fs/cachefiles/io.c
+++ b/fs/cachefiles/io.c
@@ -9,6 +9,7 @@
#include <linux/slab.h>
#include <linux/file.h>
#include <linux/uio.h>
+#include <linux/bio.h>
#include <linux/falloc.h>
#include <linux/sched/mm.h>
#include <trace/events/fscache.h>
@@ -493,7 +494,7 @@ out_no_object:
* boundary as appropriate.
*/
static enum netfs_io_source cachefiles_prepare_read(struct netfs_io_subrequest *subreq,
- loff_t i_size)
+ unsigned long long i_size)
{
return cachefiles_do_prepare_read(&subreq->rreq->cache_resources,
subreq->start, &subreq->len, i_size,
@@ -622,6 +623,77 @@ static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
return ret;
}
+static void cachefiles_prepare_write_subreq(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *wreq = subreq->rreq;
+ struct netfs_cache_resources *cres = &wreq->cache_resources;
+
+ _enter("W=%x[%x] %llx", wreq->debug_id, subreq->debug_index, subreq->start);
+
+ subreq->max_len = ULONG_MAX;
+ subreq->max_nr_segs = BIO_MAX_VECS;
+
+ if (!cachefiles_cres_file(cres)) {
+ if (!fscache_wait_for_operation(cres, FSCACHE_WANT_WRITE))
+ return netfs_prepare_write_failed(subreq);
+ if (!cachefiles_cres_file(cres))
+ return netfs_prepare_write_failed(subreq);
+ }
+}
+
+static void cachefiles_issue_write(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *wreq = subreq->rreq;
+ struct netfs_cache_resources *cres = &wreq->cache_resources;
+ struct cachefiles_object *object = cachefiles_cres_object(cres);
+ struct cachefiles_cache *cache = object->volume->cache;
+ const struct cred *saved_cred;
+ size_t off, pre, post, len = subreq->len;
+ loff_t start = subreq->start;
+ int ret;
+
+ _enter("W=%x[%x] %llx-%llx",
+ wreq->debug_id, subreq->debug_index, start, start + len - 1);
+
+ /* We need to start on the cache granularity boundary */
+ off = start & (CACHEFILES_DIO_BLOCK_SIZE - 1);
+ if (off) {
+ pre = CACHEFILES_DIO_BLOCK_SIZE - off;
+ if (pre >= len) {
+ netfs_write_subrequest_terminated(subreq, len, false);
+ return;
+ }
+ subreq->transferred += pre;
+ start += pre;
+ len -= pre;
+ iov_iter_advance(&subreq->io_iter, pre);
+ }
+
+ /* We also need to end on the cache granularity boundary */
+ post = len & (CACHEFILES_DIO_BLOCK_SIZE - 1);
+ if (post) {
+ len -= post;
+ if (len == 0) {
+ netfs_write_subrequest_terminated(subreq, post, false);
+ return;
+ }
+ iov_iter_truncate(&subreq->io_iter, len);
+ }
+
+ cachefiles_begin_secure(cache, &saved_cred);
+ ret = __cachefiles_prepare_write(object, cachefiles_cres_file(cres),
+ &start, &len, len, true);
+ cachefiles_end_secure(cache, saved_cred);
+ if (ret < 0) {
+ netfs_write_subrequest_terminated(subreq, ret, false);
+ return;
+ }
+
+ cachefiles_write(&subreq->rreq->cache_resources,
+ subreq->start, &subreq->io_iter,
+ netfs_write_subrequest_terminated, subreq);
+}
+
/*
* Clean up an operation.
*/
@@ -638,8 +710,10 @@ static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
.end_operation = cachefiles_end_operation,
.read = cachefiles_read,
.write = cachefiles_write,
+ .issue_write = cachefiles_issue_write,
.prepare_read = cachefiles_prepare_read,
.prepare_write = cachefiles_prepare_write,
+ .prepare_write_subreq = cachefiles_prepare_write_subreq,
.prepare_ondemand_read = cachefiles_prepare_ondemand_read,
.query_occupancy = cachefiles_query_occupancy,
};
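
cachefiles_issue_write() above only writes whole cache blocks: a leading partial block is skipped (and accounted as already transferred) and a trailing partial block is truncated off the iterator. A standalone sketch of just the rounding arithmetic, assuming a power-of-two block size such as CACHEFILES_DIO_BLOCK_SIZE:

/* Illustrative only: trim [start, start + len) down to whole blocks of @blk. */
static void trim_to_blocks(unsigned long long *start, size_t *len, size_t blk)
{
	size_t off = *start & (blk - 1);	/* offset into the first block */

	if (off) {
		size_t pre = blk - off;		/* bytes to skip at the front */

		if (pre >= *len) {
			*len = 0;		/* nothing block-aligned to write */
			return;
		}
		*start += pre;
		*len -= pre;
	}
	*len -= *len & (blk - 1);		/* drop the trailing partial block */
}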
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index ee9caf7916fb..8c16bc5250ef 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -193,7 +193,7 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
* block, but do not exceed the file size, unless the original
* request already exceeds it.
*/
- new_end = min(round_up(end, lo->stripe_unit), rreq->i_size);
+ new_end = umin(round_up(end, lo->stripe_unit), rreq->i_size);
if (new_end > end && new_end <= rreq->start + max_len)
rreq->len = new_end - rreq->start;
@@ -498,11 +498,6 @@ const struct netfs_request_ops ceph_netfs_ops = {
};
#ifdef CONFIG_CEPH_FSCACHE
-static void ceph_set_page_fscache(struct page *page)
-{
- set_page_fscache(page);
-}
-
static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
{
struct inode *inode = priv;
@@ -517,13 +512,9 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
struct fscache_cookie *cookie = ceph_fscache_cookie(ci);
fscache_write_to_cache(cookie, inode->i_mapping, off, len, i_size_read(inode),
- ceph_fscache_write_terminated, inode, caching);
+ ceph_fscache_write_terminated, inode, true, caching);
}
#else
-static inline void ceph_set_page_fscache(struct page *page)
-{
-}
-
static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
{
}
@@ -715,8 +706,6 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
len = wlen;
set_page_writeback(page);
- if (caching)
- ceph_set_page_fscache(page);
ceph_fscache_write_to_cache(inode, page_off, len, caching);
if (IS_ENCRYPTED(inode)) {
@@ -800,8 +789,6 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
return AOP_WRITEPAGE_ACTIVATE;
}
- wait_on_page_fscache(page);
-
err = writepage_nounlock(page, wbc);
if (err == -ERESTARTSYS) {
/* direct memory reclaimer was killed by SIGKILL. return 0
@@ -1075,7 +1062,7 @@ get_more_pages:
unlock_page(page);
break;
}
- if (PageWriteback(page) || PageFsCache(page)) {
+ if (PageWriteback(page)) {
if (wbc->sync_mode == WB_SYNC_NONE) {
doutc(cl, "%p under writeback\n", page);
unlock_page(page);
@@ -1083,7 +1070,6 @@ get_more_pages:
}
doutc(cl, "waiting on writeback %p\n", page);
wait_on_page_writeback(page);
- wait_on_page_fscache(page);
}
if (!clear_page_dirty_for_io(page)) {
@@ -1268,8 +1254,6 @@ new_request:
}
set_page_writeback(page);
- if (caching)
- ceph_set_page_fscache(page);
len += thp_size(page);
}
ceph_fscache_write_to_cache(inode, offset, len, caching);
@@ -1513,7 +1497,7 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
if (r < 0)
return r;
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
WARN_ON_ONCE(!folio_test_locked(folio));
*pagep = &folio->page;
return 0;
diff --git a/fs/ceph/inode.c b/fs/ceph/inode.c
index 7b2e77517f23..99561fddcb38 100644
--- a/fs/ceph/inode.c
+++ b/fs/ceph/inode.c
@@ -577,6 +577,8 @@ struct inode *ceph_alloc_inode(struct super_block *sb)
/* Set parameters for the netfs library */
netfs_inode_init(&ci->netfs, &ceph_netfs_ops, false);
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_ICTX_USE_PGPRIV2, &ci->netfs.flags);
spin_lock_init(&ci->i_ceph_lock);
diff --git a/fs/netfs/Makefile b/fs/netfs/Makefile
index d4d1d799819e..8e6781e0b10b 100644
--- a/fs/netfs/Makefile
+++ b/fs/netfs/Makefile
@@ -11,7 +11,8 @@ netfs-y := \
main.o \
misc.o \
objects.o \
- output.o
+ write_collect.o \
+ write_issue.o
netfs-$(CONFIG_NETFS_STATS) += stats.o
diff --git a/fs/netfs/buffered_read.c b/fs/netfs/buffered_read.c
index 3298c29b5548..a6bb03bea920 100644
--- a/fs/netfs/buffered_read.c
+++ b/fs/netfs/buffered_read.c
@@ -10,8 +10,11 @@
#include "internal.h"
/*
- * Unlock the folios in a read operation. We need to set PG_fscache on any
+ * Unlock the folios in a read operation. We need to set PG_writeback on any
* folios we're going to write back before we unlock them.
+ *
+ * Note that if the deprecated NETFS_RREQ_USE_PGPRIV2 is set then we use
+ * PG_private_2 and do a direct write to the cache from here instead.
*/
void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
{
@@ -48,14 +51,14 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
xas_for_each(&xas, folio, last_page) {
loff_t pg_end;
bool pg_failed = false;
- bool folio_started;
+ bool wback_to_cache = false;
+ bool folio_started = false;
if (xas_retry(&xas, folio))
continue;
pg_end = folio_pos(folio) + folio_size(folio) - 1;
- folio_started = false;
for (;;) {
loff_t sreq_end;
@@ -63,10 +66,16 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
pg_failed = true;
break;
}
- if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
- trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
- folio_start_fscache(folio);
- folio_started = true;
+ if (test_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags)) {
+ if (!folio_started && test_bit(NETFS_SREQ_COPY_TO_CACHE,
+ &subreq->flags)) {
+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+ folio_start_private_2(folio);
+ folio_started = true;
+ }
+ } else {
+ wback_to_cache |=
+ test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
}
pg_failed |= subreq_failed;
sreq_end = subreq->start + subreq->len - 1;
@@ -98,6 +107,11 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
kfree(finfo);
}
folio_mark_uptodate(folio);
+ if (wback_to_cache && !WARN_ON_ONCE(folio_get_private(folio) != NULL)) {
+ trace_netfs_folio(folio, netfs_folio_trace_copy_to_cache);
+ folio_attach_private(folio, NETFS_FOLIO_COPY_TO_CACHE);
+ filemap_dirty_folio(folio->mapping, folio);
+ }
}
if (!test_bit(NETFS_RREQ_DONT_UNLOCK_FOLIOS, &rreq->flags)) {
@@ -116,7 +130,9 @@ void netfs_rreq_unlock_folios(struct netfs_io_request *rreq)
}
static void netfs_cache_expand_readahead(struct netfs_io_request *rreq,
- loff_t *_start, size_t *_len, loff_t i_size)
+ unsigned long long *_start,
+ unsigned long long *_len,
+ unsigned long long i_size)
{
struct netfs_cache_resources *cres = &rreq->cache_resources;
@@ -266,7 +282,7 @@ int netfs_read_folio(struct file *file, struct folio *folio)
if (ret == -ENOMEM || ret == -EINTR || ret == -ERESTARTSYS)
goto discard;
- netfs_stat(&netfs_n_rh_readpage);
+ netfs_stat(&netfs_n_rh_read_folio);
trace_netfs_read(rreq, rreq->start, rreq->len, netfs_read_trace_readpage);
/* Set up the output buffer */
@@ -450,7 +466,7 @@ retry:
if (!netfs_is_cache_enabled(ctx) &&
netfs_skip_folio_read(folio, pos, len, false)) {
netfs_stat(&netfs_n_rh_write_zskip);
- goto have_folio_no_wait;
+ goto have_folio;
}
rreq = netfs_alloc_request(mapping, file,
@@ -491,10 +507,6 @@ retry:
netfs_put_request(rreq, false, netfs_rreq_trace_put_return);
have_folio:
- ret = folio_wait_fscache_killable(folio);
- if (ret < 0)
- goto error;
-have_folio_no_wait:
*_folio = folio;
_leave(" = 0");
return 0;
diff --git a/fs/netfs/buffered_write.c b/fs/netfs/buffered_write.c
index 267b622d923b..825e6632ee4f 100644
--- a/fs/netfs/buffered_write.c
+++ b/fs/netfs/buffered_write.c
@@ -1,5 +1,5 @@
// SPDX-License-Identifier: GPL-2.0-only
-/* Network filesystem high-level write support.
+/* Network filesystem high-level buffered write support.
*
* Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
* Written by David Howells ([email protected])
@@ -26,25 +26,15 @@ enum netfs_how_to_modify {
NETFS_FLUSH_CONTENT, /* Flush incompatible content. */
};
-static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq);
-
static void netfs_set_group(struct folio *folio, struct netfs_group *netfs_group)
{
- if (netfs_group && !folio_get_private(folio))
- folio_attach_private(folio, netfs_get_group(netfs_group));
-}
+ void *priv = folio_get_private(folio);
-#if IS_ENABLED(CONFIG_FSCACHE)
-static void netfs_folio_start_fscache(bool caching, struct folio *folio)
-{
- if (caching)
- folio_start_fscache(folio);
-}
-#else
-static void netfs_folio_start_fscache(bool caching, struct folio *folio)
-{
+ if (netfs_group && (!priv || priv == NETFS_FOLIO_COPY_TO_CACHE))
+ folio_attach_private(folio, netfs_get_group(netfs_group));
+ else if (!netfs_group && priv == NETFS_FOLIO_COPY_TO_CACHE)
+ folio_detach_private(folio);
}
-#endif
/*
* Decide how we should modify a folio. We might be attempting to do
@@ -63,11 +53,12 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
bool maybe_trouble)
{
struct netfs_folio *finfo = netfs_folio_info(folio);
+ struct netfs_group *group = netfs_folio_group(folio);
loff_t pos = folio_file_pos(folio);
_enter("");
- if (netfs_folio_group(folio) != netfs_group)
+ if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE)
return NETFS_FLUSH_CONTENT;
if (folio_test_uptodate(folio))
@@ -81,16 +72,12 @@ static enum netfs_how_to_modify netfs_how_to_modify(struct netfs_inode *ctx,
if (file->f_mode & FMODE_READ)
goto no_write_streaming;
- if (test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
- goto no_write_streaming;
if (netfs_is_cache_enabled(ctx)) {
/* We don't want to get a streaming write on a file that loses
* caching service temporarily because the backing store got
* culled.
*/
- if (!test_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags))
- set_bit(NETFS_ICTX_NO_WRITE_STREAMING, &ctx->flags);
goto no_write_streaming;
}
@@ -130,6 +117,37 @@ static struct folio *netfs_grab_folio_for_write(struct address_space *mapping,
mapping_gfp_mask(mapping));
}
+/*
+ * Update i_size and estimate the update to i_blocks to reflect the additional
+ * data written into the pagecache until we can find out from the server what
+ * the values actually are.
+ */
+static void netfs_update_i_size(struct netfs_inode *ctx, struct inode *inode,
+ loff_t i_size, loff_t pos, size_t copied)
+{
+ blkcnt_t add;
+ size_t gap;
+
+ if (ctx->ops->update_i_size) {
+ ctx->ops->update_i_size(inode, pos);
+ return;
+ }
+
+ i_size_write(inode, pos);
+#if IS_ENABLED(CONFIG_FSCACHE)
+ fscache_update_cookie(ctx->cache, NULL, &pos);
+#endif
+
+ gap = SECTOR_SIZE - (i_size & (SECTOR_SIZE - 1));
+ if (copied > gap) {
+ add = DIV_ROUND_UP(copied - gap, SECTOR_SIZE);
+
+ inode->i_blocks = min_t(blkcnt_t,
+ DIV_ROUND_UP(pos, SECTOR_SIZE),
+ inode->i_blocks + add);
+ }
+}
+
/**
* netfs_perform_write - Copy data into the pagecache.
* @iocb: The operation parameters
@@ -160,7 +178,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
};
struct netfs_io_request *wreq = NULL;
struct netfs_folio *finfo;
- struct folio *folio;
+ struct folio *folio, *writethrough = NULL;
enum netfs_how_to_modify howto;
enum netfs_folio_trace trace;
unsigned int bdp_flags = (iocb->ki_flags & IOCB_SYNC) ? 0: BDP_ASYNC;
@@ -189,7 +207,9 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
}
if (!is_sync_kiocb(iocb))
wreq->iocb = iocb;
- wreq->cleanup = netfs_cleanup_buffered_write;
+ netfs_stat(&netfs_n_wh_writethrough);
+ } else {
+ netfs_stat(&netfs_n_wh_buffered_write);
}
do {
@@ -230,6 +250,16 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
offset = pos & (flen - 1);
part = min_t(size_t, flen - offset, part);
+ /* Wait for writeback to complete. The writeback engine owns
+ * the info in folio->private and may change it until it
+ * removes the WB mark.
+ */
+ if (folio_get_private(folio) &&
+ folio_wait_writeback_killable(folio)) {
+ ret = written ? -EINTR : -ERESTARTSYS;
+ goto error_folio_unlock;
+ }
+
if (signal_pending(current)) {
ret = written ? -EINTR : -ERESTARTSYS;
goto error_folio_unlock;
@@ -304,6 +334,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
maybe_trouble = true;
iov_iter_revert(iter, copied);
copied = 0;
+ folio_unlock(folio);
goto retry;
}
netfs_set_group(folio, netfs_group);
@@ -351,41 +382,22 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
trace_netfs_folio(folio, trace);
/* Update the inode size if we moved the EOF marker */
- i_size = i_size_read(inode);
pos += copied;
- if (pos > i_size) {
- if (ctx->ops->update_i_size) {
- ctx->ops->update_i_size(inode, pos);
- } else {
- i_size_write(inode, pos);
-#if IS_ENABLED(CONFIG_FSCACHE)
- fscache_update_cookie(ctx->cache, NULL, &pos);
-#endif
- }
- }
+ i_size = i_size_read(inode);
+ if (pos > i_size)
+ netfs_update_i_size(ctx, inode, i_size, pos, copied);
written += copied;
if (likely(!wreq)) {
folio_mark_dirty(folio);
+ folio_unlock(folio);
} else {
- if (folio_test_dirty(folio))
- /* Sigh. mmap. */
- folio_clear_dirty_for_io(folio);
- /* We make multiple writes to the folio... */
- if (!folio_test_writeback(folio)) {
- folio_wait_fscache(folio);
- folio_start_writeback(folio);
- folio_start_fscache(folio);
- if (wreq->iter.count == 0)
- trace_netfs_folio(folio, netfs_folio_trace_wthru);
- else
- trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
- }
- netfs_advance_writethrough(wreq, copied,
- offset + copied == flen);
+ netfs_advance_writethrough(wreq, &wbc, folio, copied,
+ offset + copied == flen,
+ &writethrough);
+ /* Folio unlocked */
}
retry:
- folio_unlock(folio);
folio_put(folio);
folio = NULL;
@@ -394,7 +406,7 @@ ssize_t netfs_perform_write(struct kiocb *iocb, struct iov_iter *iter,
out:
if (unlikely(wreq)) {
- ret2 = netfs_end_writethrough(wreq, iocb);
+ ret2 = netfs_end_writethrough(wreq, &wbc, writethrough);
wbc_detach_inode(&wbc);
if (ret2 == -EIOCBQUEUED)
return ret2;
@@ -505,6 +517,7 @@ EXPORT_SYMBOL(netfs_file_write_iter);
*/
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group)
{
+ struct netfs_group *group;
struct folio *folio = page_folio(vmf->page);
struct file *file = vmf->vma->vm_file;
struct inode *inode = file_inode(file);
@@ -515,11 +528,13 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
sb_start_pagefault(inode->i_sb);
- if (folio_wait_writeback_killable(folio))
+ if (folio_lock_killable(folio) < 0)
goto out;
- if (folio_lock_killable(folio) < 0)
+ if (folio_wait_writeback_killable(folio)) {
+ ret = VM_FAULT_LOCKED;
goto out;
+ }
/* Can we see a streaming write here? */
if (WARN_ON(!folio_test_uptodate(folio))) {
@@ -527,7 +542,8 @@ vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_gr
goto out;
}
- if (netfs_folio_group(folio) != netfs_group) {
+ group = netfs_folio_group(folio);
+ if (group != netfs_group && group != NETFS_FOLIO_COPY_TO_CACHE) {
folio_unlock(folio);
err = filemap_fdatawait_range(inode->i_mapping,
folio_pos(folio),
@@ -557,702 +573,3 @@ out:
return ret;
}
EXPORT_SYMBOL(netfs_page_mkwrite);
-
-/*
- * Kill all the pages in the given range
- */
-static void netfs_kill_pages(struct address_space *mapping,
- loff_t start, loff_t len)
-{
- struct folio *folio;
- pgoff_t index = start / PAGE_SIZE;
- pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
- _enter("%llx-%llx", start, start + len - 1);
-
- do {
- _debug("kill %lx (to %lx)", index, last);
-
- folio = filemap_get_folio(mapping, index);
- if (IS_ERR(folio)) {
- next = index + 1;
- continue;
- }
-
- next = folio_next_index(folio);
-
- trace_netfs_folio(folio, netfs_folio_trace_kill);
- folio_clear_uptodate(folio);
- if (folio_test_fscache(folio))
- folio_end_fscache(folio);
- folio_end_writeback(folio);
- folio_lock(folio);
- generic_error_remove_folio(mapping, folio);
- folio_unlock(folio);
- folio_put(folio);
-
- } while (index = next, index <= last);
-
- _leave("");
-}
-
-/*
- * Redirty all the pages in a given range.
- */
-static void netfs_redirty_pages(struct address_space *mapping,
- loff_t start, loff_t len)
-{
- struct folio *folio;
- pgoff_t index = start / PAGE_SIZE;
- pgoff_t last = (start + len - 1) / PAGE_SIZE, next;
-
- _enter("%llx-%llx", start, start + len - 1);
-
- do {
- _debug("redirty %llx @%llx", len, start);
-
- folio = filemap_get_folio(mapping, index);
- if (IS_ERR(folio)) {
- next = index + 1;
- continue;
- }
-
- next = folio_next_index(folio);
- trace_netfs_folio(folio, netfs_folio_trace_redirty);
- filemap_dirty_folio(mapping, folio);
- if (folio_test_fscache(folio))
- folio_end_fscache(folio);
- folio_end_writeback(folio);
- folio_put(folio);
- } while (index = next, index <= last);
-
- balance_dirty_pages_ratelimited(mapping);
-
- _leave("");
-}
-
-/*
- * Completion of write to server
- */
-static void netfs_pages_written_back(struct netfs_io_request *wreq)
-{
- struct address_space *mapping = wreq->mapping;
- struct netfs_folio *finfo;
- struct netfs_group *group = NULL;
- struct folio *folio;
- pgoff_t last;
- int gcount = 0;
-
- XA_STATE(xas, &mapping->i_pages, wreq->start / PAGE_SIZE);
-
- _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
-
- rcu_read_lock();
-
- last = (wreq->start + wreq->len - 1) / PAGE_SIZE;
- xas_for_each(&xas, folio, last) {
- WARN(!folio_test_writeback(folio),
- "bad %zx @%llx page %lx %lx\n",
- wreq->len, wreq->start, folio->index, last);
-
- if ((finfo = netfs_folio_info(folio))) {
- /* Streaming writes cannot be redirtied whilst under
- * writeback, so discard the streaming record.
- */
- folio_detach_private(folio);
- group = finfo->netfs_group;
- gcount++;
- trace_netfs_folio(folio, netfs_folio_trace_clear_s);
- kfree(finfo);
- } else if ((group = netfs_folio_group(folio))) {
- /* Need to detach the group pointer if the page didn't
- * get redirtied. If it has been redirtied, then it
- * must be within the same group.
- */
- if (folio_test_dirty(folio)) {
- trace_netfs_folio(folio, netfs_folio_trace_redirtied);
- goto end_wb;
- }
- if (folio_trylock(folio)) {
- if (!folio_test_dirty(folio)) {
- folio_detach_private(folio);
- gcount++;
- trace_netfs_folio(folio, netfs_folio_trace_clear_g);
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_redirtied);
- }
- folio_unlock(folio);
- goto end_wb;
- }
-
- xas_pause(&xas);
- rcu_read_unlock();
- folio_lock(folio);
- if (!folio_test_dirty(folio)) {
- folio_detach_private(folio);
- gcount++;
- trace_netfs_folio(folio, netfs_folio_trace_clear_g);
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_redirtied);
- }
- folio_unlock(folio);
- rcu_read_lock();
- } else {
- trace_netfs_folio(folio, netfs_folio_trace_clear);
- }
- end_wb:
- if (folio_test_fscache(folio))
- folio_end_fscache(folio);
- xas_advance(&xas, folio_next_index(folio) - 1);
- folio_end_writeback(folio);
- }
-
- rcu_read_unlock();
- netfs_put_group_many(group, gcount);
- _leave("");
-}
-
-/*
- * Deal with the disposition of the folios that are under writeback to close
- * out the operation.
- */
-static void netfs_cleanup_buffered_write(struct netfs_io_request *wreq)
-{
- struct address_space *mapping = wreq->mapping;
-
- _enter("");
-
- switch (wreq->error) {
- case 0:
- netfs_pages_written_back(wreq);
- break;
-
- default:
- pr_notice("R=%08x Unexpected error %d\n", wreq->debug_id, wreq->error);
- fallthrough;
- case -EACCES:
- case -EPERM:
- case -ENOKEY:
- case -EKEYEXPIRED:
- case -EKEYREJECTED:
- case -EKEYREVOKED:
- case -ENETRESET:
- case -EDQUOT:
- case -ENOSPC:
- netfs_redirty_pages(mapping, wreq->start, wreq->len);
- break;
-
- case -EROFS:
- case -EIO:
- case -EREMOTEIO:
- case -EFBIG:
- case -ENOENT:
- case -ENOMEDIUM:
- case -ENXIO:
- netfs_kill_pages(mapping, wreq->start, wreq->len);
- break;
- }
-
- if (wreq->error)
- mapping_set_error(mapping, wreq->error);
- if (wreq->netfs_ops->done)
- wreq->netfs_ops->done(wreq);
-}
-
-/*
- * Extend the region to be written back to include subsequent contiguously
- * dirty pages if possible, but don't sleep while doing so.
- *
- * If this page holds new content, then we can include filler zeros in the
- * writeback.
- */
-static void netfs_extend_writeback(struct address_space *mapping,
- struct netfs_group *group,
- struct xa_state *xas,
- long *_count,
- loff_t start,
- loff_t max_len,
- bool caching,
- size_t *_len,
- size_t *_top)
-{
- struct netfs_folio *finfo;
- struct folio_batch fbatch;
- struct folio *folio;
- unsigned int i;
- pgoff_t index = (start + *_len) / PAGE_SIZE;
- size_t len;
- void *priv;
- bool stop = true;
-
- folio_batch_init(&fbatch);
-
- do {
- /* Firstly, we gather up a batch of contiguous dirty pages
- * under the RCU read lock - but we can't clear the dirty flags
- * there if any of those pages are mapped.
- */
- rcu_read_lock();
-
- xas_for_each(xas, folio, ULONG_MAX) {
- stop = true;
- if (xas_retry(xas, folio))
- continue;
- if (xa_is_value(folio))
- break;
- if (folio->index != index) {
- xas_reset(xas);
- break;
- }
-
- if (!folio_try_get_rcu(folio)) {
- xas_reset(xas);
- continue;
- }
-
- /* Has the folio moved or been split? */
- if (unlikely(folio != xas_reload(xas))) {
- folio_put(folio);
- xas_reset(xas);
- break;
- }
-
- if (!folio_trylock(folio)) {
- folio_put(folio);
- xas_reset(xas);
- break;
- }
- if (!folio_test_dirty(folio) ||
- folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- folio_put(folio);
- xas_reset(xas);
- break;
- }
-
- stop = false;
- len = folio_size(folio);
- priv = folio_get_private(folio);
- if ((const struct netfs_group *)priv != group) {
- stop = true;
- finfo = netfs_folio_info(folio);
- if (finfo->netfs_group != group ||
- finfo->dirty_offset > 0) {
- folio_unlock(folio);
- folio_put(folio);
- xas_reset(xas);
- break;
- }
- len = finfo->dirty_len;
- }
-
- *_top += folio_size(folio);
- index += folio_nr_pages(folio);
- *_count -= folio_nr_pages(folio);
- *_len += len;
- if (*_len >= max_len || *_count <= 0)
- stop = true;
-
- if (!folio_batch_add(&fbatch, folio))
- break;
- if (stop)
- break;
- }
-
- xas_pause(xas);
- rcu_read_unlock();
-
- /* Now, if we obtained any folios, we can shift them to being
- * writable and mark them for caching.
- */
- if (!folio_batch_count(&fbatch))
- break;
-
- for (i = 0; i < folio_batch_count(&fbatch); i++) {
- folio = fbatch.folios[i];
- trace_netfs_folio(folio, netfs_folio_trace_store_plus);
-
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- folio_start_writeback(folio);
- netfs_folio_start_fscache(caching, folio);
- folio_unlock(folio);
- }
-
- folio_batch_release(&fbatch);
- cond_resched();
- } while (!stop);
-}
-
-/*
- * Synchronously write back the locked page and any subsequent non-locked dirty
- * pages.
- */
-static ssize_t netfs_write_back_from_locked_folio(struct address_space *mapping,
- struct writeback_control *wbc,
- struct netfs_group *group,
- struct xa_state *xas,
- struct folio *folio,
- unsigned long long start,
- unsigned long long end)
-{
- struct netfs_io_request *wreq;
- struct netfs_folio *finfo;
- struct netfs_inode *ctx = netfs_inode(mapping->host);
- unsigned long long i_size = i_size_read(&ctx->inode);
- size_t len, max_len;
- bool caching = netfs_is_cache_enabled(ctx);
- long count = wbc->nr_to_write;
- int ret;
-
- _enter(",%lx,%llx-%llx,%u", folio->index, start, end, caching);
-
- wreq = netfs_alloc_request(mapping, NULL, start, folio_size(folio),
- NETFS_WRITEBACK);
- if (IS_ERR(wreq)) {
- folio_unlock(folio);
- return PTR_ERR(wreq);
- }
-
- if (!folio_clear_dirty_for_io(folio))
- BUG();
- folio_start_writeback(folio);
- netfs_folio_start_fscache(caching, folio);
-
- count -= folio_nr_pages(folio);
-
- /* Find all consecutive lockable dirty pages that have contiguous
- * written regions, stopping when we find a page that is not
- * immediately lockable, is not dirty or is missing, or we reach the
- * end of the range.
- */
- trace_netfs_folio(folio, netfs_folio_trace_store);
-
- len = wreq->len;
- finfo = netfs_folio_info(folio);
- if (finfo) {
- start += finfo->dirty_offset;
- if (finfo->dirty_offset + finfo->dirty_len != len) {
- len = finfo->dirty_len;
- goto cant_expand;
- }
- len = finfo->dirty_len;
- }
-
- if (start < i_size) {
- /* Trim the write to the EOF; the extra data is ignored. Also
- * put an upper limit on the size of a single storedata op.
- */
- max_len = 65536 * 4096;
- max_len = min_t(unsigned long long, max_len, end - start + 1);
- max_len = min_t(unsigned long long, max_len, i_size - start);
-
- if (len < max_len)
- netfs_extend_writeback(mapping, group, xas, &count, start,
- max_len, caching, &len, &wreq->upper_len);
- }
-
-cant_expand:
- len = min_t(unsigned long long, len, i_size - start);
-
- /* We now have a contiguous set of dirty pages, each with writeback
- * set; the first page is still locked at this point, but all the rest
- * have been unlocked.
- */
- folio_unlock(folio);
- wreq->start = start;
- wreq->len = len;
-
- if (start < i_size) {
- _debug("write back %zx @%llx [%llx]", len, start, i_size);
-
- /* Speculatively write to the cache. We have to fix this up
- * later if the store fails.
- */
- wreq->cleanup = netfs_cleanup_buffered_write;
-
- iov_iter_xarray(&wreq->iter, ITER_SOURCE, &mapping->i_pages, start,
- wreq->upper_len);
- __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
- ret = netfs_begin_write(wreq, true, netfs_write_trace_writeback);
- if (ret == 0 || ret == -EIOCBQUEUED)
- wbc->nr_to_write -= len / PAGE_SIZE;
- } else {
- _debug("write discard %zx @%llx [%llx]", len, start, i_size);
-
- /* The dirty region was entirely beyond the EOF. */
- fscache_clear_page_bits(mapping, start, len, caching);
- netfs_pages_written_back(wreq);
- ret = 0;
- }
-
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- _leave(" = 1");
- return 1;
-}
-
-/*
- * Write a region of pages back to the server
- */
-static ssize_t netfs_writepages_begin(struct address_space *mapping,
- struct writeback_control *wbc,
- struct netfs_group *group,
- struct xa_state *xas,
- unsigned long long *_start,
- unsigned long long end)
-{
- const struct netfs_folio *finfo;
- struct folio *folio;
- unsigned long long start = *_start;
- ssize_t ret;
- void *priv;
- int skips = 0;
-
- _enter("%llx,%llx,", start, end);
-
-search_again:
- /* Find the first dirty page in the group. */
- rcu_read_lock();
-
- for (;;) {
- folio = xas_find_marked(xas, end / PAGE_SIZE, PAGECACHE_TAG_DIRTY);
- if (xas_retry(xas, folio) || xa_is_value(folio))
- continue;
- if (!folio)
- break;
-
- if (!folio_try_get_rcu(folio)) {
- xas_reset(xas);
- continue;
- }
-
- if (unlikely(folio != xas_reload(xas))) {
- folio_put(folio);
- xas_reset(xas);
- continue;
- }
-
- /* Skip any dirty folio that's not in the group of interest. */
- priv = folio_get_private(folio);
- if ((const struct netfs_group *)priv != group) {
- finfo = netfs_folio_info(folio);
- if (finfo->netfs_group != group) {
- folio_put(folio);
- continue;
- }
- }
-
- xas_pause(xas);
- break;
- }
- rcu_read_unlock();
- if (!folio)
- return 0;
-
- start = folio_pos(folio); /* May regress with THPs */
-
- _debug("wback %lx", folio->index);
-
- /* At this point we hold neither the i_pages lock nor the page lock:
- * the page may be truncated or invalidated (changing page->mapping to
- * NULL), or even swizzled back from swapper_space to tmpfs file
- * mapping
- */
-lock_again:
- if (wbc->sync_mode != WB_SYNC_NONE) {
- ret = folio_lock_killable(folio);
- if (ret < 0)
- return ret;
- } else {
- if (!folio_trylock(folio))
- goto search_again;
- }
-
- if (folio->mapping != mapping ||
- !folio_test_dirty(folio)) {
- start += folio_size(folio);
- folio_unlock(folio);
- goto search_again;
- }
-
- if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
- folio_unlock(folio);
- if (wbc->sync_mode != WB_SYNC_NONE) {
- folio_wait_writeback(folio);
-#ifdef CONFIG_FSCACHE
- folio_wait_fscache(folio);
-#endif
- goto lock_again;
- }
-
- start += folio_size(folio);
- if (wbc->sync_mode == WB_SYNC_NONE) {
- if (skips >= 5 || need_resched()) {
- ret = 0;
- goto out;
- }
- skips++;
- }
- goto search_again;
- }
-
- ret = netfs_write_back_from_locked_folio(mapping, wbc, group, xas,
- folio, start, end);
-out:
- if (ret > 0)
- *_start = start + ret;
- _leave(" = %zd [%llx]", ret, *_start);
- return ret;
-}
-
-/*
- * Write a region of pages back to the server
- */
-static int netfs_writepages_region(struct address_space *mapping,
- struct writeback_control *wbc,
- struct netfs_group *group,
- unsigned long long *_start,
- unsigned long long end)
-{
- ssize_t ret;
-
- XA_STATE(xas, &mapping->i_pages, *_start / PAGE_SIZE);
-
- do {
- ret = netfs_writepages_begin(mapping, wbc, group, &xas,
- _start, end);
- if (ret > 0 && wbc->nr_to_write > 0)
- cond_resched();
- } while (ret > 0 && wbc->nr_to_write > 0);
-
- return ret > 0 ? 0 : ret;
-}
-
-/*
- * write some of the pending data back to the server
- */
-int netfs_writepages(struct address_space *mapping,
- struct writeback_control *wbc)
-{
- struct netfs_group *group = NULL;
- loff_t start, end;
- int ret;
-
- _enter("");
-
- /* We have to be careful as we can end up racing with setattr()
- * truncating the pagecache since the caller doesn't take a lock here
- * to prevent it.
- */
-
- if (wbc->range_cyclic && mapping->writeback_index) {
- start = mapping->writeback_index * PAGE_SIZE;
- ret = netfs_writepages_region(mapping, wbc, group,
- &start, LLONG_MAX);
- if (ret < 0)
- goto out;
-
- if (wbc->nr_to_write <= 0) {
- mapping->writeback_index = start / PAGE_SIZE;
- goto out;
- }
-
- start = 0;
- end = mapping->writeback_index * PAGE_SIZE;
- mapping->writeback_index = 0;
- ret = netfs_writepages_region(mapping, wbc, group, &start, end);
- if (ret == 0)
- mapping->writeback_index = start / PAGE_SIZE;
- } else if (wbc->range_start == 0 && wbc->range_end == LLONG_MAX) {
- start = 0;
- ret = netfs_writepages_region(mapping, wbc, group,
- &start, LLONG_MAX);
- if (wbc->nr_to_write > 0 && ret == 0)
- mapping->writeback_index = start / PAGE_SIZE;
- } else {
- start = wbc->range_start;
- ret = netfs_writepages_region(mapping, wbc, group,
- &start, wbc->range_end);
- }
-
-out:
- _leave(" = %d", ret);
- return ret;
-}
-EXPORT_SYMBOL(netfs_writepages);
-
-/*
- * Deal with the disposition of a laundered folio.
- */
-static void netfs_cleanup_launder_folio(struct netfs_io_request *wreq)
-{
- if (wreq->error) {
- pr_notice("R=%08x Laundering error %d\n", wreq->debug_id, wreq->error);
- mapping_set_error(wreq->mapping, wreq->error);
- }
-}
-
-/**
- * netfs_launder_folio - Clean up a dirty folio that's being invalidated
- * @folio: The folio to clean
- *
- * This is called to write back a folio that's being invalidated when an inode
- * is getting torn down. Ideally, writepages would be used instead.
- */
-int netfs_launder_folio(struct folio *folio)
-{
- struct netfs_io_request *wreq;
- struct address_space *mapping = folio->mapping;
- struct netfs_folio *finfo = netfs_folio_info(folio);
- struct netfs_group *group = netfs_folio_group(folio);
- struct bio_vec bvec;
- unsigned long long i_size = i_size_read(mapping->host);
- unsigned long long start = folio_pos(folio);
- size_t offset = 0, len;
- int ret = 0;
-
- if (finfo) {
- offset = finfo->dirty_offset;
- start += offset;
- len = finfo->dirty_len;
- } else {
- len = folio_size(folio);
- }
- len = min_t(unsigned long long, len, i_size - start);
-
- wreq = netfs_alloc_request(mapping, NULL, start, len, NETFS_LAUNDER_WRITE);
- if (IS_ERR(wreq)) {
- ret = PTR_ERR(wreq);
- goto out;
- }
-
- if (!folio_clear_dirty_for_io(folio))
- goto out_put;
-
- trace_netfs_folio(folio, netfs_folio_trace_launder);
-
- _debug("launder %llx-%llx", start, start + len - 1);
-
- /* Speculatively write to the cache. We have to fix this up later if
- * the store fails.
- */
- wreq->cleanup = netfs_cleanup_launder_folio;
-
- bvec_set_folio(&bvec, folio, len, offset);
- iov_iter_bvec(&wreq->iter, ITER_SOURCE, &bvec, 1, len);
- __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
- ret = netfs_begin_write(wreq, true, netfs_write_trace_launder);
-
-out_put:
- folio_detach_private(folio);
- netfs_put_group(group);
- kfree(finfo);
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
-out:
- folio_wait_fscache(folio);
- _leave(" = %d", ret);
- return ret;
-}
-EXPORT_SYMBOL(netfs_launder_folio);
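
As a worked example of the netfs_update_i_size() estimate added earlier in this file (a 512-byte SECTOR_SIZE assumed, figures invented):

	old i_size = 1000, new EOF pos = 3000, copied = 2000
	gap = 512 - (1000 & 511)            = 24 bytes left in the old final sector
	add = DIV_ROUND_UP(2000 - 24, 512)  = 4 newly filled sectors
	cap = DIV_ROUND_UP(3000, 512)       = 6 sectors for the whole file
	i_blocks = min(cap, i_blocks + add)

so the estimate never exceeds the block count implied by the new file size, and it stands in only until the server reports the real attributes.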
diff --git a/fs/netfs/direct_write.c b/fs/netfs/direct_write.c
index bee047e20f5d..608ba6416919 100644
--- a/fs/netfs/direct_write.c
+++ b/fs/netfs/direct_write.c
@@ -34,6 +34,7 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
unsigned long long start = iocb->ki_pos;
unsigned long long end = start + iov_iter_count(iter);
ssize_t ret, n;
+ size_t len = iov_iter_count(iter);
bool async = !is_sync_kiocb(iocb);
_enter("");
@@ -46,13 +47,17 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
_debug("uw %llx-%llx", start, end);
- wreq = netfs_alloc_request(iocb->ki_filp->f_mapping, iocb->ki_filp,
- start, end - start,
- iocb->ki_flags & IOCB_DIRECT ?
- NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
+ wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp, start,
+ iocb->ki_flags & IOCB_DIRECT ?
+ NETFS_DIO_WRITE : NETFS_UNBUFFERED_WRITE);
if (IS_ERR(wreq))
return PTR_ERR(wreq);
+ wreq->io_streams[0].avail = true;
+ trace_netfs_write(wreq, (iocb->ki_flags & IOCB_DIRECT ?
+ netfs_write_trace_dio_write :
+ netfs_write_trace_unbuffered_write));
+
{
/* If this is an async op and we're not using a bounce buffer,
* we have to save the source buffer as the iterator is only
@@ -63,7 +68,7 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
* request.
*/
if (async || user_backed_iter(iter)) {
- n = netfs_extract_user_iter(iter, wreq->len, &wreq->iter, 0);
+ n = netfs_extract_user_iter(iter, len, &wreq->iter, 0);
if (n < 0) {
ret = n;
goto out;
@@ -71,7 +76,6 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
wreq->direct_bv = (struct bio_vec *)wreq->iter.bvec;
wreq->direct_bv_count = n;
wreq->direct_bv_unpin = iov_iter_extract_will_pin(iter);
- wreq->len = iov_iter_count(&wreq->iter);
} else {
wreq->iter = *iter;
}
@@ -79,6 +83,8 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
wreq->io_iter = wreq->iter;
}
+ __set_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags);
+
/* Copy the data into the bounce buffer and encrypt it. */
// TODO
@@ -87,10 +93,7 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
if (async)
wreq->iocb = iocb;
wreq->cleanup = netfs_cleanup_dio_write;
- ret = netfs_begin_write(wreq, is_sync_kiocb(iocb),
- iocb->ki_flags & IOCB_DIRECT ?
- netfs_write_trace_dio_write :
- netfs_write_trace_unbuffered_write);
+ ret = netfs_unbuffered_write(wreq, is_sync_kiocb(iocb), iov_iter_count(&wreq->io_iter));
if (ret < 0) {
_debug("begin = %zd", ret);
goto out;
@@ -100,9 +103,8 @@ static ssize_t netfs_unbuffered_write_iter_locked(struct kiocb *iocb, struct iov
trace_netfs_rreq(wreq, netfs_rreq_trace_wait_ip);
wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
TASK_UNINTERRUPTIBLE);
-
+ smp_rmb(); /* Read error/transferred after RIP flag */
ret = wreq->error;
- _debug("waited = %zd", ret);
if (ret == 0) {
ret = wreq->transferred;
iocb->ki_pos += ret;
@@ -132,18 +134,20 @@ out:
ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
{
struct file *file = iocb->ki_filp;
- struct inode *inode = file->f_mapping->host;
+ struct address_space *mapping = file->f_mapping;
+ struct inode *inode = mapping->host;
struct netfs_inode *ictx = netfs_inode(inode);
- unsigned long long end;
ssize_t ret;
+ loff_t pos = iocb->ki_pos;
+ unsigned long long end = pos + iov_iter_count(from) - 1;
- _enter("%llx,%zx,%llx", iocb->ki_pos, iov_iter_count(from), i_size_read(inode));
+ _enter("%llx,%zx,%llx", pos, iov_iter_count(from), i_size_read(inode));
if (!iov_iter_count(from))
return 0;
trace_netfs_write_iter(iocb, from);
- netfs_stat(&netfs_n_rh_dio_write);
+ netfs_stat(&netfs_n_wh_dio_write);
ret = netfs_start_io_direct(inode);
if (ret < 0)
@@ -157,7 +161,25 @@ ssize_t netfs_unbuffered_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = file_update_time(file);
if (ret < 0)
goto out;
- ret = kiocb_invalidate_pages(iocb, iov_iter_count(from));
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ /* We could block if there are any pages in the range. */
+ ret = -EAGAIN;
+ if (filemap_range_has_page(mapping, pos, end))
+ if (filemap_invalidate_inode(inode, true, pos, end))
+ goto out;
+ } else {
+ ret = filemap_write_and_wait_range(mapping, pos, end);
+ if (ret < 0)
+ goto out;
+ }
+
+ /*
+ * After a write we want buffered reads to be sure to go to disk to get
+ * the new data. We invalidate clean cached page from the region we're
+ * about to write. We do this *before* the write so that we can return
+ * without clobbering -EIOCBQUEUED from ->direct_IO().
+ */
+ ret = filemap_invalidate_inode(inode, true, pos, end);
if (ret < 0)
goto out;
end = iocb->ki_pos + iov_iter_count(from);
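
The smp_rmb() added above orders the reads of wreq->error and wreq->transferred after the observation that NETFS_RREQ_IN_PROGRESS has cleared; whoever completes the request has to publish those fields before clearing the flag with matching release/write ordering. Generic shape of that flag-plus-results handoff (struct example_req and EXAMPLE_REQ_IN_PROGRESS are invented; this is not the netfs completion code):

/* Completer: publish the results, then clear the flag and wake waiters. */
static void example_complete(struct example_req *req, ssize_t result)
{
	req->error = result < 0 ? result : 0;
	req->transferred = result < 0 ? 0 : result;
	smp_wmb();				/* results visible before the flag change */
	clear_bit(EXAMPLE_REQ_IN_PROGRESS, &req->flags);
	wake_up_bit(&req->flags, EXAMPLE_REQ_IN_PROGRESS);
}

/* Waiter: see the flag clear, then read the results, as in the hunk above. */
static ssize_t example_wait(struct example_req *req)
{
	wait_on_bit(&req->flags, EXAMPLE_REQ_IN_PROGRESS, TASK_UNINTERRUPTIBLE);
	smp_rmb();				/* read error/transferred after the flag */
	return req->error ?: req->transferred;
}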
diff --git a/fs/netfs/fscache_io.c b/fs/netfs/fscache_io.c
index 43a651ed8264..38637e5c9b57 100644
--- a/fs/netfs/fscache_io.c
+++ b/fs/netfs/fscache_io.c
@@ -166,6 +166,7 @@ struct fscache_write_request {
loff_t start;
size_t len;
bool set_bits;
+ bool using_pgpriv2;
netfs_io_terminated_t term_func;
void *term_func_priv;
};
@@ -182,7 +183,7 @@ void __fscache_clear_page_bits(struct address_space *mapping,
rcu_read_lock();
xas_for_each(&xas, page, last) {
- end_page_fscache(page);
+ folio_end_private_2(page_folio(page));
}
rcu_read_unlock();
}
@@ -197,8 +198,9 @@ static void fscache_wreq_done(void *priv, ssize_t transferred_or_error,
{
struct fscache_write_request *wreq = priv;
- fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
- wreq->set_bits);
+ if (wreq->using_pgpriv2)
+ fscache_clear_page_bits(wreq->mapping, wreq->start, wreq->len,
+ wreq->set_bits);
if (wreq->term_func)
wreq->term_func(wreq->term_func_priv, transferred_or_error,
@@ -212,7 +214,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
loff_t start, size_t len, loff_t i_size,
netfs_io_terminated_t term_func,
void *term_func_priv,
- bool cond)
+ bool using_pgpriv2, bool cond)
{
struct fscache_write_request *wreq;
struct netfs_cache_resources *cres;
@@ -230,6 +232,7 @@ void __fscache_write_to_cache(struct fscache_cookie *cookie,
wreq->mapping = mapping;
wreq->start = start;
wreq->len = len;
+ wreq->using_pgpriv2 = using_pgpriv2;
wreq->set_bits = cond;
wreq->term_func = term_func;
wreq->term_func_priv = term_func_priv;
@@ -257,7 +260,8 @@ abandon_end:
abandon_free:
kfree(wreq);
abandon:
- fscache_clear_page_bits(mapping, start, len, cond);
+ if (using_pgpriv2)
+ fscache_clear_page_bits(mapping, start, len, cond);
if (term_func)
term_func(term_func_priv, ret, false);
}
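The fscache_io.c change above threads a using_pgpriv2 flag from submission through to completion so the PG_private_2 page marks are only cleared when the submitter actually set them. Below is a small standalone sketch of that conditional-cleanup pattern; the types and helpers are illustrative stand-ins, not the fscache API.

#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>

struct write_req {
	bool	using_pgpriv2;			/* submitter marked pages with PG_private_2 */
	void	(*term_func)(void *priv, long result);
	void	*term_func_priv;
};

static void clear_page_marks(void)
{
	printf("clearing PG_private_2 marks\n");
}

/* Completion path: only undo the page marking if the submitter used it. */
static void write_done(struct write_req *wreq, long result)
{
	if (wreq->using_pgpriv2)
		clear_page_marks();
	if (wreq->term_func)
		wreq->term_func(wreq->term_func_priv, result);
	printf("write of %ld bytes complete\n", result);
	free(wreq);
}

int main(void)
{
	struct write_req *wreq = calloc(1, sizeof(*wreq));

	if (!wreq)
		return 1;
	wreq->using_pgpriv2 = false;	/* the new write paths don't mark pages */
	write_done(wreq, 4096);
	return 0;
}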
diff --git a/fs/netfs/internal.h b/fs/netfs/internal.h
index ec7045d24400..95e281a8af78 100644
--- a/fs/netfs/internal.h
+++ b/fs/netfs/internal.h
@@ -37,6 +37,8 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync);
extern unsigned int netfs_debug;
extern struct list_head netfs_io_requests;
extern spinlock_t netfs_proc_lock;
+extern mempool_t netfs_request_pool;
+extern mempool_t netfs_subrequest_pool;
#ifdef CONFIG_PROC_FS
static inline void netfs_proc_add_rreq(struct netfs_io_request *rreq)
@@ -91,22 +93,12 @@ static inline void netfs_see_request(struct netfs_io_request *rreq,
}
/*
- * output.c
- */
-int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
- enum netfs_write_trace what);
-struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
-int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end);
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb);
-
-/*
* stats.c
*/
#ifdef CONFIG_NETFS_STATS
extern atomic_t netfs_n_rh_dio_read;
-extern atomic_t netfs_n_rh_dio_write;
extern atomic_t netfs_n_rh_readahead;
-extern atomic_t netfs_n_rh_readpage;
+extern atomic_t netfs_n_rh_read_folio;
extern atomic_t netfs_n_rh_rreq;
extern atomic_t netfs_n_rh_sreq;
extern atomic_t netfs_n_rh_download;
@@ -123,6 +115,10 @@ extern atomic_t netfs_n_rh_write_begin;
extern atomic_t netfs_n_rh_write_done;
extern atomic_t netfs_n_rh_write_failed;
extern atomic_t netfs_n_rh_write_zskip;
+extern atomic_t netfs_n_wh_buffered_write;
+extern atomic_t netfs_n_wh_writethrough;
+extern atomic_t netfs_n_wh_dio_write;
+extern atomic_t netfs_n_wh_writepages;
extern atomic_t netfs_n_wh_wstream_conflict;
extern atomic_t netfs_n_wh_upload;
extern atomic_t netfs_n_wh_upload_done;
@@ -149,6 +145,33 @@ static inline void netfs_stat_d(atomic_t *stat)
#endif
/*
+ * write_collect.c
+ */
+int netfs_folio_written_back(struct folio *folio);
+void netfs_write_collection_worker(struct work_struct *work);
+void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async);
+
+/*
+ * write_issue.c
+ */
+struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
+ struct file *file,
+ loff_t start,
+ enum netfs_io_origin origin);
+void netfs_reissue_write(struct netfs_io_stream *stream,
+ struct netfs_io_subrequest *subreq);
+int netfs_advance_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start, size_t len, bool to_eof);
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len);
+int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *folio, size_t copied, bool to_page_end,
+ struct folio **writethrough_cache);
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache);
+int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len);
+
+/*
* Miscellaneous functions.
*/
static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
@@ -168,7 +191,7 @@ static inline bool netfs_is_cache_enabled(struct netfs_inode *ctx)
*/
static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_group)
{
- if (netfs_group)
+ if (netfs_group && netfs_group != NETFS_FOLIO_COPY_TO_CACHE)
refcount_inc(&netfs_group->ref);
return netfs_group;
}
@@ -178,7 +201,9 @@ static inline struct netfs_group *netfs_get_group(struct netfs_group *netfs_grou
*/
static inline void netfs_put_group(struct netfs_group *netfs_group)
{
- if (netfs_group && refcount_dec_and_test(&netfs_group->ref))
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_dec_and_test(&netfs_group->ref))
netfs_group->free(netfs_group);
}
@@ -187,7 +212,9 @@ static inline void netfs_put_group(struct netfs_group *netfs_group)
*/
static inline void netfs_put_group_many(struct netfs_group *netfs_group, int nr)
{
- if (netfs_group && refcount_sub_and_test(nr, &netfs_group->ref))
+ if (netfs_group &&
+ netfs_group != NETFS_FOLIO_COPY_TO_CACHE &&
+ refcount_sub_and_test(nr, &netfs_group->ref))
netfs_group->free(netfs_group);
}
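The helpers above skip refcounting when the group pointer is the NETFS_FOLIO_COPY_TO_CACHE sentinel, which marks "copy to the cache only" with no real group object behind it. A standalone sketch of that sentinel-guarded refcounting follows; the type and the sentinel value are simplified stand-ins, not the netfs definitions.

#include <stdio.h>
#include <stdlib.h>

struct group {
	int	ref;
};

/* A reserved non-NULL value that never points at a real object. */
#define COPY_TO_CACHE_SENTINEL	((struct group *)0x1UL)

static struct group *get_group(struct group *g)
{
	if (g && g != COPY_TO_CACHE_SENTINEL)
		g->ref++;
	return g;
}

static void put_group(struct group *g)
{
	if (g && g != COPY_TO_CACHE_SENTINEL && --g->ref == 0) {
		printf("freeing group\n");
		free(g);
	}
}

int main(void)
{
	struct group *g = calloc(1, sizeof(*g));

	if (!g)
		return 1;
	g->ref = 1;
	put_group(get_group(g));		/* balanced: object survives */
	put_group(g);				/* last ref: object freed */
	put_group(COPY_TO_CACHE_SENTINEL);	/* sentinel: no refcount, no free */
	return 0;
}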
diff --git a/fs/netfs/io.c b/fs/netfs/io.c
index 4261ad6c55b6..6cfecfcd02e1 100644
--- a/fs/netfs/io.c
+++ b/fs/netfs/io.c
@@ -99,145 +99,6 @@ static void netfs_rreq_completed(struct netfs_io_request *rreq, bool was_async)
}
/*
- * Deal with the completion of writing the data to the cache. We have to clear
- * the PG_fscache bits on the folios involved and release the caller's ref.
- *
- * May be called in softirq mode and we inherit a ref from the caller.
- */
-static void netfs_rreq_unmark_after_write(struct netfs_io_request *rreq,
- bool was_async)
-{
- struct netfs_io_subrequest *subreq;
- struct folio *folio;
- pgoff_t unlocked = 0;
- bool have_unlocked = false;
-
- rcu_read_lock();
-
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
- XA_STATE(xas, &rreq->mapping->i_pages, subreq->start / PAGE_SIZE);
-
- xas_for_each(&xas, folio, (subreq->start + subreq->len - 1) / PAGE_SIZE) {
- if (xas_retry(&xas, folio))
- continue;
-
- /* We might have multiple writes from the same huge
- * folio, but we mustn't unlock a folio more than once.
- */
- if (have_unlocked && folio->index <= unlocked)
- continue;
- unlocked = folio_next_index(folio) - 1;
- trace_netfs_folio(folio, netfs_folio_trace_end_copy);
- folio_end_fscache(folio);
- have_unlocked = true;
- }
- }
-
- rcu_read_unlock();
- netfs_rreq_completed(rreq, was_async);
-}
-
-static void netfs_rreq_copy_terminated(void *priv, ssize_t transferred_or_error,
- bool was_async)
-{
- struct netfs_io_subrequest *subreq = priv;
- struct netfs_io_request *rreq = subreq->rreq;
-
- if (IS_ERR_VALUE(transferred_or_error)) {
- netfs_stat(&netfs_n_rh_write_failed);
- trace_netfs_failure(rreq, subreq, transferred_or_error,
- netfs_fail_copy_to_cache);
- } else {
- netfs_stat(&netfs_n_rh_write_done);
- }
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_write_term);
-
- /* If we decrement nr_copy_ops to 0, the ref belongs to us. */
- if (atomic_dec_and_test(&rreq->nr_copy_ops))
- netfs_rreq_unmark_after_write(rreq, was_async);
-
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
-}
-
-/*
- * Perform any outstanding writes to the cache. We inherit a ref from the
- * caller.
- */
-static void netfs_rreq_do_write_to_cache(struct netfs_io_request *rreq)
-{
- struct netfs_cache_resources *cres = &rreq->cache_resources;
- struct netfs_io_subrequest *subreq, *next, *p;
- struct iov_iter iter;
- int ret;
-
- trace_netfs_rreq(rreq, netfs_rreq_trace_copy);
-
- /* We don't want terminating writes trying to wake us up whilst we're
- * still going through the list.
- */
- atomic_inc(&rreq->nr_copy_ops);
-
- list_for_each_entry_safe(subreq, p, &rreq->subrequests, rreq_link) {
- if (!test_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags)) {
- list_del_init(&subreq->rreq_link);
- netfs_put_subrequest(subreq, false,
- netfs_sreq_trace_put_no_copy);
- }
- }
-
- list_for_each_entry(subreq, &rreq->subrequests, rreq_link) {
- /* Amalgamate adjacent writes */
- while (!list_is_last(&subreq->rreq_link, &rreq->subrequests)) {
- next = list_next_entry(subreq, rreq_link);
- if (next->start != subreq->start + subreq->len)
- break;
- subreq->len += next->len;
- list_del_init(&next->rreq_link);
- netfs_put_subrequest(next, false,
- netfs_sreq_trace_put_merged);
- }
-
- ret = cres->ops->prepare_write(cres, &subreq->start, &subreq->len,
- subreq->len, rreq->i_size, true);
- if (ret < 0) {
- trace_netfs_failure(rreq, subreq, ret, netfs_fail_prepare_write);
- trace_netfs_sreq(subreq, netfs_sreq_trace_write_skip);
- continue;
- }
-
- iov_iter_xarray(&iter, ITER_SOURCE, &rreq->mapping->i_pages,
- subreq->start, subreq->len);
-
- atomic_inc(&rreq->nr_copy_ops);
- netfs_stat(&netfs_n_rh_write);
- netfs_get_subrequest(subreq, netfs_sreq_trace_get_copy_to_cache);
- trace_netfs_sreq(subreq, netfs_sreq_trace_write);
- cres->ops->write(cres, subreq->start, &iter,
- netfs_rreq_copy_terminated, subreq);
- }
-
- /* If we decrement nr_copy_ops to 0, the usage ref belongs to us. */
- if (atomic_dec_and_test(&rreq->nr_copy_ops))
- netfs_rreq_unmark_after_write(rreq, false);
-}
-
-static void netfs_rreq_write_to_cache_work(struct work_struct *work)
-{
- struct netfs_io_request *rreq =
- container_of(work, struct netfs_io_request, work);
-
- netfs_rreq_do_write_to_cache(rreq);
-}
-
-static void netfs_rreq_write_to_cache(struct netfs_io_request *rreq)
-{
- rreq->work.func = netfs_rreq_write_to_cache_work;
- if (!queue_work(system_unbound_wq, &rreq->work))
- BUG();
-}
-
-/*
* Handle a short read.
*/
static void netfs_rreq_short_read(struct netfs_io_request *rreq,
@@ -409,9 +270,6 @@ again:
clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
wake_up_bit(&rreq->flags, NETFS_RREQ_IN_PROGRESS);
- if (test_bit(NETFS_RREQ_COPY_TO_CACHE, &rreq->flags))
- return netfs_rreq_write_to_cache(rreq);
-
netfs_rreq_completed(rreq, was_async);
}
@@ -618,7 +476,7 @@ netfs_rreq_prepare_read(struct netfs_io_request *rreq,
set:
if (subreq->len > rreq->len)
- pr_warn("R=%08x[%u] SREQ>RREQ %zx > %zx\n",
+ pr_warn("R=%08x[%u] SREQ>RREQ %zx > %llx\n",
rreq->debug_id, subreq->debug_index,
subreq->len, rreq->len);
@@ -643,8 +501,7 @@ out:
* Slice off a piece of a read request and submit an I/O request for it.
*/
static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
- struct iov_iter *io_iter,
- unsigned int *_debug_index)
+ struct iov_iter *io_iter)
{
struct netfs_io_subrequest *subreq;
enum netfs_io_source source;
@@ -653,11 +510,10 @@ static bool netfs_rreq_submit_slice(struct netfs_io_request *rreq,
if (!subreq)
return false;
- subreq->debug_index = (*_debug_index)++;
subreq->start = rreq->start + rreq->submitted;
subreq->len = io_iter->count;
- _debug("slice %llx,%zx,%zx", subreq->start, subreq->len, rreq->submitted);
+ _debug("slice %llx,%zx,%llx", subreq->start, subreq->len, rreq->submitted);
list_add_tail(&subreq->rreq_link, &rreq->subrequests);
/* Call out to the cache to find out what it can do with the remaining
@@ -707,7 +563,6 @@ subreq_failed:
int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
{
struct iov_iter io_iter;
- unsigned int debug_index = 0;
int ret;
_enter("R=%x %llx-%llx",
@@ -733,12 +588,12 @@ int netfs_begin_read(struct netfs_io_request *rreq, bool sync)
atomic_set(&rreq->nr_outstanding, 1);
io_iter = rreq->io_iter;
do {
- _debug("submit %llx + %zx >= %llx",
+ _debug("submit %llx + %llx >= %llx",
rreq->start, rreq->submitted, rreq->i_size);
if (rreq->origin == NETFS_DIO_READ &&
rreq->start + rreq->submitted >= rreq->i_size)
break;
- if (!netfs_rreq_submit_slice(rreq, &io_iter, &debug_index))
+ if (!netfs_rreq_submit_slice(rreq, &io_iter))
break;
if (test_bit(NETFS_RREQ_BLOCKED, &rreq->flags) &&
test_bit(NETFS_RREQ_NONBLOCK, &rreq->flags))
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5e77618a7940..5f0f438e5d21 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -7,6 +7,7 @@
#include <linux/module.h>
#include <linux/export.h>
+#include <linux/mempool.h>
#include <linux/proc_fs.h>
#include <linux/seq_file.h>
#include "internal.h"
@@ -23,6 +24,11 @@ unsigned netfs_debug;
module_param_named(debug, netfs_debug, uint, S_IWUSR | S_IRUGO);
MODULE_PARM_DESC(netfs_debug, "Netfs support debugging mask");
+static struct kmem_cache *netfs_request_slab;
+static struct kmem_cache *netfs_subrequest_slab;
+mempool_t netfs_request_pool;
+mempool_t netfs_subrequest_pool;
+
#ifdef CONFIG_PROC_FS
LIST_HEAD(netfs_io_requests);
DEFINE_SPINLOCK(netfs_proc_lock);
@@ -31,9 +37,9 @@ static const char *netfs_origins[nr__netfs_io_origin] = {
[NETFS_READAHEAD] = "RA",
[NETFS_READPAGE] = "RP",
[NETFS_READ_FOR_WRITE] = "RW",
+ [NETFS_COPY_TO_CACHE] = "CC",
[NETFS_WRITEBACK] = "WB",
[NETFS_WRITETHROUGH] = "WT",
- [NETFS_LAUNDER_WRITE] = "LW",
[NETFS_UNBUFFERED_WRITE] = "UW",
[NETFS_DIO_READ] = "DR",
[NETFS_DIO_WRITE] = "DW",
@@ -56,7 +62,7 @@ static int netfs_requests_seq_show(struct seq_file *m, void *v)
rreq = list_entry(v, struct netfs_io_request, proc_link);
seq_printf(m,
- "%08x %s %3d %2lx %4d %3d @%04llx %zx/%zx",
+ "%08x %s %3d %2lx %4d %3d @%04llx %llx/%llx",
rreq->debug_id,
netfs_origins[rreq->origin],
refcount_read(&rreq->ref),
@@ -98,25 +104,54 @@ static int __init netfs_init(void)
{
int ret = -ENOMEM;
+ netfs_request_slab = kmem_cache_create("netfs_request",
+ sizeof(struct netfs_io_request), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
+ NULL);
+ if (!netfs_request_slab)
+ goto error_req;
+
+ if (mempool_init_slab_pool(&netfs_request_pool, 100, netfs_request_slab) < 0)
+ goto error_reqpool;
+
+ netfs_subrequest_slab = kmem_cache_create("netfs_subrequest",
+ sizeof(struct netfs_io_subrequest), 0,
+ SLAB_HWCACHE_ALIGN | SLAB_ACCOUNT,
+ NULL);
+ if (!netfs_subrequest_slab)
+ goto error_subreq;
+
+ if (mempool_init_slab_pool(&netfs_subrequest_pool, 100, netfs_subrequest_slab) < 0)
+ goto error_subreqpool;
+
if (!proc_mkdir("fs/netfs", NULL))
- goto error;
+ goto error_proc;
if (!proc_create_seq("fs/netfs/requests", S_IFREG | 0444, NULL,
&netfs_requests_seq_ops))
- goto error_proc;
+ goto error_procfile;
#ifdef CONFIG_FSCACHE_STATS
if (!proc_create_single("fs/netfs/stats", S_IFREG | 0444, NULL,
netfs_stats_show))
- goto error_proc;
+ goto error_procfile;
#endif
ret = fscache_init();
if (ret < 0)
- goto error_proc;
+ goto error_fscache;
return 0;
-error_proc:
+error_fscache:
+error_procfile:
remove_proc_entry("fs/netfs", NULL);
-error:
+error_proc:
+ mempool_exit(&netfs_subrequest_pool);
+error_subreqpool:
+ kmem_cache_destroy(netfs_subrequest_slab);
+error_subreq:
+ mempool_exit(&netfs_request_pool);
+error_reqpool:
+ kmem_cache_destroy(netfs_request_slab);
+error_req:
return ret;
}
fs_initcall(netfs_init);
@@ -125,5 +160,9 @@ static void __exit netfs_exit(void)
{
fscache_exit();
remove_proc_entry("fs/netfs", NULL);
+ mempool_exit(&netfs_subrequest_pool);
+ kmem_cache_destroy(netfs_subrequest_slab);
+ mempool_exit(&netfs_request_pool);
+ kmem_cache_destroy(netfs_request_slab);
}
module_exit(netfs_exit);
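netfs_init() above acquires two slab caches and two mempools and unwinds them with a goto ladder: each acquisition gets a matching label, and a failure jumps to the label that releases everything obtained so far, in reverse order. A compact model of that pattern with placeholder resources rather than the slab/mempool API:

#include <stdio.h>
#include <stdlib.h>

static void *acquire(const char *name) { printf("acquire %s\n", name); return malloc(1); }
static void release(const char *name, void *p) { printf("release %s\n", name); free(p); }

static int init(void)
{
	void *req_cache, *req_pool, *sub_cache, *sub_pool;

	req_cache = acquire("request cache");
	if (!req_cache)
		goto err_req_cache;
	req_pool = acquire("request pool");
	if (!req_pool)
		goto err_req_pool;
	sub_cache = acquire("subrequest cache");
	if (!sub_cache)
		goto err_sub_cache;
	sub_pool = acquire("subrequest pool");
	if (!sub_pool)
		goto err_sub_pool;
	return 0;	/* resources are kept for the module's lifetime */

err_sub_pool:
	release("subrequest cache", sub_cache);
err_sub_cache:
	release("request pool", req_pool);
err_req_pool:
	release("request cache", req_cache);
err_req_cache:
	return -1;
}

int main(void)
{
	return init() ? 1 : 0;
}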
diff --git a/fs/netfs/misc.c b/fs/netfs/misc.c
index 90051ced8e2a..bc1fc54fb724 100644
--- a/fs/netfs/misc.c
+++ b/fs/netfs/misc.c
@@ -177,13 +177,11 @@ EXPORT_SYMBOL(netfs_clear_inode_writeback);
*/
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length)
{
- struct netfs_folio *finfo = NULL;
+ struct netfs_folio *finfo;
size_t flen = folio_size(folio);
_enter("{%lx},%zx,%zx", folio->index, offset, length);
- folio_wait_fscache(folio);
-
if (!folio_test_private(folio))
return;
@@ -248,12 +246,6 @@ bool netfs_release_folio(struct folio *folio, gfp_t gfp)
if (folio_test_private(folio))
return false;
- if (folio_test_fscache(folio)) {
- if (current_is_kswapd() || !(gfp & __GFP_FS))
- return false;
- folio_wait_fscache(folio);
- }
-
fscache_note_page_release(netfs_i_cookie(ctx));
return true;
}
diff --git a/fs/netfs/objects.c b/fs/netfs/objects.c
index 610ceb5bd86c..c90d482b1650 100644
--- a/fs/netfs/objects.c
+++ b/fs/netfs/objects.c
@@ -6,6 +6,8 @@
*/
#include <linux/slab.h>
+#include <linux/mempool.h>
+#include <linux/delay.h>
#include "internal.h"
/*
@@ -20,17 +22,22 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
struct inode *inode = file ? file_inode(file) : mapping->host;
struct netfs_inode *ctx = netfs_inode(inode);
struct netfs_io_request *rreq;
+ mempool_t *mempool = ctx->ops->request_pool ?: &netfs_request_pool;
+ struct kmem_cache *cache = mempool->pool_data;
bool is_unbuffered = (origin == NETFS_UNBUFFERED_WRITE ||
origin == NETFS_DIO_READ ||
origin == NETFS_DIO_WRITE);
bool cached = !is_unbuffered && netfs_is_cache_enabled(ctx);
int ret;
- rreq = kzalloc(ctx->ops->io_request_size ?: sizeof(struct netfs_io_request),
- GFP_KERNEL);
- if (!rreq)
- return ERR_PTR(-ENOMEM);
+ for (;;) {
+ rreq = mempool_alloc(mempool, GFP_KERNEL);
+ if (rreq)
+ break;
+ msleep(10);
+ }
+ memset(rreq, 0, kmem_cache_size(cache));
rreq->start = start;
rreq->len = len;
rreq->upper_len = len;
@@ -40,19 +47,27 @@ struct netfs_io_request *netfs_alloc_request(struct address_space *mapping,
rreq->inode = inode;
rreq->i_size = i_size_read(inode);
rreq->debug_id = atomic_inc_return(&debug_ids);
+ rreq->wsize = INT_MAX;
+ spin_lock_init(&rreq->lock);
+ INIT_LIST_HEAD(&rreq->io_streams[0].subrequests);
+ INIT_LIST_HEAD(&rreq->io_streams[1].subrequests);
INIT_LIST_HEAD(&rreq->subrequests);
INIT_WORK(&rreq->work, NULL);
refcount_set(&rreq->ref, 1);
__set_bit(NETFS_RREQ_IN_PROGRESS, &rreq->flags);
- if (cached)
+ if (cached) {
__set_bit(NETFS_RREQ_WRITE_TO_CACHE, &rreq->flags);
+ if (test_bit(NETFS_ICTX_USE_PGPRIV2, &ctx->flags))
+ /* Filesystem uses deprecated PG_private_2 marking. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+ }
if (file && file->f_flags & O_NONBLOCK)
__set_bit(NETFS_RREQ_NONBLOCK, &rreq->flags);
if (rreq->netfs_ops->init_request) {
ret = rreq->netfs_ops->init_request(rreq, file);
if (ret < 0) {
- kfree(rreq);
+ mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool);
return ERR_PTR(ret);
}
}
@@ -74,6 +89,8 @@ void netfs_get_request(struct netfs_io_request *rreq, enum netfs_rreq_ref_trace
void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
{
struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream;
+ int s;
while (!list_empty(&rreq->subrequests)) {
subreq = list_first_entry(&rreq->subrequests,
@@ -82,6 +99,25 @@ void netfs_clear_subrequests(struct netfs_io_request *rreq, bool was_async)
netfs_put_subrequest(subreq, was_async,
netfs_sreq_trace_put_clear);
}
+
+ for (s = 0; s < ARRAY_SIZE(rreq->io_streams); s++) {
+ stream = &rreq->io_streams[s];
+ while (!list_empty(&stream->subrequests)) {
+ subreq = list_first_entry(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ list_del(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, was_async,
+ netfs_sreq_trace_put_clear);
+ }
+ }
+}
+
+static void netfs_free_request_rcu(struct rcu_head *rcu)
+{
+ struct netfs_io_request *rreq = container_of(rcu, struct netfs_io_request, rcu);
+
+ mempool_free(rreq, rreq->netfs_ops->request_pool ?: &netfs_request_pool);
+ netfs_stat_d(&netfs_n_rh_rreq);
}
static void netfs_free_request(struct work_struct *work)
@@ -106,8 +142,7 @@ static void netfs_free_request(struct work_struct *work)
}
kvfree(rreq->direct_bv);
}
- kfree_rcu(rreq, rcu);
- netfs_stat_d(&netfs_n_rh_rreq);
+ call_rcu(&rreq->rcu, netfs_free_request_rcu);
}
void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
@@ -139,19 +174,25 @@ void netfs_put_request(struct netfs_io_request *rreq, bool was_async,
struct netfs_io_subrequest *netfs_alloc_subrequest(struct netfs_io_request *rreq)
{
struct netfs_io_subrequest *subreq;
-
- subreq = kzalloc(rreq->netfs_ops->io_subrequest_size ?:
- sizeof(struct netfs_io_subrequest),
- GFP_KERNEL);
- if (subreq) {
- INIT_WORK(&subreq->work, NULL);
- INIT_LIST_HEAD(&subreq->rreq_link);
- refcount_set(&subreq->ref, 2);
- subreq->rreq = rreq;
- netfs_get_request(rreq, netfs_rreq_trace_get_subreq);
- netfs_stat(&netfs_n_rh_sreq);
+ mempool_t *mempool = rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool;
+ struct kmem_cache *cache = mempool->pool_data;
+
+ for (;;) {
+ subreq = mempool_alloc(rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool,
+ GFP_KERNEL);
+ if (subreq)
+ break;
+ msleep(10);
}
+ memset(subreq, 0, kmem_cache_size(cache));
+ INIT_WORK(&subreq->work, NULL);
+ INIT_LIST_HEAD(&subreq->rreq_link);
+ refcount_set(&subreq->ref, 2);
+ subreq->rreq = rreq;
+ subreq->debug_index = atomic_inc_return(&rreq->subreq_counter);
+ netfs_get_request(rreq, netfs_rreq_trace_get_subreq);
+ netfs_stat(&netfs_n_rh_sreq);
return subreq;
}
@@ -173,7 +214,7 @@ static void netfs_free_subrequest(struct netfs_io_subrequest *subreq,
trace_netfs_sreq(subreq, netfs_sreq_trace_free);
if (rreq->netfs_ops->free_subrequest)
rreq->netfs_ops->free_subrequest(subreq);
- kfree(subreq);
+ mempool_free(subreq, rreq->netfs_ops->subrequest_pool ?: &netfs_subrequest_pool);
netfs_stat_d(&netfs_n_rh_sreq);
netfs_put_request(rreq, was_async, netfs_rreq_trace_put_subreq);
}
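netfs_alloc_request() and netfs_alloc_subrequest() above switch from kzalloc() to a mempool: a GFP_KERNEL mempool allocation can still come back empty under extreme pressure, so the code sleeps briefly and retries rather than failing the I/O, then zeroes the object because pool memory is recycled, not pre-cleared. A toy standalone version of that allocate-or-wait loop follows; the one-slot pool is a stand-in, not the kernel mempool API.

#define _POSIX_C_SOURCE 200809L
#include <stdio.h>
#include <string.h>
#include <time.h>

struct request {
	int	id;
};

/* Trivial stand-in pool that deliberately fails the first attempt. */
static struct request *pool_alloc(void)
{
	static struct request slot;
	static int attempts;

	return attempts++ ? &slot : NULL;
}

static struct request *alloc_request(void)
{
	struct request *req;

	for (;;) {
		req = pool_alloc();
		if (req)
			break;
		/* Nothing free: back off for ~10ms and try again. */
		nanosleep(&(struct timespec){ .tv_nsec = 10 * 1000 * 1000 }, NULL);
	}
	memset(req, 0, sizeof(*req));	/* recycled memory is not pre-zeroed */
	return req;
}

int main(void)
{
	printf("got request %p\n", (void *)alloc_request());
	return 0;
}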
diff --git a/fs/netfs/output.c b/fs/netfs/output.c
deleted file mode 100644
index 625eb68f3e5a..000000000000
--- a/fs/netfs/output.c
+++ /dev/null
@@ -1,478 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* Network filesystem high-level write support.
- *
- * Copyright (C) 2023 Red Hat, Inc. All Rights Reserved.
- * Written by David Howells ([email protected])
- */
-
-#include <linux/fs.h>
-#include <linux/mm.h>
-#include <linux/pagemap.h>
-#include <linux/slab.h>
-#include <linux/writeback.h>
-#include <linux/pagevec.h>
-#include "internal.h"
-
-/**
- * netfs_create_write_request - Create a write operation.
- * @wreq: The write request this is storing from.
- * @dest: The destination type
- * @start: Start of the region this write will modify
- * @len: Length of the modification
- * @worker: The worker function to handle the write(s)
- *
- * Allocate a write operation, set it up and add it to the list on a write
- * request.
- */
-struct netfs_io_subrequest *netfs_create_write_request(struct netfs_io_request *wreq,
- enum netfs_io_source dest,
- loff_t start, size_t len,
- work_func_t worker)
-{
- struct netfs_io_subrequest *subreq;
-
- subreq = netfs_alloc_subrequest(wreq);
- if (subreq) {
- INIT_WORK(&subreq->work, worker);
- subreq->source = dest;
- subreq->start = start;
- subreq->len = len;
- subreq->debug_index = wreq->subreq_counter++;
-
- switch (subreq->source) {
- case NETFS_UPLOAD_TO_SERVER:
- netfs_stat(&netfs_n_wh_upload);
- break;
- case NETFS_WRITE_TO_CACHE:
- netfs_stat(&netfs_n_wh_write);
- break;
- default:
- BUG();
- }
-
- subreq->io_iter = wreq->io_iter;
- iov_iter_advance(&subreq->io_iter, subreq->start - wreq->start);
- iov_iter_truncate(&subreq->io_iter, subreq->len);
-
- trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
- refcount_read(&subreq->ref),
- netfs_sreq_trace_new);
- atomic_inc(&wreq->nr_outstanding);
- list_add_tail(&subreq->rreq_link, &wreq->subrequests);
- trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
- }
-
- return subreq;
-}
-EXPORT_SYMBOL(netfs_create_write_request);
-
-/*
- * Process a completed write request once all the component operations have
- * been completed.
- */
-static void netfs_write_terminated(struct netfs_io_request *wreq, bool was_async)
-{
- struct netfs_io_subrequest *subreq;
- struct netfs_inode *ctx = netfs_inode(wreq->inode);
- size_t transferred = 0;
-
- _enter("R=%x[]", wreq->debug_id);
-
- trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
-
- list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
- if (subreq->error || subreq->transferred == 0)
- break;
- transferred += subreq->transferred;
- if (subreq->transferred < subreq->len)
- break;
- }
- wreq->transferred = transferred;
-
- list_for_each_entry(subreq, &wreq->subrequests, rreq_link) {
- if (!subreq->error)
- continue;
- switch (subreq->source) {
- case NETFS_UPLOAD_TO_SERVER:
- /* Depending on the type of failure, this may prevent
- * writeback completion unless we're in disconnected
- * mode.
- */
- if (!wreq->error)
- wreq->error = subreq->error;
- break;
-
- case NETFS_WRITE_TO_CACHE:
- /* Failure doesn't prevent writeback completion unless
- * we're in disconnected mode.
- */
- if (subreq->error != -ENOBUFS)
- ctx->ops->invalidate_cache(wreq);
- break;
-
- default:
- WARN_ON_ONCE(1);
- if (!wreq->error)
- wreq->error = -EIO;
- return;
- }
- }
-
- wreq->cleanup(wreq);
-
- if (wreq->origin == NETFS_DIO_WRITE &&
- wreq->mapping->nrpages) {
- pgoff_t first = wreq->start >> PAGE_SHIFT;
- pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
- invalidate_inode_pages2_range(wreq->mapping, first, last);
- }
-
- if (wreq->origin == NETFS_DIO_WRITE)
- inode_dio_end(wreq->inode);
-
- _debug("finished");
- trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
- clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
- wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
-
- if (wreq->iocb) {
- wreq->iocb->ki_pos += transferred;
- if (wreq->iocb->ki_complete)
- wreq->iocb->ki_complete(
- wreq->iocb, wreq->error ? wreq->error : transferred);
- }
-
- netfs_clear_subrequests(wreq, was_async);
- netfs_put_request(wreq, was_async, netfs_rreq_trace_put_complete);
-}
-
-/*
- * Deal with the completion of writing the data to the cache.
- */
-void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
- bool was_async)
-{
- struct netfs_io_subrequest *subreq = _op;
- struct netfs_io_request *wreq = subreq->rreq;
- unsigned int u;
-
- _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
-
- switch (subreq->source) {
- case NETFS_UPLOAD_TO_SERVER:
- netfs_stat(&netfs_n_wh_upload_done);
- break;
- case NETFS_WRITE_TO_CACHE:
- netfs_stat(&netfs_n_wh_write_done);
- break;
- case NETFS_INVALID_WRITE:
- break;
- default:
- BUG();
- }
-
- if (IS_ERR_VALUE(transferred_or_error)) {
- subreq->error = transferred_or_error;
- trace_netfs_failure(wreq, subreq, transferred_or_error,
- netfs_fail_write);
- goto failed;
- }
-
- if (WARN(transferred_or_error > subreq->len - subreq->transferred,
- "Subreq excess write: R%x[%x] %zd > %zu - %zu",
- wreq->debug_id, subreq->debug_index,
- transferred_or_error, subreq->len, subreq->transferred))
- transferred_or_error = subreq->len - subreq->transferred;
-
- subreq->error = 0;
- subreq->transferred += transferred_or_error;
-
- if (iov_iter_count(&subreq->io_iter) != subreq->len - subreq->transferred)
- pr_warn("R=%08x[%u] ITER POST-MISMATCH %zx != %zx-%zx %x\n",
- wreq->debug_id, subreq->debug_index,
- iov_iter_count(&subreq->io_iter), subreq->len,
- subreq->transferred, subreq->io_iter.iter_type);
-
- if (subreq->transferred < subreq->len)
- goto incomplete;
-
- __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
-out:
- trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
-
- /* If we decrement nr_outstanding to 0, the ref belongs to us. */
- u = atomic_dec_return(&wreq->nr_outstanding);
- if (u == 0)
- netfs_write_terminated(wreq, was_async);
- else if (u == 1)
- wake_up_var(&wreq->nr_outstanding);
-
- netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
- return;
-
-incomplete:
- if (transferred_or_error == 0) {
- if (__test_and_set_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags)) {
- subreq->error = -ENODATA;
- goto failed;
- }
- } else {
- __clear_bit(NETFS_SREQ_NO_PROGRESS, &subreq->flags);
- }
-
- __set_bit(NETFS_SREQ_SHORT_IO, &subreq->flags);
- set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
- goto out;
-
-failed:
- switch (subreq->source) {
- case NETFS_WRITE_TO_CACHE:
- netfs_stat(&netfs_n_wh_write_failed);
- set_bit(NETFS_RREQ_INCOMPLETE_IO, &wreq->flags);
- break;
- case NETFS_UPLOAD_TO_SERVER:
- netfs_stat(&netfs_n_wh_upload_failed);
- set_bit(NETFS_RREQ_FAILED, &wreq->flags);
- wreq->error = subreq->error;
- break;
- default:
- break;
- }
- goto out;
-}
-EXPORT_SYMBOL(netfs_write_subrequest_terminated);
-
-static void netfs_write_to_cache_op(struct netfs_io_subrequest *subreq)
-{
- struct netfs_io_request *wreq = subreq->rreq;
- struct netfs_cache_resources *cres = &wreq->cache_resources;
-
- trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
-
- cres->ops->write(cres, subreq->start, &subreq->io_iter,
- netfs_write_subrequest_terminated, subreq);
-}
-
-static void netfs_write_to_cache_op_worker(struct work_struct *work)
-{
- struct netfs_io_subrequest *subreq =
- container_of(work, struct netfs_io_subrequest, work);
-
- netfs_write_to_cache_op(subreq);
-}
-
-/**
- * netfs_queue_write_request - Queue a write request for attention
- * @subreq: The write request to be queued
- *
- * Queue the specified write request for processing by a worker thread. We
- * pass the caller's ref on the request to the worker thread.
- */
-void netfs_queue_write_request(struct netfs_io_subrequest *subreq)
-{
- if (!queue_work(system_unbound_wq, &subreq->work))
- netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_wip);
-}
-EXPORT_SYMBOL(netfs_queue_write_request);
-
-/*
- * Set up a op for writing to the cache.
- */
-static void netfs_set_up_write_to_cache(struct netfs_io_request *wreq)
-{
- struct netfs_cache_resources *cres = &wreq->cache_resources;
- struct netfs_io_subrequest *subreq;
- struct netfs_inode *ctx = netfs_inode(wreq->inode);
- struct fscache_cookie *cookie = netfs_i_cookie(ctx);
- loff_t start = wreq->start;
- size_t len = wreq->len;
- int ret;
-
- if (!fscache_cookie_enabled(cookie)) {
- clear_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags);
- return;
- }
-
- _debug("write to cache");
- ret = fscache_begin_write_operation(cres, cookie);
- if (ret < 0)
- return;
-
- ret = cres->ops->prepare_write(cres, &start, &len, wreq->upper_len,
- i_size_read(wreq->inode), true);
- if (ret < 0)
- return;
-
- subreq = netfs_create_write_request(wreq, NETFS_WRITE_TO_CACHE, start, len,
- netfs_write_to_cache_op_worker);
- if (!subreq)
- return;
-
- netfs_write_to_cache_op(subreq);
-}
-
-/*
- * Begin the process of writing out a chunk of data.
- *
- * We are given a write request that holds a series of dirty regions and
- * (partially) covers a sequence of folios, all of which are present. The
- * pages must have been marked as writeback as appropriate.
- *
- * We need to perform the following steps:
- *
- * (1) If encrypting, create an output buffer and encrypt each block of the
- * data into it, otherwise the output buffer will point to the original
- * folios.
- *
- * (2) If the data is to be cached, set up a write op for the entire output
- * buffer to the cache, if the cache wants to accept it.
- *
- * (3) If the data is to be uploaded (ie. not merely cached):
- *
- * (a) If the data is to be compressed, create a compression buffer and
- * compress the data into it.
- *
- * (b) For each destination we want to upload to, set up write ops to write
- * to that destination. We may need multiple writes if the data is not
- * contiguous or the span exceeds wsize for a server.
- */
-int netfs_begin_write(struct netfs_io_request *wreq, bool may_wait,
- enum netfs_write_trace what)
-{
- struct netfs_inode *ctx = netfs_inode(wreq->inode);
-
- _enter("R=%x %llx-%llx f=%lx",
- wreq->debug_id, wreq->start, wreq->start + wreq->len - 1,
- wreq->flags);
-
- trace_netfs_write(wreq, what);
- if (wreq->len == 0 || wreq->iter.count == 0) {
- pr_err("Zero-sized write [R=%x]\n", wreq->debug_id);
- return -EIO;
- }
-
- if (wreq->origin == NETFS_DIO_WRITE)
- inode_dio_begin(wreq->inode);
-
- wreq->io_iter = wreq->iter;
-
- /* ->outstanding > 0 carries a ref */
- netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
- atomic_set(&wreq->nr_outstanding, 1);
-
- /* Start the encryption/compression going. We can do that in the
- * background whilst we generate a list of write ops that we want to
- * perform.
- */
- // TODO: Encrypt or compress the region as appropriate
-
- /* We need to write all of the region to the cache */
- if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
- netfs_set_up_write_to_cache(wreq);
-
- /* However, we don't necessarily write all of the region to the server.
- * Caching of reads is being managed this way also.
- */
- if (test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
- ctx->ops->create_write_requests(wreq, wreq->start, wreq->len);
-
- if (atomic_dec_and_test(&wreq->nr_outstanding))
- netfs_write_terminated(wreq, false);
-
- if (!may_wait)
- return -EIOCBQUEUED;
-
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- return wreq->error;
-}
-
-/*
- * Begin a write operation for writing through the pagecache.
- */
-struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
-{
- struct netfs_io_request *wreq;
- struct file *file = iocb->ki_filp;
-
- wreq = netfs_alloc_request(file->f_mapping, file, iocb->ki_pos, len,
- NETFS_WRITETHROUGH);
- if (IS_ERR(wreq))
- return wreq;
-
- trace_netfs_write(wreq, netfs_write_trace_writethrough);
-
- __set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
- iov_iter_xarray(&wreq->iter, ITER_SOURCE, &wreq->mapping->i_pages, wreq->start, 0);
- wreq->io_iter = wreq->iter;
-
- /* ->outstanding > 0 carries a ref */
- netfs_get_request(wreq, netfs_rreq_trace_get_for_outstanding);
- atomic_set(&wreq->nr_outstanding, 1);
- return wreq;
-}
-
-static void netfs_submit_writethrough(struct netfs_io_request *wreq, bool final)
-{
- struct netfs_inode *ictx = netfs_inode(wreq->inode);
- unsigned long long start;
- size_t len;
-
- if (!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))
- return;
-
- start = wreq->start + wreq->submitted;
- len = wreq->iter.count - wreq->submitted;
- if (!final) {
- len /= wreq->wsize; /* Round to number of maximum packets */
- len *= wreq->wsize;
- }
-
- ictx->ops->create_write_requests(wreq, start, len);
- wreq->submitted += len;
-}
-
-/*
- * Advance the state of the write operation used when writing through the
- * pagecache. Data has been copied into the pagecache that we need to append
- * to the request. If we've added more than wsize then we need to create a new
- * subrequest.
- */
-int netfs_advance_writethrough(struct netfs_io_request *wreq, size_t copied, bool to_page_end)
-{
- _enter("ic=%zu sb=%zu ws=%u cp=%zu tp=%u",
- wreq->iter.count, wreq->submitted, wreq->wsize, copied, to_page_end);
-
- wreq->iter.count += copied;
- wreq->io_iter.count += copied;
- if (to_page_end && wreq->io_iter.count - wreq->submitted >= wreq->wsize)
- netfs_submit_writethrough(wreq, false);
-
- return wreq->error;
-}
-
-/*
- * End a write operation used when writing through the pagecache.
- */
-int netfs_end_writethrough(struct netfs_io_request *wreq, struct kiocb *iocb)
-{
- int ret = -EIOCBQUEUED;
-
- _enter("ic=%zu sb=%zu ws=%u",
- wreq->iter.count, wreq->submitted, wreq->wsize);
-
- if (wreq->submitted < wreq->io_iter.count)
- netfs_submit_writethrough(wreq, true);
-
- if (atomic_dec_and_test(&wreq->nr_outstanding))
- netfs_write_terminated(wreq, false);
-
- if (is_sync_kiocb(iocb)) {
- wait_on_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS,
- TASK_UNINTERRUPTIBLE);
- ret = wreq->error;
- }
-
- netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
- return ret;
-}
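The removed writethrough code above rounded each non-final submission down to a whole number of wsize-sized chunks, holding back the partial tail until the final flush. A quick standalone check of that arithmetic (the function name here is just for illustration):

#include <assert.h>
#include <stddef.h>

static size_t round_to_wsize(size_t len, size_t wsize, int final)
{
	if (!final)
		len = (len / wsize) * wsize;	/* whole server-sized packets only */
	return len;
}

int main(void)
{
	assert(round_to_wsize(70000, 65536, 0) == 65536);	/* partial tail held back */
	assert(round_to_wsize(70000, 65536, 1) == 70000);	/* final flush sends the rest */
	assert(round_to_wsize(30000, 65536, 0) == 0);		/* not enough for one packet yet */
	return 0;
}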
diff --git a/fs/netfs/stats.c b/fs/netfs/stats.c
index deeba9f9dcf5..0892768eea32 100644
--- a/fs/netfs/stats.c
+++ b/fs/netfs/stats.c
@@ -10,9 +10,8 @@
#include "internal.h"
atomic_t netfs_n_rh_dio_read;
-atomic_t netfs_n_rh_dio_write;
atomic_t netfs_n_rh_readahead;
-atomic_t netfs_n_rh_readpage;
+atomic_t netfs_n_rh_read_folio;
atomic_t netfs_n_rh_rreq;
atomic_t netfs_n_rh_sreq;
atomic_t netfs_n_rh_download;
@@ -29,6 +28,10 @@ atomic_t netfs_n_rh_write_begin;
atomic_t netfs_n_rh_write_done;
atomic_t netfs_n_rh_write_failed;
atomic_t netfs_n_rh_write_zskip;
+atomic_t netfs_n_wh_buffered_write;
+atomic_t netfs_n_wh_writethrough;
+atomic_t netfs_n_wh_dio_write;
+atomic_t netfs_n_wh_writepages;
atomic_t netfs_n_wh_wstream_conflict;
atomic_t netfs_n_wh_upload;
atomic_t netfs_n_wh_upload_done;
@@ -39,13 +42,17 @@ atomic_t netfs_n_wh_write_failed;
int netfs_stats_show(struct seq_file *m, void *v)
{
- seq_printf(m, "Netfs : DR=%u DW=%u RA=%u RP=%u WB=%u WBZ=%u\n",
+ seq_printf(m, "Netfs : DR=%u RA=%u RF=%u WB=%u WBZ=%u\n",
atomic_read(&netfs_n_rh_dio_read),
- atomic_read(&netfs_n_rh_dio_write),
atomic_read(&netfs_n_rh_readahead),
- atomic_read(&netfs_n_rh_readpage),
+ atomic_read(&netfs_n_rh_read_folio),
atomic_read(&netfs_n_rh_write_begin),
atomic_read(&netfs_n_rh_write_zskip));
+ seq_printf(m, "Netfs : BW=%u WT=%u DW=%u WP=%u\n",
+ atomic_read(&netfs_n_wh_buffered_write),
+ atomic_read(&netfs_n_wh_writethrough),
+ atomic_read(&netfs_n_wh_dio_write),
+ atomic_read(&netfs_n_wh_writepages));
seq_printf(m, "Netfs : ZR=%u sh=%u sk=%u\n",
atomic_read(&netfs_n_rh_zero),
atomic_read(&netfs_n_rh_short_read),
diff --git a/fs/netfs/write_collect.c b/fs/netfs/write_collect.c
new file mode 100644
index 000000000000..60112e4b2c5e
--- /dev/null
+++ b/fs/netfs/write_collect.c
@@ -0,0 +1,808 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem write subrequest result collection, assessment
+ * and retrying.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include <linux/slab.h>
+#include "internal.h"
+
+/* Notes made in the collector */
+#define HIT_PENDING 0x01 /* A front op was still pending */
+#define SOME_EMPTY 0x02 /* One or more streams are empty */
+#define ALL_EMPTY 0x04 /* All streams are empty */
+#define MAYBE_DISCONTIG 0x08 /* A front op may be discontiguous (rounded to PAGE_SIZE) */
+#define NEED_REASSESS 0x10 /* Need to loop round and reassess */
+#define REASSESS_DISCONTIG 0x20 /* Reassess discontiguity if contiguity advances */
+#define MADE_PROGRESS 0x40 /* Made progress cleaning up a stream or the folio set */
+#define BUFFERED 0x80 /* The pagecache needs cleaning up */
+#define NEED_RETRY 0x100 /* A front op requests retrying */
+#define SAW_FAILURE 0x200 /* One or more streams hit a permanent failure */
+
+/*
+ * Successful completion of write of a folio to the server and/or cache. Note
+ * that we are not allowed to lock the folio here on pain of deadlocking with
+ * truncate.
+ */
+int netfs_folio_written_back(struct folio *folio)
+{
+ enum netfs_folio_trace why = netfs_folio_trace_clear;
+ struct netfs_folio *finfo;
+ struct netfs_group *group = NULL;
+ int gcount = 0;
+
+ if ((finfo = netfs_folio_info(folio))) {
+ /* Streaming writes cannot be redirtied whilst under writeback,
+ * so discard the streaming record.
+ */
+ folio_detach_private(folio);
+ group = finfo->netfs_group;
+ gcount++;
+ kfree(finfo);
+ why = netfs_folio_trace_clear_s;
+ goto end_wb;
+ }
+
+ if ((group = netfs_folio_group(folio))) {
+ if (group == NETFS_FOLIO_COPY_TO_CACHE) {
+ why = netfs_folio_trace_clear_cc;
+ folio_detach_private(folio);
+ goto end_wb;
+ }
+
+ /* Need to detach the group pointer if the page didn't get
+ * redirtied. If it has been redirtied, then it must be within
+ * the same group.
+ */
+ why = netfs_folio_trace_redirtied;
+ if (!folio_test_dirty(folio)) {
+ folio_detach_private(folio);
+ gcount++;
+ why = netfs_folio_trace_clear_g;
+ }
+ }
+
+end_wb:
+ trace_netfs_folio(folio, why);
+ folio_end_writeback(folio);
+ return gcount;
+}
+
+/*
+ * Get hold of a folio we have under writeback. We don't want to get the
+ * refcount on it.
+ */
+static struct folio *netfs_writeback_lookup_folio(struct netfs_io_request *wreq, loff_t pos)
+{
+ XA_STATE(xas, &wreq->mapping->i_pages, pos / PAGE_SIZE);
+ struct folio *folio;
+
+ rcu_read_lock();
+
+ for (;;) {
+ xas_reset(&xas);
+ folio = xas_load(&xas);
+ if (xas_retry(&xas, folio))
+ continue;
+
+ if (!folio || xa_is_value(folio))
+ kdebug("R=%08x: folio %lx (%llx) not present",
+ wreq->debug_id, xas.xa_index, pos / PAGE_SIZE);
+ BUG_ON(!folio || xa_is_value(folio));
+
+ if (folio == xas_reload(&xas))
+ break;
+ }
+
+ rcu_read_unlock();
+
+ if (WARN_ONCE(!folio_test_writeback(folio),
+ "R=%08x: folio %lx is not under writeback\n",
+ wreq->debug_id, folio->index)) {
+ trace_netfs_folio(folio, netfs_folio_trace_not_under_wback);
+ }
+ return folio;
+}
+
+/*
+ * Unlock any folios we've finished with.
+ */
+static void netfs_writeback_unlock_folios(struct netfs_io_request *wreq,
+ unsigned long long collected_to,
+ unsigned int *notes)
+{
+ for (;;) {
+ struct folio *folio;
+ struct netfs_folio *finfo;
+ unsigned long long fpos, fend;
+ size_t fsize, flen;
+
+ folio = netfs_writeback_lookup_folio(wreq, wreq->cleaned_to);
+
+ fpos = folio_pos(folio);
+ fsize = folio_size(folio);
+ finfo = netfs_folio_info(folio);
+ flen = finfo ? finfo->dirty_offset + finfo->dirty_len : fsize;
+
+ fend = min_t(unsigned long long, fpos + flen, wreq->i_size);
+
+ trace_netfs_collect_folio(wreq, folio, fend, collected_to);
+
+ if (fpos + fsize > wreq->contiguity) {
+ trace_netfs_collect_contig(wreq, fpos + fsize,
+ netfs_contig_trace_unlock);
+ wreq->contiguity = fpos + fsize;
+ }
+
+ /* Unlock any folio we've transferred all of. */
+ if (collected_to < fend)
+ break;
+
+ wreq->nr_group_rel += netfs_folio_written_back(folio);
+ wreq->cleaned_to = fpos + fsize;
+ *notes |= MADE_PROGRESS;
+
+ if (fpos + fsize >= collected_to)
+ break;
+ }
+}
+
+/*
+ * Perform retries on the streams that need it.
+ */
+static void netfs_retry_write_stream(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream)
+{
+ struct list_head *next;
+
+ _enter("R=%x[%x:]", wreq->debug_id, stream->stream_nr);
+
+ if (list_empty(&stream->subrequests))
+ return;
+
+ if (stream->source == NETFS_UPLOAD_TO_SERVER &&
+ wreq->netfs_ops->retry_request)
+ wreq->netfs_ops->retry_request(wreq, stream);
+
+ if (unlikely(stream->failed))
+ return;
+
+ /* If there's no renegotiation to do, just resend each failed subreq. */
+ if (!stream->prepare_write) {
+ struct netfs_io_subrequest *subreq;
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
+ break;
+ if (__test_and_clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags)) {
+ __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_reissue_write(stream, subreq);
+ }
+ }
+ return;
+ }
+
+ next = stream->subrequests.next;
+
+ do {
+ struct netfs_io_subrequest *subreq = NULL, *from, *to, *tmp;
+ unsigned long long start, len;
+ size_t part;
+ bool boundary = false;
+
+ /* Go through the stream and find the next span of contiguous
+ * data that we then rejig (cifs, for example, needs the wsize
+ * renegotiating) and reissue.
+ */
+ from = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ to = from;
+ start = from->start + from->transferred;
+ len = from->len - from->transferred;
+
+ if (test_bit(NETFS_SREQ_FAILED, &from->flags) ||
+ !test_bit(NETFS_SREQ_NEED_RETRY, &from->flags))
+ return;
+
+ list_for_each_continue(next, &stream->subrequests) {
+ subreq = list_entry(next, struct netfs_io_subrequest, rreq_link);
+ if (subreq->start + subreq->transferred != start + len ||
+ test_bit(NETFS_SREQ_BOUNDARY, &subreq->flags) ||
+ !test_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags))
+ break;
+ to = subreq;
+ len += to->len;
+ }
+
+ /* Work through the sublist. */
+ subreq = from;
+ list_for_each_entry_from(subreq, &stream->subrequests, rreq_link) {
+ if (!len)
+ break;
+ /* Renegotiate max_len (wsize) */
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+ __clear_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+ stream->prepare_write(subreq);
+
+ part = min(len, subreq->max_len);
+ subreq->len = part;
+ subreq->start = start;
+ subreq->transferred = 0;
+ len -= part;
+ start += part;
+ if (len && subreq == to &&
+ __test_and_clear_bit(NETFS_SREQ_BOUNDARY, &to->flags))
+ boundary = true;
+
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+ netfs_reissue_write(stream, subreq);
+ if (subreq == to)
+ break;
+ }
+
+ /* If we managed to use fewer subreqs, we can discard the
+ * excess; if we used the same number, then we're done.
+ */
+ if (!len) {
+ if (subreq == to)
+ continue;
+ list_for_each_entry_safe_from(subreq, tmp,
+ &stream->subrequests, rreq_link) {
+ trace_netfs_sreq(subreq, netfs_sreq_trace_discard);
+ list_del(&subreq->rreq_link);
+ netfs_put_subrequest(subreq, false, netfs_sreq_trace_put_done);
+ if (subreq == to)
+ break;
+ }
+ continue;
+ }
+
+ /* We ran out of subrequests, so we need to allocate some more
+ * and insert them after.
+ */
+ do {
+ subreq = netfs_alloc_subrequest(wreq);
+ subreq->source = to->source;
+ subreq->start = start;
+ subreq->max_len = len;
+ subreq->max_nr_segs = INT_MAX;
+ subreq->debug_index = atomic_inc_return(&wreq->subreq_counter);
+ subreq->stream_nr = to->stream_nr;
+ __set_bit(NETFS_SREQ_RETRYING, &subreq->flags);
+
+ trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref),
+ netfs_sreq_trace_new);
+ netfs_get_subrequest(subreq, netfs_sreq_trace_get_resubmit);
+
+ list_add(&subreq->rreq_link, &to->rreq_link);
+ to = list_next_entry(to, rreq_link);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_retry);
+
+ switch (stream->source) {
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload);
+ subreq->max_len = min(len, wreq->wsize);
+ break;
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ }
+
+ stream->prepare_write(subreq);
+
+ part = min(len, subreq->max_len);
+ subreq->len = subreq->transferred + part;
+ len -= part;
+ start += part;
+ if (!len && boundary) {
+ __set_bit(NETFS_SREQ_BOUNDARY, &to->flags);
+ boundary = false;
+ }
+
+ netfs_reissue_write(stream, subreq);
+ if (!len)
+ break;
+
+ } while (len);
+
+ } while (!list_is_head(next, &stream->subrequests));
+}
+
+/*
+ * Perform retries on the streams that need it. If we're doing content
+ * encryption and the server copy changed due to a third-party write, we may
+ * need to do an RMW cycle and also rewrite the data to the cache.
+ */
+static void netfs_retry_writes(struct netfs_io_request *wreq)
+{
+ struct netfs_io_subrequest *subreq;
+ struct netfs_io_stream *stream;
+ int s;
+
+ /* Wait for all outstanding I/O to quiesce before performing retries as
+ * we may need to renegotiate the I/O sizes.
+ */
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (!stream->active)
+ continue;
+
+ list_for_each_entry(subreq, &stream->subrequests, rreq_link) {
+ wait_on_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS,
+ TASK_UNINTERRUPTIBLE);
+ }
+ }
+
+ // TODO: Enc: Fetch changed partial pages
+ // TODO: Enc: Reencrypt content if needed.
+ // TODO: Enc: Wind back transferred point.
+ // TODO: Enc: Mark cache pages for retry.
+
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->need_retry) {
+ stream->need_retry = false;
+ netfs_retry_write_stream(wreq, stream);
+ }
+ }
+}
+
+/*
+ * Collect and assess the results of various write subrequests. We may need to
+ * retry some of the results - or even do an RMW cycle for content crypto.
+ *
+ * Note that we have a number of parallel, overlapping lists of subrequests,
+ * one to the server and one to the local cache for example, which may not be
+ * the same size or starting position and may not even correspond in boundary
+ * alignment.
+ */
+static void netfs_collect_write_results(struct netfs_io_request *wreq)
+{
+ struct netfs_io_subrequest *front, *remove;
+ struct netfs_io_stream *stream;
+ unsigned long long collected_to;
+ unsigned int notes;
+ int s;
+
+ _enter("%llx-%llx", wreq->start, wreq->start + wreq->len);
+ trace_netfs_collect(wreq);
+ trace_netfs_rreq(wreq, netfs_rreq_trace_collect);
+
+reassess_streams:
+ smp_rmb();
+ collected_to = ULLONG_MAX;
+ if (wreq->origin == NETFS_WRITEBACK)
+ notes = ALL_EMPTY | BUFFERED | MAYBE_DISCONTIG;
+ else if (wreq->origin == NETFS_WRITETHROUGH)
+ notes = ALL_EMPTY | BUFFERED;
+ else
+ notes = ALL_EMPTY;
+
+ /* Remove completed subrequests from the front of the streams and
+ * advance the completion point on each stream. We stop when we hit
+ * something that's in progress. The issuer thread may be adding stuff
+ * to the tail whilst we're doing this.
+ *
+ * We must not, however, merge in discontiguities that span whole
+ * folios that aren't under writeback. This is made more complicated
+ * by the folios in the gap being of unpredictable sizes - if they even
+ * exist - but we don't want to look them up.
+ */
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ loff_t rstart, rend;
+
+ stream = &wreq->io_streams[s];
+ /* Read active flag before list pointers */
+ if (!smp_load_acquire(&stream->active))
+ continue;
+
+ front = stream->front;
+ while (front) {
+ trace_netfs_collect_sreq(wreq, front);
+ //_debug("sreq [%x] %llx %zx/%zx",
+ // front->debug_index, front->start, front->transferred, front->len);
+
+ /* Stall if there may be a discontinuity. */
+ rstart = round_down(front->start, PAGE_SIZE);
+ if (rstart > wreq->contiguity) {
+ if (wreq->contiguity > stream->collected_to) {
+ trace_netfs_collect_gap(wreq, stream,
+ wreq->contiguity, 'D');
+ stream->collected_to = wreq->contiguity;
+ }
+ notes |= REASSESS_DISCONTIG;
+ break;
+ }
+ rend = round_up(front->start + front->len, PAGE_SIZE);
+ if (rend > wreq->contiguity) {
+ trace_netfs_collect_contig(wreq, rend,
+ netfs_contig_trace_collect);
+ wreq->contiguity = rend;
+ if (notes & REASSESS_DISCONTIG)
+ notes |= NEED_REASSESS;
+ }
+ notes &= ~MAYBE_DISCONTIG;
+
+ /* Stall if the front is still undergoing I/O. */
+ if (test_bit(NETFS_SREQ_IN_PROGRESS, &front->flags)) {
+ notes |= HIT_PENDING;
+ break;
+ }
+ smp_rmb(); /* Read counters after I-P flag. */
+
+ if (stream->failed) {
+ stream->collected_to = front->start + front->len;
+ notes |= MADE_PROGRESS | SAW_FAILURE;
+ goto cancel;
+ }
+ if (front->start + front->transferred > stream->collected_to) {
+ stream->collected_to = front->start + front->transferred;
+ stream->transferred = stream->collected_to - wreq->start;
+ notes |= MADE_PROGRESS;
+ }
+ if (test_bit(NETFS_SREQ_FAILED, &front->flags)) {
+ stream->failed = true;
+ stream->error = front->error;
+ if (stream->source == NETFS_UPLOAD_TO_SERVER)
+ mapping_set_error(wreq->mapping, front->error);
+ notes |= NEED_REASSESS | SAW_FAILURE;
+ break;
+ }
+ if (front->transferred < front->len) {
+ stream->need_retry = true;
+ notes |= NEED_RETRY | MADE_PROGRESS;
+ break;
+ }
+
+ cancel:
+ /* Remove if completely consumed. */
+ spin_lock(&wreq->lock);
+
+ remove = front;
+ list_del_init(&front->rreq_link);
+ front = list_first_entry_or_null(&stream->subrequests,
+ struct netfs_io_subrequest, rreq_link);
+ stream->front = front;
+ if (!front) {
+ unsigned long long jump_to = atomic64_read(&wreq->issued_to);
+
+ if (stream->collected_to < jump_to) {
+ trace_netfs_collect_gap(wreq, stream, jump_to, 'A');
+ stream->collected_to = jump_to;
+ }
+ }
+
+ spin_unlock(&wreq->lock);
+ netfs_put_subrequest(remove, false,
+ notes & SAW_FAILURE ?
+ netfs_sreq_trace_put_cancel :
+ netfs_sreq_trace_put_done);
+ }
+
+ if (front)
+ notes &= ~ALL_EMPTY;
+ else
+ notes |= SOME_EMPTY;
+
+ if (stream->collected_to < collected_to)
+ collected_to = stream->collected_to;
+ }
+
+ if (collected_to != ULLONG_MAX && collected_to > wreq->collected_to)
+ wreq->collected_to = collected_to;
+
+ /* If we have an empty stream, we need to jump it forward over any gap
+ * otherwise the collection point will never advance.
+ *
+ * Note that the issuer always adds to the stream with the lowest
+ * so-far submitted start, so if we see two consecutive subreqs in one
+ * stream with nothing between then in another stream, then the second
+ * stream has a gap that can be jumped.
+ */
+ if (notes & SOME_EMPTY) {
+ unsigned long long jump_to = wreq->start + wreq->len;
+
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->active &&
+ stream->front &&
+ stream->front->start < jump_to)
+ jump_to = stream->front->start;
+ }
+
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->active &&
+ !stream->front &&
+ stream->collected_to < jump_to) {
+ trace_netfs_collect_gap(wreq, stream, jump_to, 'B');
+ stream->collected_to = jump_to;
+ }
+ }
+ }
+
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->active)
+ trace_netfs_collect_stream(wreq, stream);
+ }
+
+ trace_netfs_collect_state(wreq, wreq->collected_to, notes);
+
+ /* Unlock any folios that we have now finished with. */
+ if (notes & BUFFERED) {
+ unsigned long long clean_to = min(wreq->collected_to, wreq->contiguity);
+
+ if (wreq->cleaned_to < clean_to)
+ netfs_writeback_unlock_folios(wreq, clean_to, &notes);
+ } else {
+ wreq->cleaned_to = wreq->collected_to;
+ }
+
+ // TODO: Discard encryption buffers
+
+ /* If all streams are discontiguous with the last folio we cleared, we
+ * may need to skip a set of folios.
+ */
+ if ((notes & (MAYBE_DISCONTIG | ALL_EMPTY)) == MAYBE_DISCONTIG) {
+ unsigned long long jump_to = ULLONG_MAX;
+
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->active && stream->front &&
+ stream->front->start < jump_to)
+ jump_to = stream->front->start;
+ }
+
+ trace_netfs_collect_contig(wreq, jump_to, netfs_contig_trace_jump);
+ wreq->contiguity = jump_to;
+ wreq->cleaned_to = jump_to;
+ wreq->collected_to = jump_to;
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->collected_to < jump_to)
+ stream->collected_to = jump_to;
+ }
+ //cond_resched();
+ notes |= MADE_PROGRESS;
+ goto reassess_streams;
+ }
+
+ if (notes & NEED_RETRY)
+ goto need_retry;
+ if ((notes & MADE_PROGRESS) && test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
+ trace_netfs_rreq(wreq, netfs_rreq_trace_unpause);
+ clear_bit_unlock(NETFS_RREQ_PAUSE, &wreq->flags);
+ wake_up_bit(&wreq->flags, NETFS_RREQ_PAUSE);
+ }
+
+ if (notes & NEED_REASSESS) {
+ //cond_resched();
+ goto reassess_streams;
+ }
+ if (notes & MADE_PROGRESS) {
+ //cond_resched();
+ goto reassess_streams;
+ }
+
+out:
+ netfs_put_group_many(wreq->group, wreq->nr_group_rel);
+ wreq->nr_group_rel = 0;
+ _leave(" = %x", notes);
+ return;
+
+need_retry:
+ /* Okay... We're going to have to retry one or both streams. Note
+ * that any partially completed op will have had any wholly transferred
+ * folios removed from it.
+ */
+ _debug("retry");
+ netfs_retry_writes(wreq);
+ goto out;
+}
+
+/*
+ * Perform the collection of subrequests, folios and encryption buffers.
+ */
+void netfs_write_collection_worker(struct work_struct *work)
+{
+ struct netfs_io_request *wreq = container_of(work, struct netfs_io_request, work);
+ struct netfs_inode *ictx = netfs_inode(wreq->inode);
+ size_t transferred;
+ int s;
+
+ _enter("R=%x", wreq->debug_id);
+
+ netfs_see_request(wreq, netfs_rreq_trace_see_work);
+ if (!test_bit(NETFS_RREQ_IN_PROGRESS, &wreq->flags)) {
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
+ return;
+ }
+
+ netfs_collect_write_results(wreq);
+
+ /* We're done when the app thread has finished posting subreqs and all
+ * the queues in all the streams are empty.
+ */
+ if (!test_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags)) {
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
+ return;
+ }
+ smp_rmb(); /* Read ALL_QUEUED before lists. */
+
+ transferred = LONG_MAX;
+ for (s = 0; s < NR_IO_STREAMS; s++) {
+ struct netfs_io_stream *stream = &wreq->io_streams[s];
+ if (!stream->active)
+ continue;
+ if (!list_empty(&stream->subrequests)) {
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_work);
+ return;
+ }
+ if (stream->transferred < transferred)
+ transferred = stream->transferred;
+ }
+
+ /* Okay, declare that all I/O is complete. */
+ wreq->transferred = transferred;
+ trace_netfs_rreq(wreq, netfs_rreq_trace_write_done);
+
+ if (wreq->io_streams[1].active &&
+ wreq->io_streams[1].failed) {
+ /* Cache write failure doesn't prevent writeback completion
+ * unless we're in disconnected mode.
+ */
+ ictx->ops->invalidate_cache(wreq);
+ }
+
+ if (wreq->cleanup)
+ wreq->cleanup(wreq);
+
+ if (wreq->origin == NETFS_DIO_WRITE &&
+ wreq->mapping->nrpages) {
+ /* mmap may have got underfoot and we may now have folios
+ * locally covering the region we just wrote. Attempt to
+ * discard the folios, but leave in place any that were modified locally.
+ * ->write_iter() is prevented from interfering by the DIO
+ * counter.
+ */
+ pgoff_t first = wreq->start >> PAGE_SHIFT;
+ pgoff_t last = (wreq->start + wreq->transferred - 1) >> PAGE_SHIFT;
+ invalidate_inode_pages2_range(wreq->mapping, first, last);
+ }
+
+ if (wreq->origin == NETFS_DIO_WRITE)
+ inode_dio_end(wreq->inode);
+
+ _debug("finished");
+ trace_netfs_rreq(wreq, netfs_rreq_trace_wake_ip);
+ clear_bit_unlock(NETFS_RREQ_IN_PROGRESS, &wreq->flags);
+ wake_up_bit(&wreq->flags, NETFS_RREQ_IN_PROGRESS);
+
+ if (wreq->iocb) {
+ wreq->iocb->ki_pos += wreq->transferred;
+ if (wreq->iocb->ki_complete)
+ wreq->iocb->ki_complete(
+ wreq->iocb, wreq->error ? wreq->error : wreq->transferred);
+ wreq->iocb = VFS_PTR_POISON;
+ }
+
+ netfs_clear_subrequests(wreq, false);
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_work_complete);
+}
+
+/*
+ * Wake the collection work item.
+ */
+void netfs_wake_write_collector(struct netfs_io_request *wreq, bool was_async)
+{
+ if (!work_pending(&wreq->work)) {
+ netfs_get_request(wreq, netfs_rreq_trace_get_work);
+ if (!queue_work(system_unbound_wq, &wreq->work))
+ netfs_put_request(wreq, was_async, netfs_rreq_trace_put_work_nq);
+ }
+}
+
+/**
+ * netfs_write_subrequest_terminated - Note the termination of a write operation.
+ * @_op: The I/O subrequest that has terminated.
+ * @transferred_or_error: The amount of data transferred or an error code.
+ * @was_async: The termination was asynchronous
+ *
+ * This tells the library that a contributory write I/O operation has
+ * terminated, one way or another, and that it should collect the results.
+ *
+ * The caller indicates in @transferred_or_error the outcome of the operation,
+ * supplying a positive value to indicate the number of bytes transferred or a
+ * negative error code. The library will look after reissuing I/O operations
+ * as appropriate and writing downloaded data to the cache.
+ *
+ * If @was_async is true, the caller might be running in softirq or interrupt
+ * context and we can't sleep.
+ *
+ * When this is called, ownership of the subrequest is transferred back to the
+ * library, along with a ref.
+ *
+ * Note that %_op is a void* so that the function can be used directly as a
+ * netfs_io_terminated_t callback (for example as a cache term_func) without
+ * the need for a casting wrapper.
+ */
+void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
+ bool was_async)
+{
+ struct netfs_io_subrequest *subreq = _op;
+ struct netfs_io_request *wreq = subreq->rreq;
+ struct netfs_io_stream *stream = &wreq->io_streams[subreq->stream_nr];
+
+ _enter("%x[%x] %zd", wreq->debug_id, subreq->debug_index, transferred_or_error);
+
+ switch (subreq->source) {
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload_done);
+ break;
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write_done);
+ break;
+ case NETFS_INVALID_WRITE:
+ break;
+ default:
+ BUG();
+ }
+
+ if (IS_ERR_VALUE(transferred_or_error)) {
+ subreq->error = transferred_or_error;
+ if (subreq->error == -EAGAIN)
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ else
+ set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ trace_netfs_failure(wreq, subreq, transferred_or_error, netfs_fail_write);
+
+ switch (subreq->source) {
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write_failed);
+ break;
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload_failed);
+ break;
+ default:
+ break;
+ }
+ trace_netfs_rreq(wreq, netfs_rreq_trace_set_pause);
+ set_bit(NETFS_RREQ_PAUSE, &wreq->flags);
+ } else {
+ if (WARN(transferred_or_error > subreq->len - subreq->transferred,
+ "Subreq excess write: R=%x[%x] %zd > %zu - %zu",
+ wreq->debug_id, subreq->debug_index,
+ transferred_or_error, subreq->len, subreq->transferred))
+ transferred_or_error = subreq->len - subreq->transferred;
+
+ subreq->error = 0;
+ subreq->transferred += transferred_or_error;
+
+ if (subreq->transferred < subreq->len)
+ set_bit(NETFS_SREQ_NEED_RETRY, &subreq->flags);
+ }
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_terminated);
+
+ clear_bit_unlock(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ wake_up_bit(&subreq->flags, NETFS_SREQ_IN_PROGRESS);
+
+ /* If we are at the head of the queue, wake up the collector,
+ * transferring a ref to it if we were the ones to do so.
+ */
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests))
+ netfs_wake_write_collector(wreq, was_async);
+
+ netfs_put_subrequest(subreq, was_async, netfs_sreq_trace_put_terminated);
+}
+EXPORT_SYMBOL(netfs_write_subrequest_terminated);
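
As the kerneldoc above describes, a filesystem's write handler performs the I/O however it likes and then reports the outcome with netfs_write_subrequest_terminated(), passing either the byte count or a negative error. A minimal sketch of such a handler follows; "myfs" and myfs_send_write() are hypothetical stand-ins for the filesystem's own transport call, not part of this patch.

static void myfs_issue_write(struct netfs_io_subrequest *subreq)
{
	int err = 0;
	ssize_t written;

	/* Send subreq->len bytes from subreq->io_iter, starting at
	 * subreq->start.  The (assumed) helper returns the number of bytes
	 * written and sets err on failure.
	 */
	written = myfs_send_write(subreq->rreq->netfs_priv, subreq->start,
				  &subreq->io_iter, &err);

	/* Hand ownership of the subrequest back to the library: a positive
	 * byte count or a negative error.  was_async is false because we
	 * completed synchronously in process context.
	 */
	netfs_write_subrequest_terminated(subreq, written ?: err, false);
}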
diff --git a/fs/netfs/write_issue.c b/fs/netfs/write_issue.c
new file mode 100644
index 000000000000..e190043bc0da
--- /dev/null
+++ b/fs/netfs/write_issue.c
@@ -0,0 +1,684 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Network filesystem high-level (buffered) writeback.
+ *
+ * Copyright (C) 2024 Red Hat, Inc. All Rights Reserved.
+ * Written by David Howells ([email protected])
+ *
+ *
+ * To support network filesystems with local caching, we manage a situation
+ * that can be envisioned like the following:
+ *
+ * +---+---+-----+-----+---+----------+
+ * Folios: | | | | | | |
+ * +---+---+-----+-----+---+----------+
+ *
+ * +------+------+ +----+----+
+ * Upload: | | |.....| | |
+ * (Stream 0) +------+------+ +----+----+
+ *
+ * +------+------+------+------+------+
+ * Cache: | | | | | |
+ * (Stream 1) +------+------+------+------+------+
+ *
+ * Where we have a sequence of folios of varying sizes that we need to overlay
+ * with multiple parallel streams of I/O requests, where the I/O requests in a
+ * stream may also be of various sizes (in cifs, for example, the sizes are
+ * negotiated with the server; in something like ceph, they may represent the
+ * sizes of storage objects).
+ *
+ * The sequence in each stream may contain gaps, and noncontiguous subrequests
+ * may be glued together into single vectored write RPCs.
+ */
+
+#include <linux/export.h>
+#include <linux/fs.h>
+#include <linux/mm.h>
+#include <linux/pagemap.h>
+#include "internal.h"
+
+/*
+ * Kill all dirty folios in the event of an unrecoverable error, starting with
+ * a locked folio we've already obtained from writeback_iter().
+ */
+static void netfs_kill_dirty_pages(struct address_space *mapping,
+ struct writeback_control *wbc,
+ struct folio *folio)
+{
+ int error = 0;
+
+ do {
+ enum netfs_folio_trace why = netfs_folio_trace_kill;
+ struct netfs_group *group = NULL;
+ struct netfs_folio *finfo = NULL;
+ void *priv;
+
+ priv = folio_detach_private(folio);
+ if (priv) {
+ finfo = __netfs_folio_info(priv);
+ if (finfo) {
+ /* Kill folio from streaming write. */
+ group = finfo->netfs_group;
+ why = netfs_folio_trace_kill_s;
+ } else {
+ group = priv;
+ if (group == NETFS_FOLIO_COPY_TO_CACHE) {
+ /* Kill copy-to-cache folio */
+ why = netfs_folio_trace_kill_cc;
+ group = NULL;
+ } else {
+ /* Kill folio with group */
+ why = netfs_folio_trace_kill_g;
+ }
+ }
+ }
+
+ trace_netfs_folio(folio, why);
+
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+ folio_end_writeback(folio);
+
+ netfs_put_group(group);
+ kfree(finfo);
+
+ } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
+}
+
+/*
+ * Create a write request and set it up appropriately for the origin type.
+ */
+struct netfs_io_request *netfs_create_write_req(struct address_space *mapping,
+ struct file *file,
+ loff_t start,
+ enum netfs_io_origin origin)
+{
+ struct netfs_io_request *wreq;
+ struct netfs_inode *ictx;
+
+ wreq = netfs_alloc_request(mapping, file, start, 0, origin);
+ if (IS_ERR(wreq))
+ return wreq;
+
+ _enter("R=%x", wreq->debug_id);
+
+ ictx = netfs_inode(wreq->inode);
+ if (test_bit(NETFS_RREQ_WRITE_TO_CACHE, &wreq->flags))
+ fscache_begin_write_operation(&wreq->cache_resources, netfs_i_cookie(ictx));
+
+ wreq->contiguity = wreq->start;
+ wreq->cleaned_to = wreq->start;
+ INIT_WORK(&wreq->work, netfs_write_collection_worker);
+
+ wreq->io_streams[0].stream_nr = 0;
+ wreq->io_streams[0].source = NETFS_UPLOAD_TO_SERVER;
+ wreq->io_streams[0].prepare_write = ictx->ops->prepare_write;
+ wreq->io_streams[0].issue_write = ictx->ops->issue_write;
+ wreq->io_streams[0].collected_to = start;
+ wreq->io_streams[0].transferred = LONG_MAX;
+
+ wreq->io_streams[1].stream_nr = 1;
+ wreq->io_streams[1].source = NETFS_WRITE_TO_CACHE;
+ wreq->io_streams[1].collected_to = start;
+ wreq->io_streams[1].transferred = LONG_MAX;
+ if (fscache_resources_valid(&wreq->cache_resources)) {
+ wreq->io_streams[1].avail = true;
+ wreq->io_streams[1].prepare_write = wreq->cache_resources.ops->prepare_write_subreq;
+ wreq->io_streams[1].issue_write = wreq->cache_resources.ops->issue_write;
+ }
+
+ return wreq;
+}
+
+/**
+ * netfs_prepare_write_failed - Note write preparation failed
+ * @subreq: The subrequest to mark
+ *
+ * Mark a subrequest to note that preparation for write failed.
+ */
+void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq)
+{
+ __set_bit(NETFS_SREQ_FAILED, &subreq->flags);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prep_failed);
+}
+EXPORT_SYMBOL(netfs_prepare_write_failed);
+
+/*
+ * Prepare a write subrequest. We need to allocate a new subrequest
+ * if we don't have one.
+ */
+static void netfs_prepare_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start)
+{
+ struct netfs_io_subrequest *subreq;
+
+ subreq = netfs_alloc_subrequest(wreq);
+ subreq->source = stream->source;
+ subreq->start = start;
+ subreq->max_len = ULONG_MAX;
+ subreq->max_nr_segs = INT_MAX;
+ subreq->stream_nr = stream->stream_nr;
+
+ _enter("R=%x[%x]", wreq->debug_id, subreq->debug_index);
+
+ trace_netfs_sreq_ref(wreq->debug_id, subreq->debug_index,
+ refcount_read(&subreq->ref),
+ netfs_sreq_trace_new);
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_prepare);
+
+ switch (stream->source) {
+ case NETFS_UPLOAD_TO_SERVER:
+ netfs_stat(&netfs_n_wh_upload);
+ subreq->max_len = wreq->wsize;
+ break;
+ case NETFS_WRITE_TO_CACHE:
+ netfs_stat(&netfs_n_wh_write);
+ break;
+ default:
+ WARN_ON_ONCE(1);
+ break;
+ }
+
+ if (stream->prepare_write)
+ stream->prepare_write(subreq);
+
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+
+ /* We add to the end of the list whilst the collector may be walking
+	 * the list. The collector only walks forwards and uses the lock to
+	 * remove entries from the front.
+ */
+ spin_lock(&wreq->lock);
+ list_add_tail(&subreq->rreq_link, &stream->subrequests);
+ if (list_is_first(&subreq->rreq_link, &stream->subrequests)) {
+ stream->front = subreq;
+ if (!stream->active) {
+ stream->collected_to = stream->front->start;
+ /* Write list pointers before active flag */
+ smp_store_release(&stream->active, true);
+ }
+ }
+
+ spin_unlock(&wreq->lock);
+
+ stream->construct = subreq;
+}
+
+/*
+ * Set the I/O iterator for the filesystem/cache to use and dispatch the I/O
+ * operation. The operation may be asynchronous and should call
+ * netfs_write_subrequest_terminated() when complete.
+ */
+static void netfs_do_issue_write(struct netfs_io_stream *stream,
+ struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *wreq = subreq->rreq;
+
+ _enter("R=%x[%x],%zx", wreq->debug_id, subreq->debug_index, subreq->len);
+
+ if (test_bit(NETFS_SREQ_FAILED, &subreq->flags))
+ return netfs_write_subrequest_terminated(subreq, subreq->error, false);
+
+ // TODO: Use encrypted buffer
+ if (test_bit(NETFS_RREQ_USE_IO_ITER, &wreq->flags)) {
+ subreq->io_iter = wreq->io_iter;
+ iov_iter_advance(&subreq->io_iter,
+ subreq->start + subreq->transferred - wreq->start);
+ iov_iter_truncate(&subreq->io_iter,
+ subreq->len - subreq->transferred);
+ } else {
+ iov_iter_xarray(&subreq->io_iter, ITER_SOURCE, &wreq->mapping->i_pages,
+ subreq->start + subreq->transferred,
+ subreq->len - subreq->transferred);
+ }
+
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
+ stream->issue_write(subreq);
+}
+
+void netfs_reissue_write(struct netfs_io_stream *stream,
+ struct netfs_io_subrequest *subreq)
+{
+ __set_bit(NETFS_SREQ_IN_PROGRESS, &subreq->flags);
+ netfs_do_issue_write(stream, subreq);
+}
+
+static void netfs_issue_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream)
+{
+ struct netfs_io_subrequest *subreq = stream->construct;
+
+ if (!subreq)
+ return;
+ stream->construct = NULL;
+
+ if (subreq->start + subreq->len > wreq->start + wreq->submitted)
+ wreq->len = wreq->submitted = subreq->start + subreq->len - wreq->start;
+ netfs_do_issue_write(stream, subreq);
+}
+
+/*
+ * Add data to the write subrequest, dispatching each as we fill it up or if it
+ * is discontiguous with the previous. We only fill one part at a time so that
+ * we can avoid overrunning the credits obtained (cifs) and try to parallelise
+ * content-crypto preparation with network writes.
+ */
+int netfs_advance_write(struct netfs_io_request *wreq,
+ struct netfs_io_stream *stream,
+ loff_t start, size_t len, bool to_eof)
+{
+ struct netfs_io_subrequest *subreq = stream->construct;
+ size_t part;
+
+ if (!stream->avail) {
+ _leave("no write");
+ return len;
+ }
+
+ _enter("R=%x[%x]", wreq->debug_id, subreq ? subreq->debug_index : 0);
+
+ if (subreq && start != subreq->start + subreq->len) {
+ netfs_issue_write(wreq, stream);
+ subreq = NULL;
+ }
+
+ if (!stream->construct)
+ netfs_prepare_write(wreq, stream, start);
+ subreq = stream->construct;
+
+ part = min(subreq->max_len - subreq->len, len);
+ _debug("part %zx/%zx %zx/%zx", subreq->len, subreq->max_len, part, len);
+ subreq->len += part;
+ subreq->nr_segs++;
+
+ if (subreq->len >= subreq->max_len ||
+ subreq->nr_segs >= subreq->max_nr_segs ||
+ to_eof) {
+ netfs_issue_write(wreq, stream);
+ subreq = NULL;
+ }
+
+ return part;
+}
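
The per-subrequest cap that this filling loop honours comes from ->prepare_write(), which netfs_prepare_write() invokes before any data is added. Below is a hedged sketch of such a hook, assuming a hypothetical myfs_write_credit_bytes() that reports how many bytes the server will currently accept in one RPC; capping subreq->max_len here is what keeps the min() above from overrunning the transport's credits.

static void myfs_prepare_write(struct netfs_io_subrequest *subreq)
{
	/* Hypothetical helper; a real filesystem would block here until at
	 * least some credit was available rather than return zero.
	 */
	size_t budget = myfs_write_credit_bytes(subreq->rreq->netfs_priv);

	/* netfs_advance_write() will now add at most this much data to the
	 * subrequest before issuing it.
	 */
	subreq->max_len = min(subreq->max_len, budget);
	subreq->max_nr_segs = 16;	/* e.g. a scatter/gather limit */
}

If preparation cannot succeed at all, the hook should instead call netfs_prepare_write_failed(), as documented earlier in this file.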
+
+/*
+ * Write some of a pending folio data back to the server.
+ */
+static int netfs_write_folio(struct netfs_io_request *wreq,
+ struct writeback_control *wbc,
+ struct folio *folio)
+{
+ struct netfs_io_stream *upload = &wreq->io_streams[0];
+ struct netfs_io_stream *cache = &wreq->io_streams[1];
+ struct netfs_io_stream *stream;
+ struct netfs_group *fgroup; /* TODO: Use this with ceph */
+ struct netfs_folio *finfo;
+ size_t fsize = folio_size(folio), flen = fsize, foff = 0;
+ loff_t fpos = folio_pos(folio), i_size;
+ bool to_eof = false, streamw = false;
+ bool debug = false;
+
+ _enter("");
+
+ /* netfs_perform_write() may shift i_size around the page or from out
+ * of the page to beyond it, but cannot move i_size into or through the
+ * page since we have it locked.
+ */
+ i_size = i_size_read(wreq->inode);
+
+ if (fpos >= i_size) {
+ /* mmap beyond eof. */
+ _debug("beyond eof");
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+ wreq->nr_group_rel += netfs_folio_written_back(folio);
+ netfs_put_group_many(wreq->group, wreq->nr_group_rel);
+ wreq->nr_group_rel = 0;
+ return 0;
+ }
+
+ if (fpos + fsize > wreq->i_size)
+ wreq->i_size = i_size;
+
+ fgroup = netfs_folio_group(folio);
+ finfo = netfs_folio_info(folio);
+ if (finfo) {
+ foff = finfo->dirty_offset;
+ flen = foff + finfo->dirty_len;
+ streamw = true;
+ }
+
+ if (wreq->origin == NETFS_WRITETHROUGH) {
+ to_eof = false;
+ if (flen > i_size - fpos)
+ flen = i_size - fpos;
+ } else if (flen > i_size - fpos) {
+ flen = i_size - fpos;
+ if (!streamw)
+ folio_zero_segment(folio, flen, fsize);
+ to_eof = true;
+ } else if (flen == i_size - fpos) {
+ to_eof = true;
+ }
+ flen -= foff;
+
+ _debug("folio %zx %zx %zx", foff, flen, fsize);
+
+ /* Deal with discontinuities in the stream of dirty pages. These can
+ * arise from a number of sources:
+ *
+ * (1) Intervening non-dirty pages from random-access writes, multiple
+ * flushers writing back different parts simultaneously and manual
+ * syncing.
+ *
+ * (2) Partially-written pages from write-streaming.
+ *
+ * (3) Pages that belong to a different write-back group (eg. Ceph
+ * snapshots).
+ *
+ * (4) Actually-clean pages that were marked for write to the cache
+ * when they were read. Note that these appear as a special
+ * write-back group.
+ */
+ if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
+ netfs_issue_write(wreq, upload);
+ } else if (fgroup != wreq->group) {
+ /* We can't write this page to the server yet. */
+ kdebug("wrong group");
+ folio_redirty_for_writepage(wbc, folio);
+ folio_unlock(folio);
+ netfs_issue_write(wreq, upload);
+ netfs_issue_write(wreq, cache);
+ return 0;
+ }
+
+ if (foff > 0)
+ netfs_issue_write(wreq, upload);
+ if (streamw)
+ netfs_issue_write(wreq, cache);
+
+ /* Flip the page to the writeback state and unlock. If we're called
+ * from write-through, then the page has already been put into the wb
+ * state.
+ */
+ if (wreq->origin == NETFS_WRITEBACK)
+ folio_start_writeback(folio);
+ folio_unlock(folio);
+
+ if (fgroup == NETFS_FOLIO_COPY_TO_CACHE) {
+ if (!fscache_resources_valid(&wreq->cache_resources)) {
+ trace_netfs_folio(folio, netfs_folio_trace_cancel_copy);
+ netfs_issue_write(wreq, upload);
+ netfs_folio_written_back(folio);
+ return 0;
+ }
+ trace_netfs_folio(folio, netfs_folio_trace_store_copy);
+ } else if (!upload->construct) {
+ trace_netfs_folio(folio, netfs_folio_trace_store);
+ } else {
+ trace_netfs_folio(folio, netfs_folio_trace_store_plus);
+ }
+
+ /* Move the submission point forward to allow for write-streaming data
+ * not starting at the front of the page. We don't do write-streaming
+ * with the cache as the cache requires DIO alignment.
+ *
+ * Also skip uploading for data that's been read and just needs copying
+ * to the cache.
+ */
+ for (int s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ stream->submit_max_len = fsize;
+ stream->submit_off = foff;
+ stream->submit_len = flen;
+ if ((stream->source == NETFS_WRITE_TO_CACHE && streamw) ||
+ (stream->source == NETFS_UPLOAD_TO_SERVER &&
+ fgroup == NETFS_FOLIO_COPY_TO_CACHE)) {
+ stream->submit_off = UINT_MAX;
+ stream->submit_len = 0;
+ stream->submit_max_len = 0;
+ }
+ }
+
+ /* Attach the folio to one or more subrequests. For a big folio, we
+ * could end up with thousands of subrequests if the wsize is small -
+ * but we might need to wait during the creation of subrequests for
+ * network resources (eg. SMB credits).
+ */
+ for (;;) {
+ ssize_t part;
+ size_t lowest_off = ULONG_MAX;
+ int choose_s = -1;
+
+ /* Always add to the lowest-submitted stream first. */
+ for (int s = 0; s < NR_IO_STREAMS; s++) {
+ stream = &wreq->io_streams[s];
+ if (stream->submit_len > 0 &&
+ stream->submit_off < lowest_off) {
+ lowest_off = stream->submit_off;
+ choose_s = s;
+ }
+ }
+
+ if (choose_s < 0)
+ break;
+ stream = &wreq->io_streams[choose_s];
+
+ part = netfs_advance_write(wreq, stream, fpos + stream->submit_off,
+ stream->submit_len, to_eof);
+ atomic64_set(&wreq->issued_to, fpos + stream->submit_off);
+ stream->submit_off += part;
+ stream->submit_max_len -= part;
+ if (part > stream->submit_len)
+ stream->submit_len = 0;
+ else
+ stream->submit_len -= part;
+ if (part > 0)
+ debug = true;
+ }
+
+ atomic64_set(&wreq->issued_to, fpos + fsize);
+
+ if (!debug)
+ kdebug("R=%x: No submit", wreq->debug_id);
+
+ if (flen < fsize)
+ for (int s = 0; s < NR_IO_STREAMS; s++)
+ netfs_issue_write(wreq, &wreq->io_streams[s]);
+
+ _leave(" = 0");
+ return 0;
+}
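
To make the submission logic above concrete, consider a hypothetical 16KiB folio carrying a streaming write of 4KiB at offset 8KiB within i_size (so foff = 0x2000 and, after the adjustment, flen = 0x1000). Both streams initially get submit_off = 0x2000 and submit_len = 0x1000, but the cache stream is then knocked out (submit_off = UINT_MAX) because write-streaming data is not written to the cache. Since foff is non-zero, any upload subrequest under construction is issued first so that the gap is not glued over; the loop then feeds only the upload stream, and because flen < fsize both streams are issued again at the end so that nothing is glued across the clean tail of the folio.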
+
+/*
+ * Write some of the pending data back to the server
+ */
+int netfs_writepages(struct address_space *mapping,
+ struct writeback_control *wbc)
+{
+ struct netfs_inode *ictx = netfs_inode(mapping->host);
+ struct netfs_io_request *wreq = NULL;
+ struct folio *folio;
+ int error = 0;
+
+ if (wbc->sync_mode == WB_SYNC_ALL)
+ mutex_lock(&ictx->wb_lock);
+ else if (!mutex_trylock(&ictx->wb_lock))
+ return 0;
+
+ /* Need the first folio to be able to set up the op. */
+ folio = writeback_iter(mapping, wbc, NULL, &error);
+ if (!folio)
+ goto out;
+
+ wreq = netfs_create_write_req(mapping, NULL, folio_pos(folio), NETFS_WRITEBACK);
+ if (IS_ERR(wreq)) {
+ error = PTR_ERR(wreq);
+ goto couldnt_start;
+ }
+
+ trace_netfs_write(wreq, netfs_write_trace_writeback);
+ netfs_stat(&netfs_n_wh_writepages);
+
+ do {
+ _debug("wbiter %lx %llx", folio->index, wreq->start + wreq->submitted);
+
+ /* It appears we don't have to handle cyclic writeback wrapping. */
+ WARN_ON_ONCE(wreq && folio_pos(folio) < wreq->start + wreq->submitted);
+
+ if (netfs_folio_group(folio) != NETFS_FOLIO_COPY_TO_CACHE &&
+ unlikely(!test_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags))) {
+ set_bit(NETFS_RREQ_UPLOAD_TO_SERVER, &wreq->flags);
+ wreq->netfs_ops->begin_writeback(wreq);
+ }
+
+ error = netfs_write_folio(wreq, wbc, folio);
+ if (error < 0)
+ break;
+ } while ((folio = writeback_iter(mapping, wbc, folio, &error)));
+
+ for (int s = 0; s < NR_IO_STREAMS; s++)
+ netfs_issue_write(wreq, &wreq->io_streams[s]);
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+
+ mutex_unlock(&ictx->wb_lock);
+
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ _leave(" = %d", error);
+ return error;
+
+couldnt_start:
+ netfs_kill_dirty_pages(mapping, wbc, folio);
+out:
+ mutex_unlock(&ictx->wb_lock);
+ _leave(" = %d", error);
+ return error;
+}
+EXPORT_SYMBOL(netfs_writepages);
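
For a filesystem using these helpers, writeback simply enters through ->writepages. A hypothetical wiring (the read-side and dirty-tracking hooks are elided) might look like:

static const struct address_space_operations myfs_aops = {
	.writepages		= netfs_writepages,
	.invalidate_folio	= netfs_invalidate_folio,
	.release_folio		= netfs_release_folio,
	/* read-side and dirty-folio hooks elided */
};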
+
+/*
+ * Begin a write operation for writing through the pagecache.
+ */
+struct netfs_io_request *netfs_begin_writethrough(struct kiocb *iocb, size_t len)
+{
+ struct netfs_io_request *wreq = NULL;
+ struct netfs_inode *ictx = netfs_inode(file_inode(iocb->ki_filp));
+
+ mutex_lock(&ictx->wb_lock);
+
+ wreq = netfs_create_write_req(iocb->ki_filp->f_mapping, iocb->ki_filp,
+ iocb->ki_pos, NETFS_WRITETHROUGH);
+ if (IS_ERR(wreq)) {
+ mutex_unlock(&ictx->wb_lock);
+ return wreq;
+ }
+
+ wreq->io_streams[0].avail = true;
+ trace_netfs_write(wreq, netfs_write_trace_writethrough);
+ return wreq;
+}
+
+/*
+ * Advance the state of the write operation used when writing through the
+ * pagecache. Data has been copied into the pagecache that we need to append
+ * to the request. If we've added more than wsize then we need to create a new
+ * subrequest.
+ */
+int netfs_advance_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *folio, size_t copied, bool to_page_end,
+ struct folio **writethrough_cache)
+{
+ _enter("R=%x ic=%zu ws=%u cp=%zu tp=%u",
+ wreq->debug_id, wreq->iter.count, wreq->wsize, copied, to_page_end);
+
+ if (!*writethrough_cache) {
+ if (folio_test_dirty(folio))
+ /* Sigh. mmap. */
+ folio_clear_dirty_for_io(folio);
+
+ /* We can make multiple writes to the folio... */
+ folio_start_writeback(folio);
+ if (wreq->len == 0)
+ trace_netfs_folio(folio, netfs_folio_trace_wthru);
+ else
+ trace_netfs_folio(folio, netfs_folio_trace_wthru_plus);
+ *writethrough_cache = folio;
+ }
+
+ wreq->len += copied;
+ if (!to_page_end)
+ return 0;
+
+ *writethrough_cache = NULL;
+ return netfs_write_folio(wreq, wbc, folio);
+}
+
+/*
+ * End a write operation used when writing through the pagecache.
+ */
+int netfs_end_writethrough(struct netfs_io_request *wreq, struct writeback_control *wbc,
+ struct folio *writethrough_cache)
+{
+ struct netfs_inode *ictx = netfs_inode(wreq->inode);
+ int ret;
+
+ _enter("R=%x", wreq->debug_id);
+
+ if (writethrough_cache)
+ netfs_write_folio(wreq, wbc, writethrough_cache);
+
+ netfs_issue_write(wreq, &wreq->io_streams[0]);
+ netfs_issue_write(wreq, &wreq->io_streams[1]);
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+
+ mutex_unlock(&ictx->wb_lock);
+
+ ret = wreq->error;
+ netfs_put_request(wreq, false, netfs_rreq_trace_put_return);
+ return ret;
+}
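
Taken together, the three helpers above give the write-through path its shape: begin once per write, advance once per chunk copied into the pagecache, and end once at the close. A minimal sketch follows, assuming the caller has already copied 'copied' bytes into 'folio' and knows whether the copy reached the end of the folio; myfs_do_writethrough() and its parameters are illustrative only.

static ssize_t myfs_do_writethrough(struct kiocb *iocb,
				    struct writeback_control *wbc,
				    struct folio *folio, size_t copied,
				    bool to_page_end, size_t count)
{
	struct netfs_io_request *wreq;
	struct folio *wthru_cache = NULL;
	int err, ret;

	wreq = netfs_begin_writethrough(iocb, count);
	if (IS_ERR(wreq))
		return PTR_ERR(wreq);

	/* Called for each chunk copied into the pagecache; a new subrequest
	 * is started automatically once more than wsize has been added.
	 */
	err = netfs_advance_writethrough(wreq, wbc, folio, copied,
					 to_page_end, &wthru_cache);

	/* Flushes any folio still held in wthru_cache, queues the final
	 * subrequests and drops the writeback lock.
	 */
	ret = netfs_end_writethrough(wreq, wbc, wthru_cache);
	return err < 0 ? err : ret;
}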
+
+/*
+ * Write data to the server without going through the pagecache and without
+ * writing it to the local cache.
+ */
+int netfs_unbuffered_write(struct netfs_io_request *wreq, bool may_wait, size_t len)
+{
+ struct netfs_io_stream *upload = &wreq->io_streams[0];
+ ssize_t part;
+ loff_t start = wreq->start;
+ int error = 0;
+
+ _enter("%zx", len);
+
+ if (wreq->origin == NETFS_DIO_WRITE)
+ inode_dio_begin(wreq->inode);
+
+ while (len) {
+ // TODO: Prepare content encryption
+
+ _debug("unbuffered %zx", len);
+ part = netfs_advance_write(wreq, upload, start, len, false);
+ start += part;
+ len -= part;
+ if (test_bit(NETFS_RREQ_PAUSE, &wreq->flags)) {
+ trace_netfs_rreq(wreq, netfs_rreq_trace_wait_pause);
+ wait_on_bit(&wreq->flags, NETFS_RREQ_PAUSE, TASK_UNINTERRUPTIBLE);
+ }
+ if (test_bit(NETFS_RREQ_FAILED, &wreq->flags))
+ break;
+ }
+
+ netfs_issue_write(wreq, upload);
+
+ smp_wmb(); /* Write lists before ALL_QUEUED. */
+ set_bit(NETFS_RREQ_ALL_QUEUED, &wreq->flags);
+ if (list_empty(&upload->subrequests))
+ netfs_wake_write_collector(wreq, false);
+
+ _leave(" = %d", error);
+ return error;
+}
diff --git a/fs/nfs/file.c b/fs/nfs/file.c
index 407c6e15afe2..6bd127e6683d 100644
--- a/fs/nfs/file.c
+++ b/fs/nfs/file.c
@@ -433,7 +433,7 @@ static void nfs_invalidate_folio(struct folio *folio, size_t offset,
return;
/* Cancel any unstarted writes on this page */
nfs_wb_folio_cancel(inode, folio);
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
trace_nfs_invalidate_folio(inode, folio);
}
@@ -500,7 +500,7 @@ static int nfs_launder_folio(struct folio *folio)
dfprintk(PAGECACHE, "NFS: launder_folio(%ld, %llu)\n",
inode->i_ino, folio_pos(folio));
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
ret = nfs_wb_folio(inode, folio);
trace_nfs_launder_folio_done(inode, folio, ret);
return ret;
@@ -593,8 +593,8 @@ static vm_fault_t nfs_vm_page_mkwrite(struct vm_fault *vmf)
sb_start_pagefault(inode->i_sb);
/* make sure the cache has finished storing the page */
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0) {
+ if (folio_test_private_2(folio) && /* [DEPRECATED] */
+ folio_wait_private_2_killable(folio) < 0) {
ret = VM_FAULT_RETRY;
goto out;
}
diff --git a/fs/nfs/fscache.h b/fs/nfs/fscache.h
index e3cb4923316b..fbed0027996f 100644
--- a/fs/nfs/fscache.h
+++ b/fs/nfs/fscache.h
@@ -81,6 +81,8 @@ static inline void nfs_netfs_put(struct nfs_netfs_io_data *netfs)
static inline void nfs_netfs_inode_init(struct nfs_inode *nfsi)
{
netfs_inode_init(&nfsi->netfs, &nfs_netfs_ops, false);
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_ICTX_USE_PGPRIV2, &nfsi->netfs.flags);
}
extern void nfs_netfs_initiate_read(struct nfs_pgio_header *hdr);
extern void nfs_netfs_read_completion(struct nfs_pgio_header *hdr);
@@ -101,10 +103,10 @@ extern int nfs_netfs_read_folio(struct file *file, struct folio *folio);
static inline bool nfs_fscache_release_folio(struct folio *folio, gfp_t gfp)
{
- if (folio_test_fscache(folio)) {
+ if (folio_test_private_2(folio)) { /* [DEPRECATED] */
if (current_is_kswapd() || !(gfp & __GFP_FS))
return false;
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio);
}
fscache_note_page_release(netfs_i_cookie(netfs_inode(folio->mapping->host)));
return true;
diff --git a/fs/nfs/write.c b/fs/nfs/write.c
index 5de85d725fb9..2329cbb0e446 100644
--- a/fs/nfs/write.c
+++ b/fs/nfs/write.c
@@ -2120,10 +2120,10 @@ int nfs_migrate_folio(struct address_space *mapping, struct folio *dst,
if (folio_test_private(src))
return -EBUSY;
- if (folio_test_fscache(src)) {
+ if (folio_test_private_2(src)) { /* [DEPRECATED] */
if (mode == MIGRATE_ASYNC)
return -EBUSY;
- folio_wait_fscache(src);
+ folio_wait_private_2(src);
}
return migrate_folio(mapping, dst, src, mode);
diff --git a/fs/smb/client/file.c b/fs/smb/client/file.c
index 9be37d0fe724..b39caae652f6 100644
--- a/fs/smb/client/file.c
+++ b/fs/smb/client/file.c
@@ -3032,12 +3032,12 @@ lock_again:
}
if (folio_test_writeback(folio) ||
- folio_test_fscache(folio)) {
+ folio_test_private_2(folio)) { /* [DEPRECATED] */
folio_unlock(folio);
if (wbc->sync_mode != WB_SYNC_NONE) {
folio_wait_writeback(folio);
#ifdef CONFIG_CIFS_FSCACHE
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio);
#endif
goto lock_again;
}
@@ -4510,8 +4510,8 @@ static vm_fault_t cifs_page_mkwrite(struct vm_fault *vmf)
* be modified. We then assume the entire folio will need writing back.
*/
#ifdef CONFIG_CIFS_FSCACHE
- if (folio_test_fscache(folio) &&
- folio_wait_fscache_killable(folio) < 0)
+ if (folio_test_private_2(folio) && /* [DEPRECATED] */
+ folio_wait_private_2_killable(folio) < 0)
return VM_FAULT_RETRY;
#endif
@@ -4977,10 +4977,10 @@ static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
{
if (folio_test_private(folio))
return 0;
- if (folio_test_fscache(folio)) {
+ if (folio_test_private_2(folio)) { /* [DEPRECATED] */
if (current_is_kswapd() || !(gfp & __GFP_FS))
return false;
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio);
}
fscache_note_page_release(cifs_inode_cookie(folio->mapping->host));
return true;
@@ -4989,7 +4989,7 @@ static bool cifs_release_folio(struct folio *folio, gfp_t gfp)
static void cifs_invalidate_folio(struct folio *folio, size_t offset,
size_t length)
{
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
}
static int cifs_launder_folio(struct folio *folio)
@@ -5009,7 +5009,7 @@ static int cifs_launder_folio(struct folio *folio)
if (folio_clear_dirty_for_io(folio))
rc = cifs_writepage_locked(&folio->page, &wbc);
- folio_wait_fscache(folio);
+ folio_wait_private_2(folio); /* [DEPRECATED] */
return rc;
}
diff --git a/include/linux/fscache.h b/include/linux/fscache.h
index 6e8562cbcc43..9de27643607f 100644
--- a/include/linux/fscache.h
+++ b/include/linux/fscache.h
@@ -172,9 +172,12 @@ extern void __fscache_invalidate(struct fscache_cookie *, const void *, loff_t,
extern int __fscache_begin_read_operation(struct netfs_cache_resources *, struct fscache_cookie *);
extern int __fscache_begin_write_operation(struct netfs_cache_resources *, struct fscache_cookie *);
-extern void __fscache_write_to_cache(struct fscache_cookie *, struct address_space *,
- loff_t, size_t, loff_t, netfs_io_terminated_t, void *,
- bool);
+void __fscache_write_to_cache(struct fscache_cookie *cookie,
+ struct address_space *mapping,
+ loff_t start, size_t len, loff_t i_size,
+ netfs_io_terminated_t term_func,
+ void *term_func_priv,
+ bool using_pgpriv2, bool cond);
extern void __fscache_clear_page_bits(struct address_space *, loff_t, size_t);
/**
@@ -597,7 +600,8 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
* @i_size: The new size of the inode
* @term_func: The function to call upon completion
* @term_func_priv: The private data for @term_func
- * @caching: If PG_fscache has been set
+ * @using_pgpriv2: If we're using PG_private_2 to mark the in-progress write
+ * @caching: If we actually want to do the caching
*
* Helper function for a netfs to write dirty data from an inode into the cache
* object that's backing it.
@@ -608,19 +612,21 @@ static inline void fscache_clear_page_bits(struct address_space *mapping,
* marked with PG_fscache.
*
* If given, @term_func will be called upon completion and supplied with
- * @term_func_priv. Note that the PG_fscache flags will have been cleared by
- * this point, so the netfs must retain its own pin on the mapping.
+ * @term_func_priv. Note that if @using_pgpriv2 is set, the PG_private_2 flags
+ * will have been cleared by this point, so the netfs must retain its own pin
+ * on the mapping.
*/
static inline void fscache_write_to_cache(struct fscache_cookie *cookie,
struct address_space *mapping,
loff_t start, size_t len, loff_t i_size,
netfs_io_terminated_t term_func,
void *term_func_priv,
- bool caching)
+ bool using_pgpriv2, bool caching)
{
if (caching)
__fscache_write_to_cache(cookie, mapping, start, len, i_size,
- term_func, term_func_priv, caching);
+ term_func, term_func_priv,
+ using_pgpriv2, caching);
else if (term_func)
term_func(term_func_priv, -ENOBUFS, false);
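
With the new @using_pgpriv2 argument, a caller that does not rely on PG_private_2 passes false and handles completion purely through @term_func. A hedged sketch follows; myfs_cookie() and the priv handling are hypothetical.

static void myfs_copy_done(void *priv, ssize_t transferred_or_error,
			   bool was_async)
{
	/* Release whatever pin 'priv' represents; no PG_private_2 marks are
	 * involved in this mode.
	 */
}

static void myfs_copy_to_cache(struct inode *inode, loff_t start, size_t len)
{
	fscache_write_to_cache(myfs_cookie(inode), inode->i_mapping,
			       start, len, i_size_read(inode),
			       myfs_copy_done, NULL /* term_func_priv */,
			       false /* using_pgpriv2 */,
			       true /* caching */);
}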
diff --git a/include/linux/netfs.h b/include/linux/netfs.h
index 100cbb261269..298552f5122c 100644
--- a/include/linux/netfs.h
+++ b/include/linux/netfs.h
@@ -20,95 +20,24 @@
#include <linux/uio.h>
enum netfs_sreq_ref_trace;
-
-/*
- * Overload PG_private_2 to give us PG_fscache - this is used to indicate that
- * a page is currently backed by a local disk cache
- */
-#define folio_test_fscache(folio) folio_test_private_2(folio)
-#define PageFsCache(page) PagePrivate2((page))
-#define SetPageFsCache(page) SetPagePrivate2((page))
-#define ClearPageFsCache(page) ClearPagePrivate2((page))
-#define TestSetPageFsCache(page) TestSetPagePrivate2((page))
-#define TestClearPageFsCache(page) TestClearPagePrivate2((page))
+typedef struct mempool_s mempool_t;
/**
- * folio_start_fscache - Start an fscache write on a folio.
+ * folio_start_private_2 - Start an fscache write on a folio. [DEPRECATED]
* @folio: The folio.
*
* Call this function before writing a folio to a local cache. Starting a
* second write before the first one finishes is not allowed.
+ *
+ * Note that this should no longer be used.
*/
-static inline void folio_start_fscache(struct folio *folio)
+static inline void folio_start_private_2(struct folio *folio)
{
VM_BUG_ON_FOLIO(folio_test_private_2(folio), folio);
folio_get(folio);
folio_set_private_2(folio);
}
-/**
- * folio_end_fscache - End an fscache write on a folio.
- * @folio: The folio.
- *
- * Call this function after the folio has been written to the local cache.
- * This will wake any sleepers waiting on this folio.
- */
-static inline void folio_end_fscache(struct folio *folio)
-{
- folio_end_private_2(folio);
-}
-
-/**
- * folio_wait_fscache - Wait for an fscache write on this folio to end.
- * @folio: The folio.
- *
- * If this folio is currently being written to a local cache, wait for
- * the write to finish. Another write may start after this one finishes,
- * unless the caller holds the folio lock.
- */
-static inline void folio_wait_fscache(struct folio *folio)
-{
- folio_wait_private_2(folio);
-}
-
-/**
- * folio_wait_fscache_killable - Wait for an fscache write on this folio to end.
- * @folio: The folio.
- *
- * If this folio is currently being written to a local cache, wait
- * for the write to finish or for a fatal signal to be received.
- * Another write may start after this one finishes, unless the caller
- * holds the folio lock.
- *
- * Return:
- * - 0 if successful.
- * - -EINTR if a fatal signal was encountered.
- */
-static inline int folio_wait_fscache_killable(struct folio *folio)
-{
- return folio_wait_private_2_killable(folio);
-}
-
-static inline void set_page_fscache(struct page *page)
-{
- folio_start_fscache(page_folio(page));
-}
-
-static inline void end_page_fscache(struct page *page)
-{
- folio_end_private_2(page_folio(page));
-}
-
-static inline void wait_on_page_fscache(struct page *page)
-{
- folio_wait_private_2(page_folio(page));
-}
-
-static inline int wait_on_page_fscache_killable(struct page *page)
-{
- return folio_wait_private_2_killable(page_folio(page));
-}
-
/* Marks used on xarray-based buffers */
#define NETFS_BUF_PUT_MARK XA_MARK_0 /* - Page needs putting */
#define NETFS_BUF_PAGECACHE_MARK XA_MARK_1 /* - Page needs wb/dirty flag wrangling */
@@ -135,6 +64,7 @@ struct netfs_inode {
#if IS_ENABLED(CONFIG_FSCACHE)
struct fscache_cookie *cache;
#endif
+ struct mutex wb_lock; /* Writeback serialisation */
loff_t remote_i_size; /* Size of the remote file */
loff_t zero_point; /* Size after which we assume there's no data
* on the server */
@@ -142,7 +72,8 @@ struct netfs_inode {
#define NETFS_ICTX_ODIRECT 0 /* The file has DIO in progress */
#define NETFS_ICTX_UNBUFFERED 1 /* I/O should not use the pagecache */
#define NETFS_ICTX_WRITETHROUGH 2 /* Write-through caching */
-#define NETFS_ICTX_NO_WRITE_STREAMING 3 /* Don't engage in write-streaming */
+#define NETFS_ICTX_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
+ * write to cache on read */
};
/*
@@ -165,16 +96,25 @@ struct netfs_folio {
unsigned int dirty_len; /* Write-streaming dirty data length */
};
#define NETFS_FOLIO_INFO 0x1UL /* OR'd with folio->private. */
+#define NETFS_FOLIO_COPY_TO_CACHE ((struct netfs_group *)0x356UL) /* Write to the cache only */
-static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
+static inline bool netfs_is_folio_info(const void *priv)
{
- void *priv = folio_get_private(folio);
+ return (unsigned long)priv & NETFS_FOLIO_INFO;
+}
- if ((unsigned long)priv & NETFS_FOLIO_INFO)
+static inline struct netfs_folio *__netfs_folio_info(const void *priv)
+{
+ if (netfs_is_folio_info(priv))
return (struct netfs_folio *)((unsigned long)priv & ~NETFS_FOLIO_INFO);
return NULL;
}
+static inline struct netfs_folio *netfs_folio_info(struct folio *folio)
+{
+ return __netfs_folio_info(folio_get_private(folio));
+}
+
static inline struct netfs_group *netfs_folio_group(struct folio *folio)
{
struct netfs_folio *finfo;
@@ -187,6 +127,33 @@ static inline struct netfs_group *netfs_folio_group(struct folio *folio)
}
/*
+ * Stream of I/O subrequests going to a particular destination, such as the
+ * server or the local cache. This is mainly intended for writing where we may
+ * have to write to multiple destinations concurrently.
+ */
+struct netfs_io_stream {
+ /* Submission tracking */
+ struct netfs_io_subrequest *construct; /* Op being constructed */
+ unsigned int submit_off; /* Folio offset we're submitting from */
+ unsigned int submit_len; /* Amount of data left to submit */
+ unsigned int submit_max_len; /* Amount I/O can be rounded up to */
+ void (*prepare_write)(struct netfs_io_subrequest *subreq);
+ void (*issue_write)(struct netfs_io_subrequest *subreq);
+ /* Collection tracking */
+ struct list_head subrequests; /* Contributory I/O operations */
+ struct netfs_io_subrequest *front; /* Op being collected */
+ unsigned long long collected_to; /* Position we've collected results to */
+ size_t transferred; /* The amount transferred from this stream */
+ enum netfs_io_source source; /* Where to read from/write to */
+ unsigned short error; /* Aggregate error for the stream */
+ unsigned char stream_nr; /* Index of stream in parent table */
+ bool avail; /* T if stream is available */
+ bool active; /* T if stream is active */
+ bool need_retry; /* T if this stream needs retrying */
+ bool failed; /* T if this stream failed */
+};
+
+/*
* Resources required to do operations on a cache.
*/
struct netfs_cache_resources {
@@ -209,14 +176,17 @@ struct netfs_io_subrequest {
struct work_struct work;
struct list_head rreq_link; /* Link in rreq->subrequests */
struct iov_iter io_iter; /* Iterator for this subrequest */
- loff_t start; /* Where to start the I/O */
+ unsigned long long start; /* Where to start the I/O */
+ size_t max_len; /* Maximum size of the I/O */
size_t len; /* Size of the I/O */
size_t transferred; /* Amount of data transferred */
refcount_t ref;
short error; /* 0 or error that occurred */
unsigned short debug_index; /* Index in list (for debugging output) */
+ unsigned int nr_segs; /* Number of segs in io_iter */
unsigned int max_nr_segs; /* 0 or max number of segments in an iterator */
enum netfs_io_source source; /* Where to read from/write to */
+ unsigned char stream_nr; /* I/O stream this belongs to */
unsigned long flags;
#define NETFS_SREQ_COPY_TO_CACHE 0 /* Set if should copy the data to the cache */
#define NETFS_SREQ_CLEAR_TAIL 1 /* Set if the rest of the read should be cleared */
@@ -224,15 +194,20 @@ struct netfs_io_subrequest {
#define NETFS_SREQ_SEEK_DATA_READ 3 /* Set if ->read() should SEEK_DATA first */
#define NETFS_SREQ_NO_PROGRESS 4 /* Set if we didn't manage to read any data */
#define NETFS_SREQ_ONDEMAND 5 /* Set if it's from on-demand read mode */
+#define NETFS_SREQ_BOUNDARY 6 /* Set if ends on hard boundary (eg. ceph object) */
+#define NETFS_SREQ_IN_PROGRESS 8 /* Unlocked when the subrequest completes */
+#define NETFS_SREQ_NEED_RETRY 9 /* Set if the filesystem requests a retry */
+#define NETFS_SREQ_RETRYING 10 /* Set if we're retrying */
+#define NETFS_SREQ_FAILED 11 /* Set if the subreq failed unretryably */
};
enum netfs_io_origin {
NETFS_READAHEAD, /* This read was triggered by readahead */
NETFS_READPAGE, /* This read is a synchronous read */
NETFS_READ_FOR_WRITE, /* This read is to prepare a write */
+ NETFS_COPY_TO_CACHE, /* This write is to copy a read to the cache */
NETFS_WRITEBACK, /* This write was triggered by writepages */
NETFS_WRITETHROUGH, /* This write was made by netfs_perform_write() */
- NETFS_LAUNDER_WRITE, /* This is triggered by ->launder_folio() */
NETFS_UNBUFFERED_WRITE, /* This is an unbuffered write */
NETFS_DIO_READ, /* This is a direct I/O read */
NETFS_DIO_WRITE, /* This is a direct I/O write */
@@ -254,26 +229,36 @@ struct netfs_io_request {
struct netfs_cache_resources cache_resources;
struct list_head proc_link; /* Link in netfs_iorequests */
struct list_head subrequests; /* Contributory I/O operations */
+ struct netfs_io_stream io_streams[2]; /* Streams of parallel I/O operations */
+#define NR_IO_STREAMS 2 //wreq->nr_io_streams
+ struct netfs_group *group; /* Writeback group being written back */
struct iov_iter iter; /* Unencrypted-side iterator */
struct iov_iter io_iter; /* I/O (Encrypted-side) iterator */
void *netfs_priv; /* Private data for the netfs */
+ void *netfs_priv2; /* Private data for the netfs */
struct bio_vec *direct_bv; /* DIO buffer list (when handling iovec-iter) */
unsigned int direct_bv_count; /* Number of elements in direct_bv[] */
unsigned int debug_id;
unsigned int rsize; /* Maximum read size (0 for none) */
unsigned int wsize; /* Maximum write size (0 for none) */
- unsigned int subreq_counter; /* Next subreq->debug_index */
+ atomic_t subreq_counter; /* Next subreq->debug_index */
+ unsigned int nr_group_rel; /* Number of refs to release on ->group */
+ spinlock_t lock; /* Lock for queuing subreqs */
atomic_t nr_outstanding; /* Number of ops in progress */
atomic_t nr_copy_ops; /* Number of copy-to-cache ops in progress */
- size_t submitted; /* Amount submitted for I/O so far */
- size_t len; /* Length of the request */
size_t upper_len; /* Length can be extended to here */
+ unsigned long long submitted; /* Amount submitted for I/O so far */
+ unsigned long long len; /* Length of the request */
size_t transferred; /* Amount to be indicated as transferred */
short error; /* 0 or error that occurred */
enum netfs_io_origin origin; /* Origin of the request */
bool direct_bv_unpin; /* T if direct_bv[] must be unpinned */
- loff_t i_size; /* Size of the file */
- loff_t start; /* Start position */
+ unsigned long long i_size; /* Size of the file */
+ unsigned long long start; /* Start position */
+ atomic64_t issued_to; /* Write issuer folio cursor */
+ unsigned long long contiguity; /* Tracking for gaps in the writeback sequence */
+ unsigned long long collected_to; /* Point we've collected to */
+ unsigned long long cleaned_to; /* Position we've cleaned folios to */
pgoff_t no_unlock_folio; /* Don't unlock this folio after read */
refcount_t ref;
unsigned long flags;
@@ -287,6 +272,11 @@ struct netfs_io_request {
#define NETFS_RREQ_UPLOAD_TO_SERVER 8 /* Need to write to the server */
#define NETFS_RREQ_NONBLOCK 9 /* Don't block if possible (O_NONBLOCK) */
#define NETFS_RREQ_BLOCKED 10 /* We blocked */
+#define NETFS_RREQ_PAUSE 11 /* Pause subrequest generation */
+#define NETFS_RREQ_USE_IO_ITER 12 /* Use ->io_iter rather than ->i_pages */
+#define NETFS_RREQ_ALL_QUEUED 13 /* All subreqs are now queued */
+#define NETFS_RREQ_USE_PGPRIV2 31 /* [DEPRECATED] Use PG_private_2 to mark
+ * write to cache on read */
const struct netfs_request_ops *netfs_ops;
void (*cleanup)(struct netfs_io_request *req);
};
@@ -295,8 +285,8 @@ struct netfs_io_request {
* Operations the network filesystem can/must provide to the helpers.
*/
struct netfs_request_ops {
- unsigned int io_request_size; /* Alloc size for netfs_io_request struct */
- unsigned int io_subrequest_size; /* Alloc size for netfs_io_subrequest struct */
+ mempool_t *request_pool;
+ mempool_t *subrequest_pool;
int (*init_request)(struct netfs_io_request *rreq, struct file *file);
void (*free_request)(struct netfs_io_request *rreq);
void (*free_subrequest)(struct netfs_io_subrequest *rreq);
@@ -314,8 +304,10 @@ struct netfs_request_ops {
void (*update_i_size)(struct inode *inode, loff_t i_size);
/* Write request handling */
- void (*create_write_requests)(struct netfs_io_request *wreq,
- loff_t start, size_t len);
+ void (*begin_writeback)(struct netfs_io_request *wreq);
+ void (*prepare_write)(struct netfs_io_subrequest *subreq);
+ void (*issue_write)(struct netfs_io_subrequest *subreq);
+ void (*retry_request)(struct netfs_io_request *wreq, struct netfs_io_stream *stream);
void (*invalidate_cache)(struct netfs_io_request *wreq);
};
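
With the ops table reshaped like this, a filesystem converts from the old create_write_requests() hook to begin_writeback(), prepare_write() and issue_write(). A hypothetical table is shown below; the myfs_* handlers and the elided read-side hooks are assumptions, not part of this patch.

static const struct netfs_request_ops myfs_req_ops = {
	.init_request		= myfs_init_request,
	/* read-side hooks elided */
	.begin_writeback	= myfs_begin_writeback,	/* once per pass that uploads */
	.prepare_write		= myfs_prepare_write,	/* size each subrequest */
	.issue_write		= myfs_issue_write,	/* send each subrequest */
	.invalidate_cache	= myfs_invalidate_cache,
};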
@@ -350,15 +342,27 @@ struct netfs_cache_ops {
netfs_io_terminated_t term_func,
void *term_func_priv);
+ /* Write data to the cache from a netfs subrequest. */
+ void (*issue_write)(struct netfs_io_subrequest *subreq);
+
/* Expand readahead request */
void (*expand_readahead)(struct netfs_cache_resources *cres,
- loff_t *_start, size_t *_len, loff_t i_size);
+ unsigned long long *_start,
+ unsigned long long *_len,
+ unsigned long long i_size);
/* Prepare a read operation, shortening it to a cached/uncached
* boundary as appropriate.
*/
enum netfs_io_source (*prepare_read)(struct netfs_io_subrequest *subreq,
- loff_t i_size);
+ unsigned long long i_size);
+
+ /* Prepare a write subrequest, working out if we're allowed to do it
+ * and finding out the maximum amount of data to gather before
+ * attempting to submit. If we're not permitted to do it, the
+ * subrequest should be marked failed.
+ */
+ void (*prepare_write_subreq)(struct netfs_io_subrequest *subreq);
/* Prepare a write operation, working out what part of the write we can
* actually do.
@@ -410,7 +414,6 @@ int netfs_unpin_writeback(struct inode *inode, struct writeback_control *wbc);
void netfs_clear_inode_writeback(struct inode *inode, const void *aux);
void netfs_invalidate_folio(struct folio *folio, size_t offset, size_t length);
bool netfs_release_folio(struct folio *folio, gfp_t gfp);
-int netfs_launder_folio(struct folio *folio);
/* VMA operations API. */
vm_fault_t netfs_page_mkwrite(struct vm_fault *vmf, struct netfs_group *netfs_group);
@@ -426,9 +429,7 @@ ssize_t netfs_extract_user_iter(struct iov_iter *orig, size_t orig_len,
iov_iter_extraction_t extraction_flags);
size_t netfs_limit_iter(const struct iov_iter *iter, size_t start_offset,
size_t max_size, size_t max_segs);
-struct netfs_io_subrequest *netfs_create_write_request(
- struct netfs_io_request *wreq, enum netfs_io_source dest,
- loff_t start, size_t len, work_func_t worker);
+void netfs_prepare_write_failed(struct netfs_io_subrequest *subreq);
void netfs_write_subrequest_terminated(void *_op, ssize_t transferred_or_error,
bool was_async);
void netfs_queue_write_request(struct netfs_io_subrequest *subreq);
@@ -472,6 +473,7 @@ static inline void netfs_inode_init(struct netfs_inode *ctx,
#if IS_ENABLED(CONFIG_FSCACHE)
ctx->cache = NULL;
#endif
+ mutex_init(&ctx->wb_lock);
/* ->releasepage() drives zero_point */
if (use_zero_point) {
ctx->zero_point = ctx->remote_i_size;
diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h
index 2df35e65557d..c5e33e2ca48a 100644
--- a/include/linux/pagemap.h
+++ b/include/linux/pagemap.h
@@ -40,6 +40,8 @@ int filemap_fdatawait_keep_errors(struct address_space *mapping);
int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend);
int filemap_fdatawait_range_keep_errors(struct address_space *mapping,
loff_t start_byte, loff_t end_byte);
+int filemap_invalidate_inode(struct inode *inode, bool flush,
+ loff_t start, loff_t end);
static inline int filemap_fdatawait(struct address_space *mapping)
{
diff --git a/include/net/9p/client.h b/include/net/9p/client.h
index 78ebcf782ce5..4f785098c67a 100644
--- a/include/net/9p/client.h
+++ b/include/net/9p/client.h
@@ -207,6 +207,8 @@ int p9_client_read(struct p9_fid *fid, u64 offset, struct iov_iter *to, int *err
int p9_client_read_once(struct p9_fid *fid, u64 offset, struct iov_iter *to,
int *err);
int p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err);
+struct netfs_io_subrequest;
+void p9_client_write_subreq(struct netfs_io_subrequest *subreq);
int p9_client_readdir(struct p9_fid *fid, char *data, u32 count, u64 offset);
int p9dirent_read(struct p9_client *clnt, char *buf, int len,
struct p9_dirent *dirent);
diff --git a/include/trace/events/netfs.h b/include/trace/events/netfs.h
index 447a8c21cf57..4ba553a6d71b 100644
--- a/include/trace/events/netfs.h
+++ b/include/trace/events/netfs.h
@@ -24,8 +24,8 @@
E_(netfs_read_trace_write_begin, "WRITEBEGN")
#define netfs_write_traces \
+ EM(netfs_write_trace_copy_to_cache, "COPY2CACH") \
EM(netfs_write_trace_dio_write, "DIO-WRITE") \
- EM(netfs_write_trace_launder, "LAUNDER ") \
EM(netfs_write_trace_unbuffered_write, "UNB-WRITE") \
EM(netfs_write_trace_writeback, "WRITEBACK") \
E_(netfs_write_trace_writethrough, "WRITETHRU")
@@ -34,9 +34,9 @@
EM(NETFS_READAHEAD, "RA") \
EM(NETFS_READPAGE, "RP") \
EM(NETFS_READ_FOR_WRITE, "RW") \
+ EM(NETFS_COPY_TO_CACHE, "CC") \
EM(NETFS_WRITEBACK, "WB") \
EM(NETFS_WRITETHROUGH, "WT") \
- EM(NETFS_LAUNDER_WRITE, "LW") \
EM(NETFS_UNBUFFERED_WRITE, "UW") \
EM(NETFS_DIO_READ, "DR") \
E_(NETFS_DIO_WRITE, "DW")
@@ -44,14 +44,18 @@
#define netfs_rreq_traces \
EM(netfs_rreq_trace_assess, "ASSESS ") \
EM(netfs_rreq_trace_copy, "COPY ") \
+ EM(netfs_rreq_trace_collect, "COLLECT") \
EM(netfs_rreq_trace_done, "DONE ") \
EM(netfs_rreq_trace_free, "FREE ") \
EM(netfs_rreq_trace_redirty, "REDIRTY") \
EM(netfs_rreq_trace_resubmit, "RESUBMT") \
+ EM(netfs_rreq_trace_set_pause, "PAUSE ") \
EM(netfs_rreq_trace_unlock, "UNLOCK ") \
EM(netfs_rreq_trace_unmark, "UNMARK ") \
EM(netfs_rreq_trace_wait_ip, "WAIT-IP") \
+ EM(netfs_rreq_trace_wait_pause, "WT-PAUS") \
EM(netfs_rreq_trace_wake_ip, "WAKE-IP") \
+ EM(netfs_rreq_trace_unpause, "UNPAUSE") \
E_(netfs_rreq_trace_write_done, "WR-DONE")
#define netfs_sreq_sources \
@@ -64,11 +68,15 @@
E_(NETFS_INVALID_WRITE, "INVL")
#define netfs_sreq_traces \
+ EM(netfs_sreq_trace_discard, "DSCRD") \
EM(netfs_sreq_trace_download_instead, "RDOWN") \
+ EM(netfs_sreq_trace_fail, "FAIL ") \
EM(netfs_sreq_trace_free, "FREE ") \
EM(netfs_sreq_trace_limited, "LIMIT") \
EM(netfs_sreq_trace_prepare, "PREP ") \
+ EM(netfs_sreq_trace_prep_failed, "PRPFL") \
EM(netfs_sreq_trace_resubmit_short, "SHORT") \
+ EM(netfs_sreq_trace_retry, "RETRY") \
EM(netfs_sreq_trace_submit, "SUBMT") \
EM(netfs_sreq_trace_terminated, "TERM ") \
EM(netfs_sreq_trace_write, "WRITE") \
@@ -88,6 +96,7 @@
#define netfs_rreq_ref_traces \
EM(netfs_rreq_trace_get_for_outstanding,"GET OUTSTND") \
EM(netfs_rreq_trace_get_subreq, "GET SUBREQ ") \
+ EM(netfs_rreq_trace_get_work, "GET WORK ") \
EM(netfs_rreq_trace_put_complete, "PUT COMPLT ") \
EM(netfs_rreq_trace_put_discard, "PUT DISCARD") \
EM(netfs_rreq_trace_put_failed, "PUT FAILED ") \
@@ -95,6 +104,8 @@
EM(netfs_rreq_trace_put_return, "PUT RETURN ") \
EM(netfs_rreq_trace_put_subreq, "PUT SUBREQ ") \
EM(netfs_rreq_trace_put_work, "PUT WORK ") \
+ EM(netfs_rreq_trace_put_work_complete, "PUT WORK CP") \
+ EM(netfs_rreq_trace_put_work_nq, "PUT WORK NQ") \
EM(netfs_rreq_trace_see_work, "SEE WORK ") \
E_(netfs_rreq_trace_new, "NEW ")
@@ -103,11 +114,14 @@
EM(netfs_sreq_trace_get_resubmit, "GET RESUBMIT") \
EM(netfs_sreq_trace_get_short_read, "GET SHORTRD") \
EM(netfs_sreq_trace_new, "NEW ") \
+ EM(netfs_sreq_trace_put_cancel, "PUT CANCEL ") \
EM(netfs_sreq_trace_put_clear, "PUT CLEAR ") \
EM(netfs_sreq_trace_put_discard, "PUT DISCARD") \
+ EM(netfs_sreq_trace_put_done, "PUT DONE ") \
EM(netfs_sreq_trace_put_failed, "PUT FAILED ") \
EM(netfs_sreq_trace_put_merged, "PUT MERGED ") \
EM(netfs_sreq_trace_put_no_copy, "PUT NO COPY") \
+ EM(netfs_sreq_trace_put_oom, "PUT OOM ") \
EM(netfs_sreq_trace_put_wip, "PUT WIP ") \
EM(netfs_sreq_trace_put_work, "PUT WORK ") \
E_(netfs_sreq_trace_put_terminated, "PUT TERM ")
@@ -124,24 +138,33 @@
EM(netfs_streaming_filled_page, "mod-streamw-f") \
EM(netfs_streaming_cont_filled_page, "mod-streamw-f+") \
/* The rest are for writeback */ \
+ EM(netfs_folio_trace_cancel_copy, "cancel-copy") \
EM(netfs_folio_trace_clear, "clear") \
- EM(netfs_folio_trace_clear_s, "clear-s") \
+ EM(netfs_folio_trace_clear_cc, "clear-cc") \
EM(netfs_folio_trace_clear_g, "clear-g") \
- EM(netfs_folio_trace_copy_to_cache, "copy") \
- EM(netfs_folio_trace_end_copy, "end-copy") \
+ EM(netfs_folio_trace_clear_s, "clear-s") \
+ EM(netfs_folio_trace_copy_to_cache, "mark-copy") \
EM(netfs_folio_trace_filled_gaps, "filled-gaps") \
EM(netfs_folio_trace_kill, "kill") \
- EM(netfs_folio_trace_launder, "launder") \
+ EM(netfs_folio_trace_kill_cc, "kill-cc") \
+ EM(netfs_folio_trace_kill_g, "kill-g") \
+ EM(netfs_folio_trace_kill_s, "kill-s") \
EM(netfs_folio_trace_mkwrite, "mkwrite") \
EM(netfs_folio_trace_mkwrite_plus, "mkwrite+") \
+ EM(netfs_folio_trace_not_under_wback, "!wback") \
EM(netfs_folio_trace_read_gaps, "read-gaps") \
- EM(netfs_folio_trace_redirty, "redirty") \
EM(netfs_folio_trace_redirtied, "redirtied") \
EM(netfs_folio_trace_store, "store") \
+ EM(netfs_folio_trace_store_copy, "store-copy") \
EM(netfs_folio_trace_store_plus, "store+") \
EM(netfs_folio_trace_wthru, "wthru") \
E_(netfs_folio_trace_wthru_plus, "wthru+")
+#define netfs_collect_contig_traces \
+ EM(netfs_contig_trace_collect, "Collect") \
+ EM(netfs_contig_trace_jump, "-->JUMP-->") \
+ E_(netfs_contig_trace_unlock, "Unlock")
+
#ifndef __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
#define __NETFS_DECLARE_TRACE_ENUMS_ONCE_ONLY
@@ -158,6 +181,7 @@ enum netfs_failure { netfs_failures } __mode(byte);
enum netfs_rreq_ref_trace { netfs_rreq_ref_traces } __mode(byte);
enum netfs_sreq_ref_trace { netfs_sreq_ref_traces } __mode(byte);
enum netfs_folio_trace { netfs_folio_traces } __mode(byte);
+enum netfs_collect_contig_trace { netfs_collect_contig_traces } __mode(byte);
#endif
@@ -179,6 +203,7 @@ netfs_failures;
netfs_rreq_ref_traces;
netfs_sreq_ref_traces;
netfs_folio_traces;
+netfs_collect_contig_traces;
/*
* Now redefine the EM() and E_() macros to map the enums to the strings that
@@ -279,7 +304,7 @@ TRACE_EVENT(netfs_sreq,
__entry->start = sreq->start;
),
- TP_printk("R=%08x[%u] %s %s f=%02x s=%llx %zx/%zx e=%d",
+ TP_printk("R=%08x[%x] %s %s f=%02x s=%llx %zx/%zx e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__print_symbolic(__entry->what, netfs_sreq_traces),
@@ -319,7 +344,7 @@ TRACE_EVENT(netfs_failure,
__entry->start = sreq ? sreq->start : 0;
),
- TP_printk("R=%08x[%d] %s f=%02x s=%llx %zx/%zx %s e=%d",
+ TP_printk("R=%08x[%x] %s f=%02x s=%llx %zx/%zx %s e=%d",
__entry->rreq, __entry->index,
__print_symbolic(__entry->source, netfs_sreq_sources),
__entry->flags,
@@ -412,16 +437,18 @@ TRACE_EVENT(netfs_write_iter,
__field(unsigned long long, start )
__field(size_t, len )
__field(unsigned int, flags )
+ __field(unsigned int, ino )
),
TP_fast_assign(
__entry->start = iocb->ki_pos;
__entry->len = iov_iter_count(from);
+ __entry->ino = iocb->ki_filp->f_inode->i_ino;
__entry->flags = iocb->ki_flags;
),
- TP_printk("WRITE-ITER s=%llx l=%zx f=%x",
- __entry->start, __entry->len, __entry->flags)
+ TP_printk("WRITE-ITER i=%x s=%llx l=%zx f=%x",
+ __entry->ino, __entry->start, __entry->len, __entry->flags)
);
TRACE_EVENT(netfs_write,
@@ -433,9 +460,10 @@ TRACE_EVENT(netfs_write,
TP_STRUCT__entry(
__field(unsigned int, wreq )
__field(unsigned int, cookie )
+ __field(unsigned int, ino )
__field(enum netfs_write_trace, what )
__field(unsigned long long, start )
- __field(size_t, len )
+ __field(unsigned long long, len )
),
TP_fast_assign(
@@ -443,18 +471,213 @@ TRACE_EVENT(netfs_write,
struct fscache_cookie *__cookie = netfs_i_cookie(__ctx);
__entry->wreq = wreq->debug_id;
__entry->cookie = __cookie ? __cookie->debug_id : 0;
+ __entry->ino = wreq->inode->i_ino;
__entry->what = what;
__entry->start = wreq->start;
__entry->len = wreq->len;
),
- TP_printk("R=%08x %s c=%08x by=%llx-%llx",
+ TP_printk("R=%08x %s c=%08x i=%x by=%llx-%llx",
__entry->wreq,
__print_symbolic(__entry->what, netfs_write_traces),
__entry->cookie,
+ __entry->ino,
__entry->start, __entry->start + __entry->len - 1)
);
+TRACE_EVENT(netfs_collect,
+ TP_PROTO(const struct netfs_io_request *wreq),
+
+ TP_ARGS(wreq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(unsigned int, len )
+ __field(unsigned long long, transferred )
+ __field(unsigned long long, start )
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->start = wreq->start;
+ __entry->len = wreq->len;
+ __entry->transferred = wreq->transferred;
+ ),
+
+ TP_printk("R=%08x s=%llx-%llx",
+ __entry->wreq,
+ __entry->start + __entry->transferred,
+ __entry->start + __entry->len)
+ );
+
+TRACE_EVENT(netfs_collect_contig,
+ TP_PROTO(const struct netfs_io_request *wreq, unsigned long long to,
+ enum netfs_collect_contig_trace type),
+
+ TP_ARGS(wreq, to, type),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq)
+ __field(enum netfs_collect_contig_trace, type)
+ __field(unsigned long long, contiguity)
+ __field(unsigned long long, to)
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->type = type;
+ __entry->contiguity = wreq->contiguity;
+ __entry->to = to;
+ ),
+
+ TP_printk("R=%08x %llx -> %llx %s",
+ __entry->wreq,
+ __entry->contiguity,
+ __entry->to,
+ __print_symbolic(__entry->type, netfs_collect_contig_traces))
+ );
+
+TRACE_EVENT(netfs_collect_sreq,
+ TP_PROTO(const struct netfs_io_request *wreq,
+ const struct netfs_io_subrequest *subreq),
+
+ TP_ARGS(wreq, subreq),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(unsigned int, subreq )
+ __field(unsigned int, stream )
+ __field(unsigned int, len )
+ __field(unsigned int, transferred )
+ __field(unsigned long long, start )
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->subreq = subreq->debug_index;
+ __entry->stream = subreq->stream_nr;
+ __entry->start = subreq->start;
+ __entry->len = subreq->len;
+ __entry->transferred = subreq->transferred;
+ ),
+
+ TP_printk("R=%08x[%u:%02x] s=%llx t=%x/%x",
+ __entry->wreq, __entry->stream, __entry->subreq,
+ __entry->start, __entry->transferred, __entry->len)
+ );
+
+TRACE_EVENT(netfs_collect_folio,
+ TP_PROTO(const struct netfs_io_request *wreq,
+ const struct folio *folio,
+ unsigned long long fend,
+ unsigned long long collected_to),
+
+ TP_ARGS(wreq, folio, fend, collected_to),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(unsigned long, index )
+ __field(unsigned long long, fend )
+ __field(unsigned long long, cleaned_to )
+ __field(unsigned long long, collected_to )
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->index = folio->index;
+ __entry->fend = fend;
+ __entry->cleaned_to = wreq->cleaned_to;
+ __entry->collected_to = collected_to;
+ ),
+
+ TP_printk("R=%08x ix=%05lx r=%llx-%llx t=%llx/%llx",
+ __entry->wreq, __entry->index,
+ (unsigned long long)__entry->index * PAGE_SIZE, __entry->fend,
+ __entry->cleaned_to, __entry->collected_to)
+ );
+
+TRACE_EVENT(netfs_collect_state,
+ TP_PROTO(const struct netfs_io_request *wreq,
+ unsigned long long collected_to,
+ unsigned int notes),
+
+ TP_ARGS(wreq, collected_to, notes),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq )
+ __field(unsigned int, notes )
+ __field(unsigned long long, collected_to )
+ __field(unsigned long long, cleaned_to )
+ __field(unsigned long long, contiguity )
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->notes = notes;
+ __entry->collected_to = collected_to;
+ __entry->cleaned_to = wreq->cleaned_to;
+ __entry->contiguity = wreq->contiguity;
+ ),
+
+ TP_printk("R=%08x cto=%llx fto=%llx ctg=%llx n=%x",
+ __entry->wreq, __entry->collected_to,
+ __entry->cleaned_to, __entry->contiguity,
+ __entry->notes)
+ );
+
+TRACE_EVENT(netfs_collect_gap,
+ TP_PROTO(const struct netfs_io_request *wreq,
+ const struct netfs_io_stream *stream,
+ unsigned long long jump_to, char type),
+
+ TP_ARGS(wreq, stream, jump_to, type),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq)
+ __field(unsigned char, stream)
+ __field(unsigned char, type)
+ __field(unsigned long long, from)
+ __field(unsigned long long, to)
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->stream = stream->stream_nr;
+ __entry->from = stream->collected_to;
+ __entry->to = jump_to;
+ __entry->type = type;
+ ),
+
+ TP_printk("R=%08x[%x:] %llx->%llx %c",
+ __entry->wreq, __entry->stream,
+ __entry->from, __entry->to, __entry->type)
+ );
+
+TRACE_EVENT(netfs_collect_stream,
+ TP_PROTO(const struct netfs_io_request *wreq,
+ const struct netfs_io_stream *stream),
+
+ TP_ARGS(wreq, stream),
+
+ TP_STRUCT__entry(
+ __field(unsigned int, wreq)
+ __field(unsigned char, stream)
+ __field(unsigned long long, collected_to)
+ __field(unsigned long long, front)
+ ),
+
+ TP_fast_assign(
+ __entry->wreq = wreq->debug_id;
+ __entry->stream = stream->stream_nr;
+ __entry->collected_to = stream->collected_to;
+ __entry->front = stream->front ? stream->front->start : UINT_MAX;
+ ),
+
+ TP_printk("R=%08x[%x:] cto=%llx frn=%llx",
+ __entry->wreq, __entry->stream,
+ __entry->collected_to, __entry->front)
+ );
+
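Each TRACE_EVENT() above expands to a trace_netfs_<name>() call for the write collector to emit. A hedged sketch of how the per-stream and per-subrequest events might be raised while walking one write stream; the list walk and member names are assumptions inferred from the fields the events record, not code taken from this patch:

#include <linux/netfs.h>

/*
 * Illustrative only: within fs/netfs/ (where the trace header is pulled in
 * via internal.h), the collector could emit the new tracepoints like this.
 * The stream->subrequests list and the rreq_link member are assumed for the
 * sake of the example.
 */
static void example_trace_stream(struct netfs_io_request *wreq,
				 struct netfs_io_stream *stream)
{
	struct netfs_io_subrequest *subreq;

	trace_netfs_collect_stream(wreq, stream);
	list_for_each_entry(subreq, &stream->subrequests, rreq_link)
		trace_netfs_collect_sreq(wreq, subreq);
}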
#undef EM
#undef E_
#endif /* _TRACE_NETFS_H */
diff --git a/mm/filemap.c b/mm/filemap.c
index 30de18c4fd28..1d6b3a369077 100644
--- a/mm/filemap.c
+++ b/mm/filemap.c
@@ -1540,7 +1540,7 @@ EXPORT_SYMBOL(folio_end_private_2);
* folio_wait_private_2 - Wait for PG_private_2 to be cleared on a folio.
* @folio: The folio to wait on.
*
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio.
+ * Wait for PG_private_2 to be cleared on a folio.
*/
void folio_wait_private_2(struct folio *folio)
{
@@ -1553,8 +1553,8 @@ EXPORT_SYMBOL(folio_wait_private_2);
* folio_wait_private_2_killable - Wait for PG_private_2 to be cleared on a folio.
* @folio: The folio to wait on.
*
- * Wait for PG_private_2 (aka PG_fscache) to be cleared on a folio or until a
- * fatal signal is received by the calling task.
+ * Wait for PG_private_2 to be cleared on a folio or until a fatal signal is
+ * received by the calling task.
*
* Return:
* - 0 if successful.
@@ -4134,6 +4134,60 @@ bool filemap_release_folio(struct folio *folio, gfp_t gfp)
}
EXPORT_SYMBOL(filemap_release_folio);
+/**
+ * filemap_invalidate_inode - Invalidate/forcibly write back a range of an inode's pagecache
+ * @inode: The inode to flush
+ * @flush: Set to write back rather than simply invalidate.
+ * @start: First byte in range.
+ * @end: Last byte in range (inclusive), or LLONG_MAX for everything from start
+ * onwards.
+ *
+ * Invalidate all the folios on an inode that contribute to the specified
+ * range, possibly writing them back first. Whilst the operation is
+ * undertaken, the invalidate lock is held to prevent new folios from being
+ * installed.
+ */
+int filemap_invalidate_inode(struct inode *inode, bool flush,
+ loff_t start, loff_t end)
+{
+ struct address_space *mapping = inode->i_mapping;
+ pgoff_t first = start >> PAGE_SHIFT;
+ pgoff_t last = end >> PAGE_SHIFT;
+ pgoff_t nr = end == LLONG_MAX ? ULONG_MAX : last - first + 1;
+
+ if (!mapping || !mapping->nrpages || end < start)
+ goto out;
+
+ /* Prevent new folios from being added to the inode. */
+ filemap_invalidate_lock(mapping);
+
+ if (!mapping->nrpages)
+ goto unlock;
+
+ unmap_mapping_pages(mapping, first, nr, false);
+
+ /* Write back the data if we're asked to. */
+ if (flush) {
+ struct writeback_control wbc = {
+ .sync_mode = WB_SYNC_ALL,
+ .nr_to_write = LONG_MAX,
+ .range_start = start,
+ .range_end = end,
+ };
+
+ filemap_fdatawrite_wbc(mapping, &wbc);
+ }
+
+ /* Wait for writeback to complete on all folios and discard. */
+ truncate_inode_pages_range(mapping, start, end);
+
+unlock:
+ filemap_invalidate_unlock(mapping);
+out:
+ return filemap_check_errors(mapping);
+}
+EXPORT_SYMBOL_GPL(filemap_invalidate_inode);
+
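The helper above gives filesystems a single call that takes the invalidate lock, unmaps the range, optionally flushes it and then truncates it. A minimal sketch of a caller, where my_fs_zap_range() is a hypothetical helper and not part of this patch:

#include <linux/fs.h>
#include <linux/pagemap.h>

/*
 * Illustrative only: force any dirty pagecache in the byte range out to
 * storage and then discard the folios.  filemap_invalidate_inode() is the
 * helper added above; everything else here is a made-up example.
 */
static int my_fs_zap_range(struct inode *inode, loff_t start, loff_t end)
{
	/* flush=true: write the range back before invalidating it. */
	return filemap_invalidate_inode(inode, true, start, end);
}

Passing start=0 and end=LLONG_MAX invalidates the whole file.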
#ifdef CONFIG_CACHESTAT_SYSCALL
/**
* filemap_cachestat() - compute the page cache statistics of a mapping
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 3e19b87049db..06fc89d981e8 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -2546,6 +2546,7 @@ done:
folio_batch_release(&wbc->fbatch);
return NULL;
}
+EXPORT_SYMBOL_GPL(writeback_iter);
/**
* write_cache_pages - walk the list of dirty pages of the given address space and write all of them.
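Exporting writeback_iter() lets modular filesystems drive the dirty-folio walk themselves instead of going through write_cache_pages(). A hedged sketch of the usual loop, where my_fs_write_folio() is a hypothetical helper that starts I/O on the locked folio, unlocks it and returns 0 or a negative error:

#include <linux/pagemap.h>
#include <linux/writeback.h>

/*
 * Illustrative only: a ->writepages implementation built on the newly
 * exported writeback_iter().  The iterator hands back each dirty folio
 * locked; the helper is expected to start I/O and unlock it, and the last
 * recorded error is returned in the usual way.
 */
static int my_fs_writepages(struct address_space *mapping,
			    struct writeback_control *wbc)
{
	struct folio *folio = NULL;
	int error = 0;

	while ((folio = writeback_iter(mapping, wbc, folio, &error)))
		error = my_fs_write_folio(folio, wbc);

	return error;
}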
diff --git a/net/9p/Kconfig b/net/9p/Kconfig
index 00ebce9e5a65..bcdab9c23b40 100644
--- a/net/9p/Kconfig
+++ b/net/9p/Kconfig
@@ -5,6 +5,7 @@
menuconfig NET_9P
tristate "Plan 9 Resource Sharing Support (9P2000)"
+ select NETFS_SUPPORT
help
If you say Y here, you will get experimental support for
Plan 9 resource sharing via the 9P2000 protocol.
diff --git a/net/9p/client.c b/net/9p/client.c
index f7e90b4769bb..00774656eeac 100644
--- a/net/9p/client.c
+++ b/net/9p/client.c
@@ -18,6 +18,7 @@
#include <linux/sched/signal.h>
#include <linux/uaccess.h>
#include <linux/uio.h>
+#include <linux/netfs.h>
#include <net/9p/9p.h>
#include <linux/parser.h>
#include <linux/seq_file.h>
@@ -1661,6 +1662,54 @@ p9_client_write(struct p9_fid *fid, u64 offset, struct iov_iter *from, int *err)
}
EXPORT_SYMBOL(p9_client_write);
+void
+p9_client_write_subreq(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *wreq = subreq->rreq;
+ struct p9_fid *fid = wreq->netfs_priv;
+ struct p9_client *clnt = fid->clnt;
+ struct p9_req_t *req;
+ unsigned long long start = subreq->start + subreq->transferred;
+ int written, len = subreq->len - subreq->transferred;
+ int err;
+
+ p9_debug(P9_DEBUG_9P, ">>> TWRITE fid %d offset %llu len %d\n",
+ fid->fid, start, len);
+
+ /* Don't bother zerocopy for small IO (< 1024) */
+ if (clnt->trans_mod->zc_request && len > 1024) {
+ req = p9_client_zc_rpc(clnt, P9_TWRITE, NULL, &subreq->io_iter,
+ 0, wreq->len, P9_ZC_HDR_SZ, "dqd",
+ fid->fid, start, len);
+ } else {
+ req = p9_client_rpc(clnt, P9_TWRITE, "dqV", fid->fid,
+ start, len, &subreq->io_iter);
+ }
+ if (IS_ERR(req)) {
+ netfs_write_subrequest_terminated(subreq, PTR_ERR(req), false);
+ return;
+ }
+
+ err = p9pdu_readf(&req->rc, clnt->proto_version, "d", &written);
+ if (err) {
+ trace_9p_protocol_dump(clnt, &req->rc);
+ p9_req_put(clnt, req);
+ netfs_write_subrequest_terminated(subreq, err, false);
+ return;
+ }
+
+ if (written > len) {
+ pr_err("bogus RWRITE count (%d > %u)\n", written, len);
+ written = len;
+ }
+
+ p9_debug(P9_DEBUG_9P, "<<< RWRITE count %d\n", written);
+
+ p9_req_put(clnt, req);
+ netfs_write_subrequest_terminated(subreq, written, false);
+}
+EXPORT_SYMBOL(p9_client_write_subreq);
+
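p9_client_write_subreq() terminates the netfs subrequest itself with either the byte count written or an error, so a write path only has to hand the subrequest over; as the function above shows, it expects the open fid in wreq->netfs_priv. A minimal, hypothetical sketch of such a hook (not the fs/9p code from this patch):

#include <linux/netfs.h>
#include <net/9p/client.h>

/*
 * Illustrative only: a netfs ->issue_write implementation could pass the
 * subrequest straight to the 9p client, which issues the TWRITE and
 * terminates the subrequest itself.  It assumes writeback setup already
 * stashed an open struct p9_fid in wreq->netfs_priv.
 */
static void example_issue_write(struct netfs_io_subrequest *subreq)
{
	p9_client_write_subreq(subreq);
}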
struct p9_wstat *p9_client_stat(struct p9_fid *fid)
{
int err;