about summary refs log tree commit diff
path: root/fs/ceph/addr.c
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ceph/addr.c')
-rw-r--r-- fs/ceph/addr.c 117
1 files changed, 76 insertions, 41 deletions
diff --git a/fs/ceph/addr.c b/fs/ceph/addr.c
index 8c16bc5250ef..5d9ccda098cc 100644
--- a/fs/ceph/addr.c
+++ b/fs/ceph/addr.c
@@ -13,6 +13,7 @@
#include <linux/iversion.h>
#include <linux/ktime.h>
#include <linux/netfs.h>
+#include <trace/events/netfs.h>
#include "super.h"
#include "mds_client.h"
@@ -205,21 +206,6 @@ static void ceph_netfs_expand_readahead(struct netfs_io_request *rreq)
}
}
-static bool ceph_netfs_clamp_length(struct netfs_io_subrequest *subreq)
-{
- struct inode *inode = subreq->rreq->inode;
- struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
- struct ceph_inode_info *ci = ceph_inode(inode);
- u64 objno, objoff;
- u32 xlen;
-
- /* Truncate the extent at the end of the current block */
- ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
- &objno, &objoff, &xlen);
- subreq->len = min(xlen, fsc->mount_options->rsize);
- return true;
-}
-
static void finish_netfs_read(struct ceph_osd_request *req)
{
struct inode *inode = req->r_inode;
@@ -246,7 +232,8 @@ static void finish_netfs_read(struct ceph_osd_request *req)
if (err >= 0) {
if (sparse && err > 0)
err = ceph_sparse_ext_map_end(op);
- if (err < subreq->len)
+ if (err < subreq->len &&
+ subreq->rreq->origin != NETFS_DIO_READ)
__set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
if (IS_ENCRYPTED(inode) && err > 0) {
err = ceph_fscrypt_decrypt_extents(inode,
@@ -263,7 +250,12 @@ static void finish_netfs_read(struct ceph_osd_request *req)
calc_pages_for(osd_data->alignment,
osd_data->length), false);
}
- netfs_subreq_terminated(subreq, err, false);
+ if (err > 0) {
+ subreq->transferred = err;
+ err = 0;
+ }
+ trace_netfs_sreq(subreq, netfs_sreq_trace_io_progress);
+ netfs_read_subreq_terminated(subreq, err, false);
iput(req->r_inode);
ceph_dec_osd_stopping_blocker(fsc->mdsc);
}
@@ -277,12 +269,12 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
struct ceph_mds_request *req;
struct ceph_mds_client *mdsc = ceph_sb_to_mdsc(inode->i_sb);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct iov_iter iter;
ssize_t err = 0;
size_t len;
int mode;
- __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
+ if (rreq->origin != NETFS_DIO_READ)
+ __set_bit(NETFS_SREQ_CLEAR_TAIL, &subreq->flags);
__clear_bit(NETFS_SREQ_COPY_TO_CACHE, &subreq->flags);
if (subreq->start >= inode->i_size)
@@ -299,6 +291,7 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
req->r_args.getattr.mask = cpu_to_le32(CEPH_STAT_CAP_INLINE_DATA);
req->r_num_caps = 2;
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
err = ceph_mdsc_do_request(mdsc, NULL, req);
if (err < 0)
goto out;
@@ -312,17 +305,36 @@ static bool ceph_netfs_issue_op_inline(struct netfs_io_subrequest *subreq)
}
len = min_t(size_t, iinfo->inline_len - subreq->start, subreq->len);
- iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
- err = copy_to_iter(iinfo->inline_data + subreq->start, len, &iter);
- if (err == 0)
+ err = copy_to_iter(iinfo->inline_data + subreq->start, len, &subreq->io_iter);
+ if (err == 0) {
err = -EFAULT;
+ } else {
+ subreq->transferred += err;
+ err = 0;
+ }
ceph_mdsc_put_request(req);
out:
- netfs_subreq_terminated(subreq, err, false);
+ netfs_read_subreq_terminated(subreq, err, false);
return true;
}
+static int ceph_netfs_prepare_read(struct netfs_io_subrequest *subreq)
+{
+ struct netfs_io_request *rreq = subreq->rreq;
+ struct inode *inode = rreq->inode;
+ struct ceph_inode_info *ci = ceph_inode(inode);
+ struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
+ u64 objno, objoff;
+ u32 xlen;
+
+ /* Truncate the extent at the end of the current block */
+ ceph_calc_file_object_mapping(&ci->i_layout, subreq->start, subreq->len,
+ &objno, &objoff, &xlen);
+ rreq->io_streams[0].sreq_max_len = umin(xlen, fsc->mount_options->rsize);
+ return 0;
+}
+
static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
{
struct netfs_io_request *rreq = subreq->rreq;
@@ -332,9 +344,8 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct ceph_client *cl = fsc->client;
struct ceph_osd_request *req = NULL;
struct ceph_vino vino = ceph_vino(inode);
- struct iov_iter iter;
- int err = 0;
- u64 len = subreq->len;
+ int err;
+ u64 len;
bool sparse = IS_ENCRYPTED(inode) || ceph_test_mount_opt(fsc, SPARSEREAD);
u64 off = subreq->start;
int extent_cnt;
@@ -347,6 +358,12 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
if (ceph_has_inline_data(ci) && ceph_netfs_issue_op_inline(subreq))
return;
+ // TODO: This rounding here is slightly dodgy. It *should* work, for
+ // now, as the cache only deals in blocks that are a multiple of
+ // PAGE_SIZE and fscrypt blocks are at most PAGE_SIZE. What needs to
+ // happen is for the fscrypt driving to be moved into netfslib and the
+ // data in the cache also to be stored encrypted.
+ len = subreq->len;
ceph_fscrypt_adjust_off_and_len(inode, &off, &len);
req = ceph_osdc_new_request(&fsc->client->osdc, &ci->i_layout, vino,
@@ -369,8 +386,6 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
doutc(cl, "%llx.%llx pos=%llu orig_len=%zu len=%llu\n",
ceph_vinop(inode), subreq->start, subreq->len, len);
- iov_iter_xarray(&iter, ITER_DEST, &rreq->mapping->i_pages, subreq->start, len);
-
/*
* FIXME: For now, use CEPH_OSD_DATA_TYPE_PAGES instead of _ITER for
* encrypted inodes. We'd need infrastructure that handles an iov_iter
@@ -382,7 +397,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
struct page **pages;
size_t page_off;
- err = iov_iter_get_pages_alloc2(&iter, &pages, len, &page_off);
+ err = iov_iter_get_pages_alloc2(&subreq->io_iter, &pages, len, &page_off);
if (err < 0) {
doutc(cl, "%llx.%llx failed to allocate pages, %d\n",
ceph_vinop(inode), err);
@@ -397,7 +412,7 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
osd_req_op_extent_osd_data_pages(req, 0, pages, len, 0, false,
false);
} else {
- osd_req_op_extent_osd_iter(req, 0, &iter);
+ osd_req_op_extent_osd_iter(req, 0, &subreq->io_iter);
}
if (!ceph_inc_osd_stopping_blocker(fsc->mdsc)) {
err = -EIO;
@@ -408,22 +423,27 @@ static void ceph_netfs_issue_read(struct netfs_io_subrequest *subreq)
req->r_inode = inode;
ihold(inode);
+ trace_netfs_sreq(subreq, netfs_sreq_trace_submit);
ceph_osdc_start_request(req->r_osdc, req);
out:
ceph_osdc_put_request(req);
if (err)
- netfs_subreq_terminated(subreq, err, false);
+ netfs_read_subreq_terminated(subreq, err, false);
doutc(cl, "%llx.%llx result %d\n", ceph_vinop(inode), err);
}
static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
{
struct inode *inode = rreq->inode;
+ struct ceph_fs_client *fsc = ceph_inode_to_fs_client(inode);
struct ceph_client *cl = ceph_inode_to_client(inode);
int got = 0, want = CEPH_CAP_FILE_CACHE;
struct ceph_netfs_request_data *priv;
int ret = 0;
+ /* [DEPRECATED] Use PG_private_2 to mark folio being written to the cache. */
+ __set_bit(NETFS_RREQ_USE_PGPRIV2, &rreq->flags);
+
if (rreq->origin != NETFS_READAHEAD)
return 0;
@@ -467,6 +487,7 @@ static int ceph_init_request(struct netfs_io_request *rreq, struct file *file)
priv->caps = got;
rreq->netfs_priv = priv;
+ rreq->io_streams[0].sreq_max_len = fsc->mount_options->rsize;
out:
if (ret < 0)
@@ -491,13 +512,18 @@ static void ceph_netfs_free_request(struct netfs_io_request *rreq)
const struct netfs_request_ops ceph_netfs_ops = {
.init_request = ceph_init_request,
.free_request = ceph_netfs_free_request,
+ .prepare_read = ceph_netfs_prepare_read,
.issue_read = ceph_netfs_issue_read,
.expand_readahead = ceph_netfs_expand_readahead,
- .clamp_length = ceph_netfs_clamp_length,
.check_write_begin = ceph_netfs_check_write_begin,
};
#ifdef CONFIG_CEPH_FSCACHE
+static void ceph_set_page_fscache(struct page *page)
+{
+ folio_start_private_2(page_folio(page)); /* [DEPRECATED] */
+}
+
static void ceph_fscache_write_terminated(void *priv, ssize_t error, bool was_async)
{
struct inode *inode = priv;
@@ -515,6 +541,10 @@ static void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, b
ceph_fscache_write_terminated, inode, true, caching);
}
#else
+static inline void ceph_set_page_fscache(struct page *page)
+{
+}
+
static inline void ceph_fscache_write_to_cache(struct inode *inode, u64 off, u64 len, bool caching)
{
}
@@ -706,6 +736,8 @@ static int writepage_nounlock(struct page *page, struct writeback_control *wbc)
len = wlen;
set_page_writeback(page);
+ if (caching)
+ ceph_set_page_fscache(page);
ceph_fscache_write_to_cache(inode, page_off, len, caching);
if (IS_ENCRYPTED(inode)) {
@@ -789,6 +821,8 @@ static int ceph_writepage(struct page *page, struct writeback_control *wbc)
return AOP_WRITEPAGE_ACTIVATE;
}
+ folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
+
err = writepage_nounlock(page, wbc);
if (err == -ERESTARTSYS) {
/* direct memory reclaimer was killed by SIGKILL. return 0
@@ -1062,7 +1096,8 @@ get_more_pages:
unlock_page(page);
break;
}
- if (PageWriteback(page)) {
+ if (PageWriteback(page) ||
+ PagePrivate2(page) /* [DEPRECATED] */) {
if (wbc->sync_mode == WB_SYNC_NONE) {
doutc(cl, "%p under writeback\n", page);
unlock_page(page);
@@ -1070,6 +1105,7 @@ get_more_pages:
}
doutc(cl, "waiting on writeback %p\n", page);
wait_on_page_writeback(page);
+ folio_wait_private_2(page_folio(page)); /* [DEPRECATED] */
}
if (!clear_page_dirty_for_io(page)) {
@@ -1254,6 +1290,8 @@ new_request:
}
set_page_writeback(page);
+ if (caching)
+ ceph_set_page_fscache(page);
len += thp_size(page);
}
ceph_fscache_write_to_cache(inode, offset, len, caching);
@@ -1486,20 +1524,18 @@ static int ceph_netfs_check_write_begin(struct file *file, loff_t pos, unsigned
*/
static int ceph_write_begin(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len,
- struct page **pagep, void **fsdata)
+ struct folio **foliop, void **fsdata)
{
struct inode *inode = file_inode(file);
struct ceph_inode_info *ci = ceph_inode(inode);
- struct folio *folio = NULL;
int r;
- r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, &folio, NULL);
+ r = netfs_write_begin(&ci->netfs, file, inode->i_mapping, pos, len, foliop, NULL);
if (r < 0)
return r;
- folio_wait_private_2(folio); /* [DEPRECATED] */
- WARN_ON_ONCE(!folio_test_locked(folio));
- *pagep = &folio->page;
+ folio_wait_private_2(*foliop); /* [DEPRECATED] */
+ WARN_ON_ONCE(!folio_test_locked(*foliop));
return 0;
}
@@ -1509,9 +1545,8 @@ static int ceph_write_begin(struct file *file, struct address_space *mapping,
*/
static int ceph_write_end(struct file *file, struct address_space *mapping,
loff_t pos, unsigned len, unsigned copied,
- struct page *subpage, void *fsdata)
+ struct folio *folio, void *fsdata)
{
- struct folio *folio = page_folio(subpage);
struct inode *inode = file_inode(file);
struct ceph_client *cl = ceph_inode_to_client(inode);
bool check_cap = false;