From bb05a617f06b7a882e19c4f475b8e37f14d9ceac Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 24 May 2023 17:27:08 -0400 Subject: [PATCH 01/22] NFSv4.2: Fix READ_PLUS smatch warnings Smatch reports: fs/nfs/nfs42xdr.c:1131 decode_read_plus() warn: missing error code? 'status' Which Dan suggests to fix by doing a hardcoded "return 0" from the "if (segments == 0)" check. Additionally, smatch reports that the "status = -EIO" assignment is not used. This patch addresses both these issues. Reported-by: kernel test robot Reported-by: Dan Carpenter Closes: https://lore.kernel.org/r/202305222209.6l5VM2lL-lkp@intel.com/ Fixes: d3b00a802c845 ("NFS: Replace the READ_PLUS decoding code") Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 95234208dc9e..d0919c5bf61c 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1056,13 +1056,12 @@ static int decode_read_plus(struct xdr_stream *xdr, struct nfs_pgio_res *res) res->eof = be32_to_cpup(p++); segments = be32_to_cpup(p++); if (segments == 0) - return status; + return 0; segs = kmalloc_array(segments, sizeof(*segs), GFP_KERNEL); if (!segs) return -ENOMEM; - status = -EIO; for (i = 0; i < segments; i++) { status = decode_read_plus_segment(xdr, &segs[i]); if (status < 0) From 8d18f6c5bb864d97a730f471c56cdecf313efe64 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 31 May 2023 17:02:54 -0400 Subject: [PATCH 02/22] NFSv4.2: Fix READ_PLUS size calculations I bump the decode_read_plus_maxsz to account for hole segments, but I need to subtract out this increase when calling rpc_prepare_reply_pages() so the common case of single data segment replies can be directly placed into the xdr pages without needing to be shifted around. Reported-by: Chuck Lever Fixes: d3b00a802c845 ("NFS: Replace the READ_PLUS decoding code") Signed-off-by: Anna Schumaker --- fs/nfs/nfs42xdr.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index d0919c5bf61c..78193f04d892 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -54,10 +54,16 @@ (1 /* data_content4 */ + \ 2 /* data_info4.di_offset */ + \ 1 /* data_info4.di_length */) +#define NFS42_READ_PLUS_HOLE_SEGMENT_SIZE \ + (1 /* data_content4 */ + \ + 2 /* data_info4.di_offset */ + \ + 2 /* data_info4.di_length */) +#define READ_PLUS_SEGMENT_SIZE_DIFF (NFS42_READ_PLUS_HOLE_SEGMENT_SIZE - \ + NFS42_READ_PLUS_DATA_SEGMENT_SIZE) #define decode_read_plus_maxsz (op_decode_hdr_maxsz + \ 1 /* rpr_eof */ + \ 1 /* rpr_contents count */ + \ - NFS42_READ_PLUS_DATA_SEGMENT_SIZE) + NFS42_READ_PLUS_HOLE_SEGMENT_SIZE) #define encode_seek_maxsz (op_encode_hdr_maxsz + \ encode_stateid_maxsz + \ 2 /* offset */ + \ @@ -617,8 +623,8 @@ static void nfs4_xdr_enc_read_plus(struct rpc_rqst *req, encode_putfh(xdr, args->fh, &hdr); encode_read_plus(xdr, args, &hdr); - rpc_prepare_reply_pages(req, args->pages, args->pgbase, - args->count, hdr.replen); + rpc_prepare_reply_pages(req, args->pages, args->pgbase, args->count, + hdr.replen - READ_PLUS_SEGMENT_SIZE_DIFF); encode_nops(&hdr); } From 303a78052091c81e9003915c521fdca1c7e117af Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 9 Jun 2023 15:26:25 -0400 Subject: [PATCH 03/22] NFSv4.2: Rework scratch handling for READ_PLUS (again) I found that the read code might send multiple requests using the same nfs_pgio_header, but nfs4_proc_read_setup() is only called once. This is how we ended up occasionally double-freeing the scratch buffer, but also means we set a NULL pointer but non-zero length to the xdr scratch buffer. This results in an oops the first time decoding needs to copy something to scratch, which frequently happens when decoding READ_PLUS hole segments. I fix this by moving scratch handling into the pageio read code. I provide a function to allocate scratch space for decoding read replies, and free the scratch buffer when the nfs_pgio_header is freed. Fixes: fbd2a05f29a9 (NFSv4.2: Rework scratch handling for READ_PLUS) Signed-off-by: Anna Schumaker --- fs/nfs/internal.h | 1 + fs/nfs/nfs42.h | 1 + fs/nfs/nfs42xdr.c | 2 +- fs/nfs/nfs4proc.c | 13 +------------ fs/nfs/read.c | 10 ++++++++++ 5 files changed, 14 insertions(+), 13 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 913c09806c7f..41abea340ad8 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -493,6 +493,7 @@ extern const struct nfs_pgio_completion_ops nfs_async_read_completion_ops; extern void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, struct inode *inode, bool force_mds, const struct nfs_pgio_completion_ops *compl_ops); +extern bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size); extern int nfs_read_add_folio(struct nfs_pageio_descriptor *pgio, struct nfs_open_context *ctx, struct folio *folio); diff --git a/fs/nfs/nfs42.h b/fs/nfs/nfs42.h index 0fe5aacbcfdf..b59876b01a1e 100644 --- a/fs/nfs/nfs42.h +++ b/fs/nfs/nfs42.h @@ -13,6 +13,7 @@ * more? Need to consider not to pre-alloc too much for a compound. */ #define PNFS_LAYOUTSTATS_MAXDEV (4) +#define READ_PLUS_SCRATCH_SIZE (16) /* nfs4.2proc.c */ #ifdef CONFIG_NFS_V4_2 diff --git a/fs/nfs/nfs42xdr.c b/fs/nfs/nfs42xdr.c index 78193f04d892..9e3ae53e2205 100644 --- a/fs/nfs/nfs42xdr.c +++ b/fs/nfs/nfs42xdr.c @@ -1433,7 +1433,7 @@ static int nfs4_xdr_dec_read_plus(struct rpc_rqst *rqstp, struct compound_hdr hdr; int status; - xdr_set_scratch_buffer(xdr, res->scratch, sizeof(res->scratch)); + xdr_set_scratch_buffer(xdr, res->scratch, READ_PLUS_SCRATCH_SIZE); status = decode_compound_hdr(xdr, &hdr); if (status) diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 832fa226b8f2..3c24c3c99e8a 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -5438,18 +5438,8 @@ static bool nfs4_read_plus_not_supported(struct rpc_task *task, return false; } -static inline void nfs4_read_plus_scratch_free(struct nfs_pgio_header *hdr) -{ - if (hdr->res.scratch) { - kfree(hdr->res.scratch); - hdr->res.scratch = NULL; - } -} - static int nfs4_read_done(struct rpc_task *task, struct nfs_pgio_header *hdr) { - nfs4_read_plus_scratch_free(hdr); - if (!nfs4_sequence_done(task, &hdr->res.seq_res)) return -EAGAIN; if (nfs4_read_stateid_changed(task, &hdr->args)) @@ -5469,8 +5459,7 @@ static bool nfs42_read_plus_support(struct nfs_pgio_header *hdr, /* Note: We don't use READ_PLUS with pNFS yet */ if (nfs_server_capable(hdr->inode, NFS_CAP_READ_PLUS) && !hdr->ds_clp) { msg->rpc_proc = &nfs4_procedures[NFSPROC4_CLNT_READ_PLUS]; - hdr->res.scratch = kmalloc(32, GFP_KERNEL); - return hdr->res.scratch != NULL; + return nfs_read_alloc_scratch(hdr, READ_PLUS_SCRATCH_SIZE); } return false; } diff --git a/fs/nfs/read.c b/fs/nfs/read.c index f71eeee67e20..7dc21a48e3e7 100644 --- a/fs/nfs/read.c +++ b/fs/nfs/read.c @@ -47,6 +47,8 @@ static struct nfs_pgio_header *nfs_readhdr_alloc(void) static void nfs_readhdr_free(struct nfs_pgio_header *rhdr) { + if (rhdr->res.scratch != NULL) + kfree(rhdr->res.scratch); kmem_cache_free(nfs_rdata_cachep, rhdr); } @@ -108,6 +110,14 @@ void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) } EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); +bool nfs_read_alloc_scratch(struct nfs_pgio_header *hdr, size_t size) +{ + WARN_ON(hdr->res.scratch != NULL); + hdr->res.scratch = kmalloc(size, GFP_KERNEL); + return hdr->res.scratch != NULL; +} +EXPORT_SYMBOL_GPL(nfs_read_alloc_scratch); + static void nfs_readpage_release(struct nfs_page *req, int error) { struct folio *folio = nfs_page_to_folio(req); From 61182c796d74f54ba66d17bac6f516183ec09af2 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Fri, 23 Jun 2023 11:43:14 -0400 Subject: [PATCH 04/22] SUNRPC: kmap() the xdr pages during decode If the pages are in HIGHMEM then we need to make sure they're mapped before trying to read data off of them, otherwise we could end up with a NULL pointer dereference. The downside to this is that we need an extra cleanup step at the end of decode to kunmap() the last page. I introduced an xdr_finish_decode() function to do this. Right now this function only calls the unmap_current_page() function, but other generic cleanup steps could be added in the future if we come across anything else. Reported-by: Krzysztof Kozlowski Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 2 ++ net/sunrpc/clnt.c | 1 + net/sunrpc/svc.c | 2 ++ net/sunrpc/xdr.c | 27 ++++++++++++++++++++++++++- 4 files changed, 31 insertions(+), 1 deletion(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index f89ec4b5ea16..adc844db1ea5 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -224,6 +224,7 @@ struct xdr_stream { struct kvec *iov; /* pointer to the current kvec */ struct kvec scratch; /* Scratch buffer */ struct page **page_ptr; /* pointer to the current page */ + void *page_kaddr; /* kmapped address of the current page */ unsigned int nwords; /* Remaining decode buffer length */ struct rpc_rqst *rqst; /* For debugging */ @@ -255,6 +256,7 @@ extern void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst); extern void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, struct page **pages, unsigned int len); +extern void xdr_finish_decode(struct xdr_stream *xdr); extern __be32 *xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes); extern unsigned int xdr_read_pages(struct xdr_stream *xdr, unsigned int len); extern void xdr_enter_page(struct xdr_stream *xdr, unsigned int len); diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index d7c697af3762..ca2c6efe19c9 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -2602,6 +2602,7 @@ call_decode(struct rpc_task *task) case 0: task->tk_action = rpc_exit_task; task->tk_status = rpcauth_unwrap_resp(task, &xdr); + xdr_finish_decode(&xdr); return; case -EAGAIN: task->tk_status = 0; diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c index 587811a002c9..a864414ce811 100644 --- a/net/sunrpc/svc.c +++ b/net/sunrpc/svc.c @@ -1370,6 +1370,8 @@ svc_process_common(struct svc_rqst *rqstp) rc = process.dispatch(rqstp); if (procp->pc_release) procp->pc_release(rqstp); + xdr_finish_decode(xdr); + if (!rc) goto dropit; if (rqstp->rq_auth_stat != rpc_auth_ok) diff --git a/net/sunrpc/xdr.c b/net/sunrpc/xdr.c index 2a22e78af116..f5011344dfe7 100644 --- a/net/sunrpc/xdr.c +++ b/net/sunrpc/xdr.c @@ -1288,6 +1288,14 @@ static unsigned int xdr_set_tail_base(struct xdr_stream *xdr, return xdr_set_iov(xdr, buf->tail, base, len); } +static void xdr_stream_unmap_current_page(struct xdr_stream *xdr) +{ + if (xdr->page_kaddr) { + kunmap_local(xdr->page_kaddr); + xdr->page_kaddr = NULL; + } +} + static unsigned int xdr_set_page_base(struct xdr_stream *xdr, unsigned int base, unsigned int len) { @@ -1305,12 +1313,18 @@ static unsigned int xdr_set_page_base(struct xdr_stream *xdr, if (len > maxlen) len = maxlen; + xdr_stream_unmap_current_page(xdr); xdr_stream_page_set_pos(xdr, base); base += xdr->buf->page_base; pgnr = base >> PAGE_SHIFT; xdr->page_ptr = &xdr->buf->pages[pgnr]; - kaddr = page_address(*xdr->page_ptr); + + if (PageHighMem(*xdr->page_ptr)) { + xdr->page_kaddr = kmap_local_page(*xdr->page_ptr); + kaddr = xdr->page_kaddr; + } else + kaddr = page_address(*xdr->page_ptr); pgoff = base & ~PAGE_MASK; xdr->p = (__be32*)(kaddr + pgoff); @@ -1364,6 +1378,7 @@ void xdr_init_decode(struct xdr_stream *xdr, struct xdr_buf *buf, __be32 *p, struct rpc_rqst *rqst) { xdr->buf = buf; + xdr->page_kaddr = NULL; xdr_reset_scratch_buffer(xdr); xdr->nwords = XDR_QUADLEN(buf->len); if (xdr_set_iov(xdr, buf->head, 0, buf->len) == 0 && @@ -1396,6 +1411,16 @@ void xdr_init_decode_pages(struct xdr_stream *xdr, struct xdr_buf *buf, } EXPORT_SYMBOL_GPL(xdr_init_decode_pages); +/** + * xdr_finish_decode - Clean up the xdr_stream after decoding data. + * @xdr: pointer to xdr_stream struct + */ +void xdr_finish_decode(struct xdr_stream *xdr) +{ + xdr_stream_unmap_current_page(xdr); +} +EXPORT_SYMBOL(xdr_finish_decode); + static __be32 * __xdr_inline_decode(struct xdr_stream *xdr, size_t nbytes) { unsigned int nwords = XDR_QUADLEN(nbytes); From 9cf2744d249144fc0fe17667b56da78216678378 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Mon, 17 Jul 2023 16:33:14 -0400 Subject: [PATCH 05/22] NFS: Enable the READ_PLUS operation by default Now that the remaining issues have been worked out, including some unexpected 32 bit issues, we can safely enable the feature by default. Signed-off-by: Anna Schumaker --- fs/nfs/Kconfig | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/fs/nfs/Kconfig b/fs/nfs/Kconfig index b6fc169be1b1..7df2503cef6c 100644 --- a/fs/nfs/Kconfig +++ b/fs/nfs/Kconfig @@ -209,8 +209,6 @@ config NFS_DISABLE_UDP_SUPPORT config NFS_V4_2_READ_PLUS bool "NFS: Enable support for the NFSv4.2 READ_PLUS operation" depends on NFS_V4_2 - default n + default y help - This is intended for developers only. The READ_PLUS operation has - been shown to have issues under specific conditions and should not - be used in production. + Choose Y here to enable use of the NFS v4.2 READ_PLUS operation. From f9597ba8872a8f79f97b712ca098ffec841a374c Mon Sep 17 00:00:00 2001 From: Yue Haibing Date: Sat, 29 Jul 2023 20:31:52 +0800 Subject: [PATCH 06/22] xprtrdma: Remove unused function declaration rpcrdma_bc_post_recv() rpcrdma_bc_post_recv() is never implemented since introduction in commit f531a5dbc451 ("xprtrdma: Pre-allocate backward rpc_rqst and send/receive buffers"). Signed-off-by: Yue Haibing Reviewed-by: Chuck Lever Signed-off-by: Anna Schumaker --- net/sunrpc/xprtrdma/xprt_rdma.h | 1 - 1 file changed, 1 deletion(-) diff --git a/net/sunrpc/xprtrdma/xprt_rdma.h b/net/sunrpc/xprtrdma/xprt_rdma.h index 5e5ff6784ef5..da409450dfc0 100644 --- a/net/sunrpc/xprtrdma/xprt_rdma.h +++ b/net/sunrpc/xprtrdma/xprt_rdma.h @@ -593,7 +593,6 @@ void xprt_rdma_cleanup(void); int xprt_rdma_bc_setup(struct rpc_xprt *, unsigned int); size_t xprt_rdma_bc_maxpayload(struct rpc_xprt *); unsigned int xprt_rdma_bc_max_slots(struct rpc_xprt *); -int rpcrdma_bc_post_recv(struct rpcrdma_xprt *, unsigned int); void rpcrdma_bc_receive_call(struct rpcrdma_xprt *, struct rpcrdma_rep *); int xprt_rdma_bc_send_reply(struct rpc_rqst *rqst); void xprt_rdma_bc_free_rqst(struct rpc_rqst *); From e87cf8a28e7592bd19064e8181324ae26bc02932 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 30 Jun 2023 12:46:53 +0300 Subject: [PATCH 07/22] SUNRPC: clean up integer overflow check This integer overflow check works as intended but Clang and GCC and warn about it when compiling with W=1. include/linux/sunrpc/xdr.h:539:17: error: comparison is always false due to limited range of data type [-Werror=type-limits] Use size_mul() to prevent the integer overflow. It silences the warning and it's cleaner as well. Reported-by: Dmitry Antipov Closes: https://lore.kernel.org/all/20230601143332.255312-1-dmantipov@yandex.ru/ Signed-off-by: Dan Carpenter Acked-by: Jeff Layton Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xdr.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/include/linux/sunrpc/xdr.h b/include/linux/sunrpc/xdr.h index adc844db1ea5..68915180a29c 100644 --- a/include/linux/sunrpc/xdr.h +++ b/include/linux/sunrpc/xdr.h @@ -777,9 +777,7 @@ xdr_stream_decode_uint32_array(struct xdr_stream *xdr, if (unlikely(xdr_stream_decode_u32(xdr, &len) < 0)) return -EBADMSG; - if (len > SIZE_MAX / sizeof(*p)) - return -EBADMSG; - p = xdr_inline_decode(xdr, len * sizeof(*p)); + p = xdr_inline_decode(xdr, size_mul(len, sizeof(*p))); if (unlikely(!p)) return -EBADMSG; if (array == NULL) From 08be82ba0cffdfa15ce2e2c312cb704823971862 Mon Sep 17 00:00:00 2001 From: GUO Zihua Date: Fri, 18 Aug 2023 11:31:02 +0800 Subject: [PATCH 08/22] NFS: Move common includes outside ifdef module.h, clnt.h, addr.h and dns_resolve.h is always included whether CONFIG_NFS_USE_KERNEL_DNS is set or not and their order does not seems to matter. Move them outside the ifdef to simplify code and avoid checkincludes message. Signed-off-by: GUO Zihua Signed-off-by: Anna Schumaker --- fs/nfs/dns_resolve.c | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) diff --git a/fs/nfs/dns_resolve.c b/fs/nfs/dns_resolve.c index 6603b5cee029..714975e5c0db 100644 --- a/fs/nfs/dns_resolve.c +++ b/fs/nfs/dns_resolve.c @@ -7,14 +7,16 @@ * Resolves DNS hostnames into valid ip addresses */ -#ifdef CONFIG_NFS_USE_KERNEL_DNS - #include #include #include -#include + #include "dns_resolve.h" +#ifdef CONFIG_NFS_USE_KERNEL_DNS + +#include + ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, struct sockaddr_storage *ss, size_t salen) { @@ -35,7 +37,6 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #else -#include #include #include #include @@ -43,15 +44,12 @@ ssize_t nfs_dns_resolve_name(struct net *net, char *name, size_t namelen, #include #include #include -#include -#include #include #include #include #include #include "nfs4_fs.h" -#include "dns_resolve.h" #include "cache_lib.h" #include "netns.h" From 96562c45af5c31b89a197af28f79bfa838fb8391 Mon Sep 17 00:00:00 2001 From: Fedor Pchelkin Date: Thu, 20 Jul 2023 18:37:51 +0300 Subject: [PATCH 09/22] NFSv4/pnfs: minor fix for cleanup path in nfs4_get_device_info It is an almost improbable error case but when page allocating loop in nfs4_get_device_info() fails then we should only free the already allocated pages, as __free_page() can't deal with NULL arguments. Found by Linux Verification Center (linuxtesting.org). Cc: stable@vger.kernel.org Signed-off-by: Fedor Pchelkin Reviewed-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_dev.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs_dev.c b/fs/nfs/pnfs_dev.c index ddbbf4fcda86..178001c90156 100644 --- a/fs/nfs/pnfs_dev.c +++ b/fs/nfs/pnfs_dev.c @@ -154,7 +154,7 @@ nfs4_get_device_info(struct nfs_server *server, set_bit(NFS_DEVICEID_NOCACHE, &d->flags); out_free_pages: - for (i = 0; i < max_pages; i++) + while (--i >= 0) __free_page(pages[i]); kfree(pages); out_free_pdev: From a841c9cb9b04b05525f0928633e84e95921ab298 Mon Sep 17 00:00:00 2001 From: "huzhi001@208suo.com" Date: Wed, 19 Jul 2023 19:00:38 +0800 Subject: [PATCH 10/22] filemap: Fix errors in file.c The following checkpatch errors are removed: ERROR: "foo * bar" should be "foo *bar" "foo * bar" should be "foo *bar" Signed-off-by: ZhiHu Signed-off-by: Anna Schumaker --- fs/nfs/file.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/file.c b/fs/nfs/file.c index 79b1b3fcd3fc..3f9768810427 100644 --- a/fs/nfs/file.c +++ b/fs/nfs/file.c @@ -200,7 +200,7 @@ nfs_file_splice_read(struct file *in, loff_t *ppos, struct pipe_inode_info *pipe EXPORT_SYMBOL_GPL(nfs_file_splice_read); int -nfs_file_mmap(struct file * file, struct vm_area_struct * vma) +nfs_file_mmap(struct file *file, struct vm_area_struct *vma) { struct inode *inode = file_inode(file); int status; From 08b45fcb2d4675f6182fe0edc0d8b1fe604051fa Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Mon, 24 Jul 2023 11:08:46 +0300 Subject: [PATCH 11/22] nfs/blocklayout: Use the passed in gfp flags This allocation should use the passed in GFP_ flags instead of GFP_KERNEL. One places where this matters is in filelayout_pg_init_write() which uses GFP_NOFS as the allocation flags. Fixes: 5c83746a0cf2 ("pnfs/blocklayout: in-kernel GETDEVICEINFO XDR parsing") Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Signed-off-by: Anna Schumaker --- fs/nfs/blocklayout/dev.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/blocklayout/dev.c b/fs/nfs/blocklayout/dev.c index 70f5563a8e81..65cbb5607a5f 100644 --- a/fs/nfs/blocklayout/dev.c +++ b/fs/nfs/blocklayout/dev.c @@ -404,7 +404,7 @@ bl_parse_concat(struct nfs_server *server, struct pnfs_block_dev *d, int ret, i; d->children = kcalloc(v->concat.volumes_count, - sizeof(struct pnfs_block_dev), GFP_KERNEL); + sizeof(struct pnfs_block_dev), gfp_mask); if (!d->children) return -ENOMEM; @@ -433,7 +433,7 @@ bl_parse_stripe(struct nfs_server *server, struct pnfs_block_dev *d, int ret, i; d->children = kcalloc(v->stripe.volumes_count, - sizeof(struct pnfs_block_dev), GFP_KERNEL); + sizeof(struct pnfs_block_dev), gfp_mask); if (!d->children) return -ENOMEM; From 14e7316a3c73cf45cef4422549f3585fc1c53521 Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Fri, 21 Jul 2023 09:23:00 +0800 Subject: [PATCH 12/22] nfs: fix redundant readdir request after get eof When a directory contains 17 files (except . and ..), nfs client sends a redundant readdir request after get eof. A simple reproduce, At NFS server, create a directory with 17 files under exported directory. # mkdir test # cd test # for i in {0..16} ; do touch $i; done At NFS client, no matter mounting through nfsv3 or nfsv4, does ls (or ll) at the created test directory. A tshark output likes following (for nfsv4), # tshark -i eth0 tcp port 2049 -Tfields -e ip.src -e ip.dst -e nfs -e nfs.cookie4 srcip dstip SEQUENCE, PUTFH, READDIR 0 dstip srcip SEQUENCE PUTFH READDIR 909539109313539306,2108391201987888856,2305312124304486544,2566335452463141496,2978225129081509984,4263037479923412583,4304697173036510679,4666703455469210097,4759208201298769007,4776701232145978803,5338408478512081262,5949498658935544804,5971526429894832903,6294060338267709855,6528840566229532529,8600463293536422524,9223372036854775807 srcip dstip srcip dstip SEQUENCE, PUTFH, READDIR 9223372036854775807 dstip srcip SEQUENCE PUTFH READDIR The READDIR with cookie 9223372036854775807(0x7FFFFFFFFFFFFFFF) is redundant. Reviewed-by: Benjamin Coddington Signed-off-by: Kinglong Mee Signed-off-by: Anna Schumaker --- fs/nfs/dir.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/fs/nfs/dir.c b/fs/nfs/dir.c index 8f3112e71a6a..e6a51fd94fea 100644 --- a/fs/nfs/dir.c +++ b/fs/nfs/dir.c @@ -1089,6 +1089,17 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, for (i = desc->cache_entry_index; i < array->size; i++) { struct nfs_cache_array_entry *ent; + /* + * nfs_readdir_handle_cache_misses return force clear at + * (cache_misses > NFS_READDIR_CACHE_MISS_THRESHOLD) for + * readdir heuristic, NFS_READDIR_CACHE_MISS_THRESHOLD + 1 + * entries need be emitted here. + */ + if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 2) { + desc->eob = true; + break; + } + ent = &array->array[i]; if (!dir_emit(desc->ctx, ent->name, ent->name_len, nfs_compat_user_ino64(ent->ino), ent->d_type)) { @@ -1107,10 +1118,6 @@ static void nfs_do_filldir(struct nfs_readdir_descriptor *desc, desc->ctx->pos = desc->dir_cookie; else desc->ctx->pos++; - if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) { - desc->eob = true; - break; - } } if (array->folio_is_eof) desc->eof = !desc->eob; From 88975a55969e11f26fe3846bf4fbf8e7dc8cbbd4 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Aug 2023 17:22:14 -0400 Subject: [PATCH 13/22] NFS: Fix a potential data corruption We must ensure that the subrequests are joined back into the head before we can retransmit a request. If the head was not on the commit lists, because the server wrote it synchronously, we still need to add it back to the retransmission list. Add a call that mirrors the effect of nfs_cancel_remove_inode() for O_DIRECT. Fixes: ed5d588fe47f ("NFS: Try to join page groups before an O_DIRECT retransmission") Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/direct.c | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) diff --git a/fs/nfs/direct.c b/fs/nfs/direct.c index aaffaaa336cc..47d892a1d363 100644 --- a/fs/nfs/direct.c +++ b/fs/nfs/direct.c @@ -472,13 +472,31 @@ ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, return result; } +static void nfs_direct_add_page_head(struct list_head *list, + struct nfs_page *req) +{ + struct nfs_page *head = req->wb_head; + + if (!list_empty(&head->wb_list) || !nfs_lock_request(head)) + return; + if (!list_empty(&head->wb_list)) { + nfs_unlock_request(head); + return; + } + list_add(&head->wb_list, list); + kref_get(&head->wb_kref); + kref_get(&head->wb_kref); +} + static void nfs_direct_join_group(struct list_head *list, struct inode *inode) { struct nfs_page *req, *subreq; list_for_each_entry(req, list, wb_list) { - if (req->wb_head != req) + if (req->wb_head != req) { + nfs_direct_add_page_head(&req->wb_list, req); continue; + } subreq = req->wb_this_page; if (subreq == req) continue; From 3a107f07403aa3bf0d604996a30922812e30f8a9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Aug 2023 17:32:21 -0400 Subject: [PATCH 14/22] SUNRPC: Set the TCP_SYNCNT to match the socket timeout Set the TCP SYN count so that we abort the connection attempt at around the expected timeout value. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 9f010369100a..47d0b6a8c32e 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2230,9 +2230,13 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, struct socket *sock) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct net *net = sock_net(sock->sk); + unsigned long connect_timeout; + unsigned long syn_retries; unsigned int keepidle; unsigned int keepcnt; unsigned int timeo; + unsigned long t; spin_lock(&xprt->transport_lock); keepidle = DIV_ROUND_UP(xprt->timeout->to_initval, HZ); @@ -2250,6 +2254,16 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, /* TCP user timeout (see RFC5482) */ tcp_sock_set_user_timeout(sock->sk, timeo); + + /* Connect timeout */ + connect_timeout = max_t(unsigned long, + DIV_ROUND_UP(xprt->connect_timeout, HZ), 1); + syn_retries = max_t(unsigned long, + READ_ONCE(net->ipv4.sysctl_tcp_syn_retries), 1); + for (t = 0; t <= syn_retries && (1UL << t) < connect_timeout; t++) + ; + if (t <= syn_retries) + tcp_sock_set_syncnt(sock->sk, t - 1); } static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, From 3e6ff89d2e4b605d7064686e6d8991b6f780df3f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Aug 2023 17:32:22 -0400 Subject: [PATCH 15/22] SUNRPC: Refactor and simplify connect timeout Instead of requiring the requests to redrive the connection several times, just let the TCP connect code manage it now that we've adjusted the TCP_SYNCNT value. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/xprtsock.c | 34 +++++++++++++++++++++------------- 1 file changed, 21 insertions(+), 13 deletions(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 47d0b6a8c32e..e558f0024fe5 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2266,6 +2266,25 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt, tcp_sock_set_syncnt(sock->sk, t - 1); } +static void xs_tcp_do_set_connect_timeout(struct rpc_xprt *xprt, + unsigned long connect_timeout) +{ + struct sock_xprt *transport = + container_of(xprt, struct sock_xprt, xprt); + struct rpc_timeout to; + unsigned long initval; + + memcpy(&to, xprt->timeout, sizeof(to)); + /* Arbitrary lower limit */ + initval = max_t(unsigned long, connect_timeout, XS_TCP_INIT_REEST_TO); + to.to_initval = initval; + to.to_maxval = initval; + to.to_retries = 0; + memcpy(&transport->tcp_timeout, &to, sizeof(transport->tcp_timeout)); + xprt->timeout = &transport->tcp_timeout; + xprt->connect_timeout = connect_timeout; +} + static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, unsigned long connect_timeout, unsigned long reconnect_timeout) @@ -2277,19 +2296,8 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, spin_lock(&xprt->transport_lock); if (reconnect_timeout < xprt->max_reconnect_timeout) xprt->max_reconnect_timeout = reconnect_timeout; - if (connect_timeout < xprt->connect_timeout) { - memcpy(&to, xprt->timeout, sizeof(to)); - initval = DIV_ROUND_UP(connect_timeout, to.to_retries + 1); - /* Arbitrary lower limit */ - if (initval < XS_TCP_INIT_REEST_TO << 1) - initval = XS_TCP_INIT_REEST_TO << 1; - to.to_initval = initval; - to.to_maxval = initval; - memcpy(&transport->tcp_timeout, &to, - sizeof(transport->tcp_timeout)); - xprt->timeout = &transport->tcp_timeout; - xprt->connect_timeout = connect_timeout; - } + if (connect_timeout < xprt->connect_timeout) + xs_tcp_do_set_connect_timeout(xprt, connect_timeout); set_bit(XPRT_SOCK_UPD_TIMEOUT, &transport->sock_state); spin_unlock(&xprt->transport_lock); } From d2ee413884cdbdfcfc1560526615519311a47d33 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Aug 2023 17:32:23 -0400 Subject: [PATCH 16/22] SUNRPC: Allow specification of TCP client connect timeout at setup When we create a TCP transport, the connect timeout parameters are currently fixed to be 90s. This is problematic in the pNFS flexfiles case, where we may have multiple mirrors, and we would like to fail over quickly to the next mirror if a data server is down. This patch adds the ability to specify the connection parameters at RPC client creation time. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- include/linux/sunrpc/clnt.h | 2 ++ include/linux/sunrpc/xprt.h | 2 ++ net/sunrpc/clnt.c | 2 ++ net/sunrpc/xprtsock.c | 7 +++++-- 4 files changed, 11 insertions(+), 2 deletions(-) diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 4f41d839face..af7358277f1c 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -148,6 +148,8 @@ struct rpc_create_args { const struct cred *cred; unsigned int max_connect; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct rpc_add_xprt_test { diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index b52411bcfe4e..4ecc89301eb7 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -351,6 +351,8 @@ struct xprt_create { struct rpc_xprt_switch *bc_xps; unsigned int flags; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; struct xprt_class { diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index ca2c6efe19c9..06df08b0ee9e 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -534,6 +534,8 @@ struct rpc_clnt *rpc_create(struct rpc_create_args *args) .servername = args->servername, .bc_xprt = args->bc_xprt, .xprtsec = args->xprtsec, + .connect_timeout = args->connect_timeout, + .reconnect_timeout = args->reconnect_timeout, }; char servername[48]; struct rpc_clnt *clnt; diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index e558f0024fe5..6e845e51cbf3 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -2290,8 +2290,6 @@ static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt, unsigned long reconnect_timeout) { struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); - struct rpc_timeout to; - unsigned long initval; spin_lock(&xprt->transport_lock); if (reconnect_timeout < xprt->max_reconnect_timeout) @@ -3350,8 +3348,13 @@ static struct rpc_xprt *xs_setup_tcp(struct xprt_create *args) xprt->timeout = &xs_tcp_default_timeout; xprt->max_reconnect_timeout = xprt->timeout->to_maxval; + if (args->reconnect_timeout) + xprt->max_reconnect_timeout = args->reconnect_timeout; + xprt->connect_timeout = xprt->timeout->to_initval * (xprt->timeout->to_retries + 1); + if (args->connect_timeout) + xs_tcp_do_set_connect_timeout(xprt, args->connect_timeout); INIT_WORK(&transport->recv_worker, xs_stream_data_receive_workfn); INIT_WORK(&transport->error_worker, xs_error_handle); From cd18f24085f012b46b8271640b3c60fb27c0b05f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Aug 2023 17:32:24 -0400 Subject: [PATCH 17/22] SUNRPC: Don't override connect timeouts in rpc_clnt_add_xprt() If the caller specifies the connect timeouts in the arguments to rpc_clnt_add_xprt(), then we shouldn't override them. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- net/sunrpc/clnt.c | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/net/sunrpc/clnt.c b/net/sunrpc/clnt.c index 06df08b0ee9e..8d75290f1a31 100644 --- a/net/sunrpc/clnt.c +++ b/net/sunrpc/clnt.c @@ -3072,6 +3072,11 @@ int rpc_clnt_add_xprt(struct rpc_clnt *clnt, } xprt->resvport = resvport; xprt->reuseport = reuseport; + + if (xprtargs->connect_timeout) + connect_timeout = xprtargs->connect_timeout; + if (xprtargs->reconnect_timeout) + reconnect_timeout = xprtargs->reconnect_timeout; if (xprt->ops->set_connect_timeout != NULL) xprt->ops->set_connect_timeout(xprt, connect_timeout, From 537935f72eb28a3dd0097386f06402e25e66359a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 19 Aug 2023 17:32:25 -0400 Subject: [PATCH 18/22] NFS/pNFS: Set the connect timeout for the pNFS flexfiles driver Ensure that the connect timeout for the pNFS flexfiles driver is of the same order as the I/O timeout, so that we can fail over quickly when trying to read from a data server that is down. Signed-off-by: Trond Myklebust Signed-off-by: Anna Schumaker --- fs/nfs/client.c | 2 ++ fs/nfs/internal.h | 2 ++ fs/nfs/nfs3client.c | 3 +++ fs/nfs/pnfs_nfs.c | 3 +++ 4 files changed, 10 insertions(+) diff --git a/fs/nfs/client.c b/fs/nfs/client.c index e4c5f193ed5e..44eca51b2808 100644 --- a/fs/nfs/client.c +++ b/fs/nfs/client.c @@ -517,6 +517,8 @@ int nfs_create_rpc_client(struct nfs_client *clp, .authflavor = flavor, .cred = cl_init->cred, .xprtsec = cl_init->xprtsec, + .connect_timeout = cl_init->connect_timeout, + .reconnect_timeout = cl_init->reconnect_timeout, }; if (test_bit(NFS_CS_DISCRTRY, &clp->cl_flags)) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 41abea340ad8..9c9cf764f600 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -82,6 +82,8 @@ struct nfs_client_initdata { const struct rpc_timeout *timeparms; const struct cred *cred; struct xprtsec_parms xprtsec; + unsigned long connect_timeout; + unsigned long reconnect_timeout; }; /* diff --git a/fs/nfs/nfs3client.c b/fs/nfs/nfs3client.c index eff3802c5e03..674c012868b1 100644 --- a/fs/nfs/nfs3client.c +++ b/fs/nfs/nfs3client.c @@ -86,6 +86,7 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, int ds_proto, unsigned int ds_timeo, unsigned int ds_retrans) { struct rpc_timeout ds_timeout; + unsigned long connect_timeout = ds_timeo * (ds_retrans + 1) * HZ / 10; struct nfs_client *mds_clp = mds_srv->nfs_client; struct nfs_client_initdata cl_init = { .addr = ds_addr, @@ -98,6 +99,8 @@ struct nfs_client *nfs3_set_ds_client(struct nfs_server *mds_srv, .timeparms = &ds_timeout, .cred = mds_srv->cred, .xprtsec = mds_clp->cl_xprtsec, + .connect_timeout = connect_timeout, + .reconnect_timeout = connect_timeout, }; struct nfs_client *clp; char buf[INET6_ADDRSTRLEN + 1]; diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index a0112ad4937a..a08cfda6fff1 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -852,6 +852,7 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, { struct nfs_client *clp = ERR_PTR(-EIO); struct nfs4_pnfs_ds_addr *da; + unsigned long connect_timeout = timeo * (retrans + 1) * HZ / 10; int status = 0; dprintk("--> %s DS %s\n", __func__, ds->ds_remotestr); @@ -870,6 +871,8 @@ static int _nfs4_pnfs_v3_ds_connect(struct nfs_server *mds_srv, .dstaddr = (struct sockaddr *)&da->da_addr, .addrlen = da->da_addrlen, .servername = clp->cl_hostname, + .connect_timeout = connect_timeout, + .reconnect_timeout = connect_timeout, }; if (da->da_transport != clp->cl_proto) From 51d674a5e4889f1c8e223ac131cf218e1631e423 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 13 Jul 2023 13:02:38 -0400 Subject: [PATCH 19/22] NFSv4.1: use EXCHGID4_FLAG_USE_PNFS_DS for DS server After receiving the location(s) of the DS server(s) in the GETDEVINCEINFO, create the request for the clientid to such server and indicate that the client is connecting to a DS. Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs4client.c | 3 +++ fs/nfs/nfs4proc.c | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index d9114a754db7..27fb25567ce7 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -232,6 +232,8 @@ struct nfs_client *nfs4_alloc_client(const struct nfs_client_initdata *cl_init) __set_bit(NFS_CS_DISCRTRY, &clp->cl_flags); __set_bit(NFS_CS_NO_RETRANS_TIMEOUT, &clp->cl_flags); + if (test_bit(NFS_CS_DS, &cl_init->init_flags)) + __set_bit(NFS_CS_DS, &clp->cl_flags); /* * Set up the connection to the server before we add add to the * global list. @@ -1007,6 +1009,7 @@ struct nfs_client *nfs4_set_ds_client(struct nfs_server *mds_srv, if (mds_srv->flags & NFS_MOUNT_NORESVPORT) __set_bit(NFS_CS_NORESVPORT, &cl_init.init_flags); + __set_bit(NFS_CS_DS, &cl_init.init_flags); /* * Set an authflavor equual to the MDS value. Use the MDS nfs_client * cl_ipaddr so as to use the same EXCHANGE_ID co_ownerid as the MDS diff --git a/fs/nfs/nfs4proc.c b/fs/nfs/nfs4proc.c index 3c24c3c99e8a..3bc6bfdf7b81 100644 --- a/fs/nfs/nfs4proc.c +++ b/fs/nfs/nfs4proc.c @@ -8787,6 +8787,8 @@ nfs4_run_exchange_id(struct nfs_client *clp, const struct cred *cred, #ifdef CONFIG_NFS_V4_1_MIGRATION calldata->args.flags |= EXCHGID4_FLAG_SUPP_MOVED_MIGR; #endif + if (test_bit(NFS_CS_DS, &clp->cl_flags)) + calldata->args.flags |= EXCHGID4_FLAG_USE_PNFS_DS; msg.rpc_argp = &calldata->args; msg.rpc_resp = &calldata->res; task_setup_data.callback_data = calldata; @@ -8864,6 +8866,8 @@ static int _nfs4_proc_exchange_id(struct nfs_client *clp, const struct cred *cre /* Save the EXCHANGE_ID verifier session trunk tests */ memcpy(clp->cl_confirm.data, argp->verifier.data, sizeof(clp->cl_confirm.data)); + if (resp->flags & EXCHGID4_FLAG_USE_PNFS_DS) + set_bit(NFS_CS_DS, &clp->cl_flags); out: trace_nfs4_exchange_id(clp, status); rpc_put_task(task); From f67b55b6588bcf9316a1e6e8d529100a5aa3ebe6 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 22 Aug 2023 14:22:38 -0400 Subject: [PATCH 20/22] NFS: Guard against READDIR loop when entry names exceed MAXNAMELEN Commit 64cfca85bacd asserts the only valid return values for nfs2/3_decode_dirent should not include -ENAMETOOLONG, but for a server that sends a filename3 which exceeds MAXNAMELEN in a READDIR response the client's behavior will be to endlessly retry the operation. We could map -ENAMETOOLONG into -EBADCOOKIE, but that would produce truncated listings without any error. The client should return an error for this case to clearly assert that the server implementation must be corrected. Fixes: 64cfca85bacd ("NFS: Return valid errors from nfs2/3_decode_dirent()") Signed-off-by: Benjamin Coddington Signed-off-by: Anna Schumaker --- fs/nfs/nfs2xdr.c | 2 +- fs/nfs/nfs3xdr.c | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs2xdr.c b/fs/nfs/nfs2xdr.c index 05c3b4b2b3dd..c19093814296 100644 --- a/fs/nfs/nfs2xdr.c +++ b/fs/nfs/nfs2xdr.c @@ -949,7 +949,7 @@ int nfs2_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_filename_inline(xdr, &entry->name, &entry->len); if (unlikely(error)) - return -EAGAIN; + return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN; /* * The type (size and byte order) of nfscookie isn't defined in diff --git a/fs/nfs/nfs3xdr.c b/fs/nfs/nfs3xdr.c index 3b0b650c9c5a..60f032be805a 100644 --- a/fs/nfs/nfs3xdr.c +++ b/fs/nfs/nfs3xdr.c @@ -1991,7 +1991,7 @@ int nfs3_decode_dirent(struct xdr_stream *xdr, struct nfs_entry *entry, error = decode_inline_filename3(xdr, &entry->name, &entry->len); if (unlikely(error)) - return -EAGAIN; + return error == -ENAMETOOLONG ? -ENAMETOOLONG : -EAGAIN; error = decode_cookie3(xdr, &new_cookie); if (unlikely(error)) From 5690eed941ab7e33c3c3d6b850100cabf740f075 Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Thu, 24 Aug 2023 16:43:53 -0400 Subject: [PATCH 21/22] NFSv4.2: fix handling of COPY ERR_OFFLOAD_NO_REQ If the client sent a synchronous copy and the server replied with ERR_OFFLOAD_NO_REQ indicating that it wants an asynchronous copy instead, the client should retry with asynchronous copy. Fixes: 539f57b3e0fd ("NFS handle COPY ERR_OFFLOAD_NO_REQS") Signed-off-by: Olga Kornievskaia Signed-off-by: Anna Schumaker --- fs/nfs/nfs42proc.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/fs/nfs/nfs42proc.c b/fs/nfs/nfs42proc.c index 49f78e23b34c..063e00aff87e 100644 --- a/fs/nfs/nfs42proc.c +++ b/fs/nfs/nfs42proc.c @@ -471,8 +471,9 @@ ssize_t nfs42_proc_copy(struct file *src, loff_t pos_src, continue; } break; - } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && !args.sync) { - args.sync = true; + } else if (err == -NFS4ERR_OFFLOAD_NO_REQS && + args.sync != res.synchronous) { + args.sync = res.synchronous; dst_exception.retry = 1; continue; } else if ((err == -ESTALE || From c4a123d2e8c4dc91d581ee7d05c0cd51a0273fab Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 30 Aug 2023 14:31:31 -0400 Subject: [PATCH 22/22] pNFS: Fix assignment of xprtdata.cred The comma at the end of the line was leftover from an earlier refactor of the _nfs4_pnfs_v3_ds_connect() function. This is technically valid C, so the compilers didn't catch it, but if I'm understanding how it works correctly it assigns the return value of rpc_clnt_add_xprtr() to xprtdata.cred. Reported-by: Olga Kornievskaia Fixes: a12f996d3413 ("NFSv4/pNFS: Use connections to a DS that are all of the same protocol family") Signed-off-by: Anna Schumaker --- fs/nfs/pnfs_nfs.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fs/nfs/pnfs_nfs.c b/fs/nfs/pnfs_nfs.c index a08cfda6fff1..afd23910f3bf 100644 --- a/fs/nfs/pnfs_nfs.c +++ b/fs/nfs/pnfs_nfs.c @@ -946,7 +946,7 @@ static int _nfs4_pnfs_v4_ds_connect(struct nfs_server *mds_srv, * Test this address for session trunking and * add as an alias */ - xprtdata.cred = nfs4_get_clid_cred(clp), + xprtdata.cred = nfs4_get_clid_cred(clp); rpc_clnt_add_xprt(clp->cl_rpcclient, &xprt_args, rpc_clnt_setup_test_and_add_xprt, &rpcdata);