From fd1defc257e2b12ab69bc0b379105c00eca4e112 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Thu, 6 Feb 2014 14:38:53 -0500 Subject: [PATCH 1/8] NFS: Do not set NFS_INO_INVALID_LABEL unless server supports labeled NFS Commit aa9c2669626c (NFS: Client implementation of Labeled-NFS) introduces a performance regression. When nfs_zap_caches_locked is called, it sets the NFS_INO_INVALID_LABEL flag irrespectively of whether or not the NFS server supports security labels. Since that flag is never cleared, it means that all calls to nfs_revalidate_inode() will now trigger an on-the-wire GETATTR call. This patch ensures that we never set the NFS_INO_INVALID_LABEL unless the server advertises support for labeled NFS. It also causes nfs_setsecurity() to clear NFS_INO_INVALID_LABEL when it has successfully set the security label for the inode. Finally it gets rid of the NFS_INO_INVALID_LABEL cruft from nfs_update_inode, which has nothing to do with labeled NFS. Reported-by: Neil Brown Cc: stable@vger.kernel.org # 3.11+ Tested-by: Neil Brown Signed-off-by: Trond Myklebust --- fs/nfs/inode.c | 14 ++++++++++---- fs/nfs/internal.h | 9 +++++++++ 2 files changed, 19 insertions(+), 4 deletions(-) diff --git a/fs/nfs/inode.c b/fs/nfs/inode.c index 28a0a3cbd3b7..360114ae8b82 100644 --- a/fs/nfs/inode.c +++ b/fs/nfs/inode.c @@ -164,17 +164,16 @@ static void nfs_zap_caches_locked(struct inode *inode) if (S_ISREG(mode) || S_ISDIR(mode) || S_ISLNK(mode)) { nfs_fscache_invalidate(inode); nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_LABEL | NFS_INO_INVALID_DATA | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_PAGECACHE; } else nfsi->cache_validity |= NFS_INO_INVALID_ATTR - | NFS_INO_INVALID_LABEL | NFS_INO_INVALID_ACCESS | NFS_INO_INVALID_ACL | NFS_INO_REVAL_PAGECACHE; + nfs_zap_label_cache_locked(nfsi); } void nfs_zap_caches(struct inode *inode) @@ -266,6 +265,13 @@ nfs_init_locked(struct inode *inode, void *opaque) } #ifdef CONFIG_NFS_V4_SECURITY_LABEL +static void nfs_clear_label_invalid(struct inode *inode) +{ + spin_lock(&inode->i_lock); + NFS_I(inode)->cache_validity &= ~NFS_INO_INVALID_LABEL; + spin_unlock(&inode->i_lock); +} + void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, struct nfs4_label *label) { @@ -283,6 +289,7 @@ void nfs_setsecurity(struct inode *inode, struct nfs_fattr *fattr, __func__, (char *)label->label, label->len, error); + nfs_clear_label_invalid(inode); } } @@ -1648,7 +1655,7 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) inode->i_blocks = fattr->du.nfs2.blocks; /* Update attrtimeo value if we're out of the unstable period */ - if (invalid & (NFS_INO_INVALID_ATTR|NFS_INO_INVALID_LABEL)) { + if (invalid & NFS_INO_INVALID_ATTR) { nfs_inc_stats(inode, NFSIOS_ATTRINVALIDATE); nfsi->attrtimeo = NFS_MINATTRTIMEO(inode); nfsi->attrtimeo_timestamp = now; @@ -1661,7 +1668,6 @@ static int nfs_update_inode(struct inode *inode, struct nfs_fattr *fattr) } } invalid &= ~NFS_INO_INVALID_ATTR; - invalid &= ~NFS_INO_INVALID_LABEL; /* Don't invalidate the data if we were to blame */ if (!(S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index 8b5cc04a8611..fafdddac8271 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -279,9 +279,18 @@ static inline void nfs4_label_free(struct nfs4_label *label) } return; } + +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ + if (nfs_server_capable(&nfsi->vfs_inode, NFS_CAP_SECURITY_LABEL)) + nfsi->cache_validity |= NFS_INO_INVALID_LABEL; +} #else static inline struct nfs4_label *nfs4_label_alloc(struct nfs_server *server, gfp_t flags) { return NULL; } static inline void nfs4_label_free(void *label) {} +static inline void nfs_zap_label_cache_locked(struct nfs_inode *nfsi) +{ +} #endif /* CONFIG_NFS_V4_SECURITY_LABEL */ /* proc.c */ From a699d65ec4ff82245d2c4fdfb8ed1d64776d756e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 10 Feb 2014 16:28:52 -0500 Subject: [PATCH 2/8] SUNRPC: Don't create a gss auth cache unless rpc.gssd is running An infinite loop is caused when nfs4_establish_lease() fails with -EACCES. This causes nfs4_handle_reclaim_lease_error() to sleep a bit and resets the NFS4CLNT_LEASE_EXPIRED bit. This in turn causes nfs4_state_manager() to try and reestablished the lease, again, again, again... The problem is a valid RPCSEC_GSS client is being created when rpc.gssd is not running. Link: http://lkml.kernel.org/r/1392066375-16502-1-git-send-email-steved@redhat.com Fixes: 0ea9de0ea6a4 (sunrpc: turn warn_gssd() log message into a dprintk()) Reported-by: Steve Dickson Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 6c0513a7f992..44a61e8fda6f 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -991,6 +991,8 @@ gss_create_new(struct rpc_auth_create_args *args, struct rpc_clnt *clnt) gss_auth->service = gss_pseudoflavor_to_service(gss_auth->mech, flavor); if (gss_auth->service == 0) goto err_put_mech; + if (!gssd_running(gss_auth->net)) + goto err_put_mech; auth = &gss_auth->rpc_auth; auth->au_cslack = GSS_CRED_SLACK >> 2; auth->au_rslack = GSS_VERF_SLACK >> 2; From 06ea0bfe6e6043cb56a78935a19f6f8ebc636226 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Feb 2014 09:15:54 -0500 Subject: [PATCH 3/8] SUNRPC: Fix races in xs_nospace() When a send failure occurs due to the socket being out of buffer space, we call xs_nospace() in order to have the RPC task wait until the socket has drained enough to make it worth while trying again. The current patch fixes a race in which the socket is drained before we get round to setting up the machinery in xs_nospace(), and which is reported to cause hangs. Link: http://lkml.kernel.org/r/20140210170315.33dfc621@notabene.brown Fixes: a9a6b52ee1ba (SUNRPC: Don't start the retransmission timer...) Reported-by: Neil Brown Cc: stable@vger.kernel.org Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 817a1e523969..0addefca8e77 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -510,6 +510,7 @@ static int xs_nospace(struct rpc_task *task) struct rpc_rqst *req = task->tk_rqstp; struct rpc_xprt *xprt = req->rq_xprt; struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt); + struct sock *sk = transport->inet; int ret = -EAGAIN; dprintk("RPC: %5u xmit incomplete (%u left of %u)\n", @@ -527,7 +528,7 @@ static int xs_nospace(struct rpc_task *task) * window size */ set_bit(SOCK_NOSPACE, &transport->sock->flags); - transport->inet->sk_write_pending++; + sk->sk_write_pending++; /* ...and wait for more buffer space */ xprt_wait_for_buffer_space(task, xs_nospace_callback); } @@ -537,6 +538,9 @@ static int xs_nospace(struct rpc_task *task) } spin_unlock_bh(&xprt->transport_lock); + + /* Race breaker in case memory is freed before above code is called */ + sk->sk_write_space(sk); return ret; } From 628356791b04ea988fee070f66a748a823d001bb Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 11 Feb 2014 13:56:54 -0500 Subject: [PATCH 4/8] SUNRPC: Fix potential memory scribble in xprt_free_bc_request() The call to xprt_free_allocation() will call list_del() on req->rq_bc_pa_list, which is not attached to a list. This patch moves the list_del() out of xprt_free_allocation() and into those callers that need it. Signed-off-by: Trond Myklebust --- net/sunrpc/backchannel_rqst.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/backchannel_rqst.c b/net/sunrpc/backchannel_rqst.c index 890a29912d5a..e860d4f7ed2a 100644 --- a/net/sunrpc/backchannel_rqst.c +++ b/net/sunrpc/backchannel_rqst.c @@ -64,7 +64,6 @@ static void xprt_free_allocation(struct rpc_rqst *req) free_page((unsigned long)xbufp->head[0].iov_base); xbufp = &req->rq_snd_buf; free_page((unsigned long)xbufp->head[0].iov_base); - list_del(&req->rq_bc_pa_list); kfree(req); } @@ -168,8 +167,10 @@ out_free: /* * Memory allocation failed, free the temporary list */ - list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) + list_for_each_entry_safe(req, tmp, &tmp_list, rq_bc_pa_list) { + list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); + } dprintk("RPC: setup backchannel transport failed\n"); return -ENOMEM; @@ -198,6 +199,7 @@ void xprt_destroy_backchannel(struct rpc_xprt *xprt, unsigned int max_reqs) xprt_dec_alloc_count(xprt, max_reqs); list_for_each_entry_safe(req, tmp, &xprt->bc_pa_list, rq_bc_pa_list) { dprintk("RPC: req=%p\n", req); + list_del(&req->rq_bc_pa_list); xprt_free_allocation(req); if (--max_reqs == 0) break; From 9eb2ddb48ce3a7bd745c14a933112994647fa3cd Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Feb 2014 12:14:13 -0500 Subject: [PATCH 5/8] SUNRPC: Ensure that gss_auth isn't freed before its upcall messages Fix a race in which the RPC client is shutting down while the gss daemon is processing a downcall. If the RPC client manages to shut down before the gss daemon is done, then the struct gss_auth used in gss_release_msg() may have already been freed. Link: http://lkml.kernel.org/r/1392494917.71728.YahooMailNeo@web140002.mail.bf1.yahoo.com Reported-by: John Reported-by: Borislav Petkov Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 44a61e8fda6f..1ba1fd114912 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -108,6 +108,7 @@ struct gss_auth { static DEFINE_SPINLOCK(pipe_version_lock); static struct rpc_wait_queue pipe_version_rpc_waitqueue; static DECLARE_WAIT_QUEUE_HEAD(pipe_version_waitqueue); +static void gss_put_auth(struct gss_auth *gss_auth); static void gss_free_ctx(struct gss_cl_ctx *); static const struct rpc_pipe_ops gss_upcall_ops_v0; @@ -320,6 +321,7 @@ gss_release_msg(struct gss_upcall_msg *gss_msg) if (gss_msg->ctx != NULL) gss_put_ctx(gss_msg->ctx); rpc_destroy_wait_queue(&gss_msg->rpc_waitqueue); + gss_put_auth(gss_msg->auth); kfree(gss_msg); } @@ -500,6 +502,7 @@ gss_alloc_msg(struct gss_auth *gss_auth, if (err) goto err_free_msg; }; + kref_get(&gss_auth->kref); return gss_msg; err_free_msg: kfree(gss_msg); @@ -1063,6 +1066,12 @@ gss_free_callback(struct kref *kref) gss_free(gss_auth); } +static void +gss_put_auth(struct gss_auth *gss_auth) +{ + kref_put(&gss_auth->kref, gss_free_callback); +} + static void gss_destroy(struct rpc_auth *auth) { @@ -1084,7 +1093,7 @@ gss_destroy(struct rpc_auth *auth) gss_auth->gss_pipe[1] = NULL; rpcauth_destroy_credcache(auth); - kref_put(&gss_auth->kref, gss_free_callback); + gss_put_auth(gss_auth); } /* @@ -1255,7 +1264,7 @@ gss_destroy_nullcred(struct rpc_cred *cred) call_rcu(&cred->cr_rcu, gss_free_cred_callback); if (ctx) gss_put_ctx(ctx); - kref_put(&gss_auth->kref, gss_free_callback); + gss_put_auth(gss_auth); } static void From e9776d0f4adee8877145672f6416b06b57f2dc27 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Feb 2014 13:28:01 -0500 Subject: [PATCH 6/8] SUNRPC: Fix a pipe_version reference leak In gss_alloc_msg(), if the call to gss_encode_v1_msg() fails, we want to release the reference to the pipe_version that was obtained earlier in the function. Fixes: 9d3a2260f0f4b (SUNRPC: Fix buffer overflow checking in...) Signed-off-by: Trond Myklebust --- net/sunrpc/auth_gss/auth_gss.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/net/sunrpc/auth_gss/auth_gss.c b/net/sunrpc/auth_gss/auth_gss.c index 1ba1fd114912..36e431ee1c90 100644 --- a/net/sunrpc/auth_gss/auth_gss.c +++ b/net/sunrpc/auth_gss/auth_gss.c @@ -500,10 +500,12 @@ gss_alloc_msg(struct gss_auth *gss_auth, default: err = gss_encode_v1_msg(gss_msg, service_name, gss_auth->target_name); if (err) - goto err_free_msg; + goto err_put_pipe_version; }; kref_get(&gss_auth->kref); return gss_msg; +err_put_pipe_version: + put_pipe_version(gss_auth->net); err_free_msg: kfree(gss_msg); err: From 292f503cade2b1d966239ef56a851e6897d1ba92 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 16 Feb 2014 21:42:56 -0500 Subject: [PATCH 7/8] NFSv4: Use the correct net namespace in nfs4_update_server We need to use the same net namespace that was used to resolve the hostname and sockaddr arguments. Fixes: 32e62b7c3ef09 (NFS: Add nfs4_update_server) Cc: Chuck Lever Signed-off-by: Trond Myklebust --- fs/nfs/internal.h | 3 ++- fs/nfs/nfs4client.c | 7 ++++--- fs/nfs/nfs4namespace.c | 12 ++++++------ 3 files changed, 12 insertions(+), 10 deletions(-) diff --git a/fs/nfs/internal.h b/fs/nfs/internal.h index fafdddac8271..b46cf5a67329 100644 --- a/fs/nfs/internal.h +++ b/fs/nfs/internal.h @@ -176,7 +176,8 @@ extern struct nfs_server *nfs4_create_server( extern struct nfs_server *nfs4_create_referral_server(struct nfs_clone_mount *, struct nfs_fh *); extern int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen); + struct sockaddr *sap, size_t salen, + struct net *net); extern void nfs_free_server(struct nfs_server *server); extern struct nfs_server *nfs_clone_server(struct nfs_server *, struct nfs_fh *, diff --git a/fs/nfs/nfs4client.c b/fs/nfs/nfs4client.c index 860ad26a5590..0e46d3d1b6cc 100644 --- a/fs/nfs/nfs4client.c +++ b/fs/nfs/nfs4client.c @@ -1135,6 +1135,7 @@ static int nfs_probe_destination(struct nfs_server *server) * @hostname: new end-point's hostname * @sap: new end-point's socket address * @salen: size of "sap" + * @net: net namespace * * The nfs_server must be quiescent before this function is invoked. * Either its session is drained (NFSv4.1+), or its transport is @@ -1143,13 +1144,13 @@ static int nfs_probe_destination(struct nfs_server *server) * Returns zero on success, or a negative errno value. */ int nfs4_update_server(struct nfs_server *server, const char *hostname, - struct sockaddr *sap, size_t salen) + struct sockaddr *sap, size_t salen, struct net *net) { struct nfs_client *clp = server->nfs_client; struct rpc_clnt *clnt = server->client; struct xprt_create xargs = { .ident = clp->cl_proto, - .net = &init_net, + .net = net, .dstaddr = sap, .addrlen = salen, .servername = hostname, @@ -1189,7 +1190,7 @@ int nfs4_update_server(struct nfs_server *server, const char *hostname, error = nfs4_set_client(server, hostname, sap, salen, buf, clp->cl_rpcclient->cl_auth->au_flavor, clp->cl_proto, clnt->cl_timeout, - clp->cl_minorversion, clp->cl_net); + clp->cl_minorversion, net); nfs_put_client(clp); if (error != 0) { nfs_server_insert_lists(server); diff --git a/fs/nfs/nfs4namespace.c b/fs/nfs/nfs4namespace.c index 4e7f05d3e9db..3d5dbf80d46a 100644 --- a/fs/nfs/nfs4namespace.c +++ b/fs/nfs/nfs4namespace.c @@ -121,9 +121,8 @@ static int nfs4_validate_fspath(struct dentry *dentry, } static size_t nfs_parse_server_name(char *string, size_t len, - struct sockaddr *sa, size_t salen, struct nfs_server *server) + struct sockaddr *sa, size_t salen, struct net *net) { - struct net *net = rpc_net_ns(server->client); ssize_t ret; ret = rpc_pton(net, string, len, sa, salen); @@ -223,6 +222,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(NFS_SB(mountdata->sb)->client); struct vfsmount *mnt = ERR_PTR(-ENOENT); char *mnt_path; unsigned int maxbuflen; @@ -248,8 +248,7 @@ static struct vfsmount *try_location(struct nfs_clone_mount *mountdata, continue; mountdata->addrlen = nfs_parse_server_name(buf->data, buf->len, - mountdata->addr, addr_bufsize, - NFS_SB(mountdata->sb)); + mountdata->addr, addr_bufsize, net); if (mountdata->addrlen == 0) continue; @@ -419,6 +418,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, const struct nfs4_fs_location *location) { const size_t addr_bufsize = sizeof(struct sockaddr_storage); + struct net *net = rpc_net_ns(server->client); struct sockaddr *sap; unsigned int s; size_t salen; @@ -440,7 +440,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, continue; salen = nfs_parse_server_name(buf->data, buf->len, - sap, addr_bufsize, server); + sap, addr_bufsize, net); if (salen == 0) continue; rpc_set_port(sap, NFS_PORT); @@ -450,7 +450,7 @@ static int nfs4_try_replacing_one_location(struct nfs_server *server, if (hostname == NULL) break; - error = nfs4_update_server(server, hostname, sap, salen); + error = nfs4_update_server(server, hostname, sap, salen, net); kfree(hostname); if (error == 0) break; From 146d70caaa1b87f64597743429d7da4b8073d0c9 Mon Sep 17 00:00:00 2001 From: Andy Adamson Date: Tue, 18 Feb 2014 10:36:05 -0500 Subject: [PATCH 8/8] NFS fix error return in nfs4_select_rw_stateid Do not return an error when nfs4_copy_delegation_stateid succeeds. Signed-off-by: Andy Adamson Link: http://lkml.kernel.org/r/1392737765-41942-1-git-send-email-andros@netapp.com Fixes: ef1820f9be27b (NFSv4: Don't try to recover NFSv4 locks when...) Cc: NeilBrown Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- fs/nfs/nfs4state.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c index e5be72518bd7..e1a47217c05e 100644 --- a/fs/nfs/nfs4state.c +++ b/fs/nfs/nfs4state.c @@ -1015,8 +1015,11 @@ int nfs4_select_rw_stateid(nfs4_stateid *dst, struct nfs4_state *state, if (ret == -EIO) /* A lost lock - don't even consider delegations */ goto out; - if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) + /* returns true if delegation stateid found and copied */ + if (nfs4_copy_delegation_stateid(dst, state->inode, fmode)) { + ret = 0; goto out; + } if (ret != -ENOENT) /* nfs4_copy_delegation_stateid() didn't over-write * dst, so it still has the lock stateid which we now