From c443305529d1d3d3bee0d68fdd14ae89835e091f Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Dec 2018 07:52:11 -0500 Subject: SUNRPC: Fix RPC receive hangs The RPC code is occasionally hanging when the receive code fails to empty the socket buffer due to a partial read of the data. When we convert that to an EAGAIN, it appears we occasionally leave data in the socket. The fix is to just keep reading until the socket returns EAGAIN/EWOULDBLOCK. Reported-by: Catalin Marinas Reported-by: Cristian Marussi Reported-by: Chuck Lever Signed-off-by: Trond Myklebust Tested-by: Catalin Marinas Tested-by: Cristian Marussi --- net/sunrpc/xprtsock.c | 39 +++++++++++++++++++-------------------- 1 file changed, 19 insertions(+), 20 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index ae77c71c1f64..0898752cecfe 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -398,7 +398,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) - goto eagain; + goto out; seek = 0; } else { seek -= buf->head[0].iov_len; @@ -418,7 +418,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) - goto eagain; + goto out; seek = 0; } else { seek -= buf->page_len; @@ -433,7 +433,7 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, if (offset == count || msg->msg_flags & (MSG_EOR|MSG_TRUNC)) goto out; if (ret != want) - goto eagain; + goto out; } else offset += buf->tail[0].iov_len; ret = -EMSGSIZE; @@ -441,9 +441,6 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, out: *read = offset - seek_init; return ret; -eagain: - ret = -EAGAIN; - goto out; sock_err: offset += seek; goto out; @@ -486,19 +483,18 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, if (transport->recv.offset == transport->recv.len) { if (xs_read_stream_request_done(transport)) msg->msg_flags |= MSG_EOR; - return transport->recv.copied; + return read; } switch (ret) { + default: + break; case -EMSGSIZE: - return transport->recv.copied; + return read; case 0: return -ESHUTDOWN; - default: - if (ret < 0) - return ret; } - return -EAGAIN; + return ret < 0 ? ret : read; } static size_t @@ -537,7 +533,7 @@ xs_read_stream_call(struct sock_xprt *transport, struct msghdr *msg, int flags) ret = xs_read_stream_request(transport, msg, flags, req); if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) - xprt_complete_bc_request(req, ret); + xprt_complete_bc_request(req, transport->recv.copied); return ret; } @@ -570,7 +566,7 @@ xs_read_stream_reply(struct sock_xprt *transport, struct msghdr *msg, int flags) spin_lock(&xprt->queue_lock); if (msg->msg_flags & (MSG_EOR|MSG_TRUNC)) - xprt_complete_rqst(req->rq_task, ret); + xprt_complete_rqst(req->rq_task, transport->recv.copied); xprt_unpin_rqst(req); out: spin_unlock(&xprt->queue_lock); @@ -591,10 +587,8 @@ xs_read_stream(struct sock_xprt *transport, int flags) if (ret <= 0) goto out_err; transport->recv.offset = ret; - if (ret != want) { - ret = -EAGAIN; - goto out_err; - } + if (transport->recv.offset != want) + return transport->recv.offset; transport->recv.len = be32_to_cpu(transport->recv.fraghdr) & RPC_FRAGMENT_SIZE_MASK; transport->recv.offset -= sizeof(transport->recv.fraghdr); @@ -602,6 +596,9 @@ xs_read_stream(struct sock_xprt *transport, int flags) } switch (be32_to_cpu(transport->recv.calldir)) { + default: + msg.msg_flags |= MSG_TRUNC; + break; case RPC_CALL: ret = xs_read_stream_call(transport, &msg, flags); break; @@ -616,6 +613,8 @@ xs_read_stream(struct sock_xprt *transport, int flags) goto out_err; read += ret; if (transport->recv.offset < transport->recv.len) { + if (!(msg.msg_flags & MSG_TRUNC)) + return read; ret = xs_read_discard(transport->sock, &msg, flags, transport->recv.len - transport->recv.offset); if (ret <= 0) @@ -623,7 +622,7 @@ xs_read_stream(struct sock_xprt *transport, int flags) transport->recv.offset += ret; read += ret; if (transport->recv.offset != transport->recv.len) - return -EAGAIN; + return read; } if (xs_read_stream_request_done(transport)) { trace_xs_stream_read_request(transport); @@ -653,7 +652,7 @@ static void xs_stream_data_receive(struct sock_xprt *transport) clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); for (;;) { ret = xs_read_stream(transport, MSG_DONTWAIT); - if (ret <= 0) + if (ret < 0) break; read += ret; cond_resched(); -- cgit From 16e5e90f0e4f9b7b2e4d08558a2f695e2fa1fb0d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 2 Dec 2018 15:22:48 -0500 Subject: SUNRPC: Fix up handling of the XDRBUF_SPARSE_PAGES flag If the allocator fails before it has reached the target number of pages, then we need to recheck that we're not seeking past the page buffer. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 0898752cecfe..cd85c492c267 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -330,18 +330,16 @@ xs_alloc_sparse_pages(struct xdr_buf *buf, size_t want, gfp_t gfp) { size_t i,n; - if (!(buf->flags & XDRBUF_SPARSE_PAGES)) + if (!want || !(buf->flags & XDRBUF_SPARSE_PAGES)) return want; - if (want > buf->page_len) - want = buf->page_len; n = (buf->page_base + want + PAGE_SIZE - 1) >> PAGE_SHIFT; for (i = 0; i < n; i++) { if (buf->pages[i]) continue; buf->bvec[i].bv_page = buf->pages[i] = alloc_page(gfp); if (!buf->pages[i]) { - buf->page_len = (i * PAGE_SIZE) - buf->page_base; - return buf->page_len; + i *= PAGE_SIZE; + return i > buf->page_base ? i - buf->page_base : 0; } } return want; @@ -404,10 +402,11 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, seek -= buf->head[0].iov_len; offset += buf->head[0].iov_len; } - if (seek < buf->page_len) { - want = xs_alloc_sparse_pages(buf, - min_t(size_t, count - offset, buf->page_len), - GFP_NOWAIT); + + want = xs_alloc_sparse_pages(buf, + min_t(size_t, count - offset, buf->page_len), + GFP_NOWAIT); + if (seek < want) { ret = xs_read_bvec(sock, msg, flags, buf->bvec, xdr_buf_pagecount(buf), want + buf->page_base, @@ -421,9 +420,10 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, goto out; seek = 0; } else { - seek -= buf->page_len; - offset += buf->page_len; + seek -= want; + offset += want; } + if (seek < buf->tail[0].iov_len) { want = min_t(size_t, count - offset, buf->tail[0].iov_len); ret = xs_read_kvec(sock, msg, flags, &buf->tail[0], want, seek); -- cgit From 26781eab48ece79000ffc4e69be402f2524e1137 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Dec 2018 15:41:38 -0500 Subject: SUNRPC: Treat EFAULT as a truncated message in xs_read_stream_request() Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index cd85c492c267..86bb502e538a 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -437,7 +437,6 @@ xs_read_xdr_buf(struct socket *sock, struct msghdr *msg, int flags, } else offset += buf->tail[0].iov_len; ret = -EMSGSIZE; - msg->msg_flags |= MSG_TRUNC; out: *read = offset - seek_init; return ret; @@ -489,7 +488,9 @@ xs_read_stream_request(struct sock_xprt *transport, struct msghdr *msg, switch (ret) { default: break; + case -EFAULT: case -EMSGSIZE: + msg->msg_flags |= MSG_TRUNC; return read; case 0: return -ESHUTDOWN; -- cgit From b76a5afdce6c6dacfbd51863b31b3d7cc61ca21e Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Dec 2018 15:58:58 -0500 Subject: SUNRPC: Use the discard iterator rather than MSG_TRUNC When discarding message data from the stream, we're better off using the discard iterator, since that will work with non-TCP streams. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 86bb502e538a..fc6d129401ba 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -376,8 +376,8 @@ static ssize_t xs_read_discard(struct socket *sock, struct msghdr *msg, int flags, size_t count) { - struct kvec kvec = { 0 }; - return xs_read_kvec(sock, msg, flags | MSG_TRUNC, &kvec, count, 0); + iov_iter_discard(&msg->msg_iter, READ, count); + return sock_recvmsg(sock, msg, flags); } static ssize_t @@ -616,6 +616,7 @@ xs_read_stream(struct sock_xprt *transport, int flags) if (transport->recv.offset < transport->recv.len) { if (!(msg.msg_flags & MSG_TRUNC)) return read; + msg.msg_flags = 0; ret = xs_read_discard(transport->sock, &msg, flags, transport->recv.len - transport->recv.offset); if (ret <= 0) -- cgit From dfcf0380858b4e760ae02665649d884d1baa50c9 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 4 Dec 2018 07:50:06 -0500 Subject: SUNRPC: Fix up socket polling Ensure that we do not exit the socket read callback without clearing XPRT_SOCK_DATA_READY. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index fc6d129401ba..92d08be2384d 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -649,9 +649,9 @@ static void xs_stream_data_receive(struct sock_xprt *transport) ssize_t ret = 0; mutex_lock(&transport->recv_mutex); + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); if (transport->sock == NULL) goto out; - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); for (;;) { ret = xs_read_stream(transport, MSG_DONTWAIT); if (ret < 0) @@ -1346,10 +1346,10 @@ static void xs_udp_data_receive(struct sock_xprt *transport) int err; mutex_lock(&transport->recv_mutex); + clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); sk = transport->inet; if (sk == NULL) goto out; - clear_bit(XPRT_SOCK_DATA_READY, &transport->sock_state); for (;;) { skb = skb_recv_udp(sk, 0, 1, &err); if (skb == NULL) -- cgit From 79462857eb547e5d17fc8445b9768615e02dc1cf Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 3 Dec 2018 18:49:00 -0500 Subject: SUNRPC: Don't force a redundant disconnection in xs_read_stream() If the connection is broken, then xs_tcp_state_change() will take care of scheduling the socket close as soon as appropriate. xs_read_stream() just needs to report the error. Signed-off-by: Trond Myklebust --- net/sunrpc/xprtsock.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'net/sunrpc/xprtsock.c') diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c index 92d08be2384d..8a5e823e0b33 100644 --- a/net/sunrpc/xprtsock.c +++ b/net/sunrpc/xprtsock.c @@ -634,13 +634,7 @@ xs_read_stream(struct sock_xprt *transport, int flags) transport->recv.len = 0; return read; out_err: - switch (ret) { - case 0: - case -ESHUTDOWN: - xprt_force_disconnect(&transport->xprt); - return -ESHUTDOWN; - } - return ret; + return ret != 0 ? ret : -ESHUTDOWN; } static void xs_stream_data_receive(struct sock_xprt *transport) -- cgit