aboutsummaryrefslogtreecommitdiff
path: root/net/sunrpc/xprtsock.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/sunrpc/xprtsock.c')
-rw-r--r--net/sunrpc/xprtsock.c88
1 files changed, 63 insertions, 25 deletions
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index d6656d7768f4..dd9d295813cf 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -47,6 +47,8 @@
#include <net/udp.h>
#include <net/tcp.h>
+#include <trace/events/sunrpc.h>
+
#include "sunrpc.h"
static void xs_close(struct rpc_xprt *xprt);
@@ -391,8 +393,10 @@ static int xs_send_kvec(struct socket *sock, struct sockaddr *addr, int addrlen,
return kernel_sendmsg(sock, &msg, NULL, 0, 0);
}
-static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more)
+static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned int base, int more, bool zerocopy)
{
+ ssize_t (*do_sendpage)(struct socket *sock, struct page *page,
+ int offset, size_t size, int flags);
struct page **ppage;
unsigned int remainder;
int err, sent = 0;
@@ -401,6 +405,9 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
base += xdr->page_base;
ppage = xdr->pages + (base >> PAGE_SHIFT);
base &= ~PAGE_MASK;
+ do_sendpage = sock->ops->sendpage;
+ if (!zerocopy)
+ do_sendpage = sock_no_sendpage;
for(;;) {
unsigned int len = min_t(unsigned int, PAGE_SIZE - base, remainder);
int flags = XS_SENDMSG_FLAGS;
@@ -408,7 +415,7 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
remainder -= len;
if (remainder != 0 || more)
flags |= MSG_MORE;
- err = sock->ops->sendpage(sock, *ppage, base, len, flags);
+ err = do_sendpage(sock, *ppage, base, len, flags);
if (remainder == 0 || err != len)
break;
sent += err;
@@ -429,9 +436,10 @@ static int xs_send_pagedata(struct socket *sock, struct xdr_buf *xdr, unsigned i
* @addrlen: UDP only -- length of destination address
* @xdr: buffer containing this request
* @base: starting position in the buffer
+ * @zerocopy: true if it is safe to use sendpage()
*
*/
-static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base)
+static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen, struct xdr_buf *xdr, unsigned int base, bool zerocopy)
{
unsigned int remainder = xdr->len - base;
int err, sent = 0;
@@ -459,7 +467,7 @@ static int xs_sendpages(struct socket *sock, struct sockaddr *addr, int addrlen,
if (base < xdr->page_len) {
unsigned int len = xdr->page_len - base;
remainder -= len;
- err = xs_send_pagedata(sock, xdr, base, remainder != 0);
+ err = xs_send_pagedata(sock, xdr, base, remainder != 0, zerocopy);
if (remainder == 0 || err != len)
goto out;
sent += err;
@@ -562,7 +570,7 @@ static int xs_local_send_request(struct rpc_task *task)
req->rq_svec->iov_base, req->rq_svec->iov_len);
status = xs_sendpages(transport->sock, NULL, 0,
- xdr, req->rq_bytes_sent);
+ xdr, req->rq_bytes_sent, true);
dprintk("RPC: %s(%u) = %d\n",
__func__, xdr->len - req->rq_bytes_sent, status);
if (likely(status >= 0)) {
@@ -618,7 +626,7 @@ static int xs_udp_send_request(struct rpc_task *task)
status = xs_sendpages(transport->sock,
xs_addr(xprt),
xprt->addrlen, xdr,
- req->rq_bytes_sent);
+ req->rq_bytes_sent, true);
dprintk("RPC: xs_udp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
@@ -665,8 +673,10 @@ static void xs_tcp_shutdown(struct rpc_xprt *xprt)
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct socket *sock = transport->sock;
- if (sock != NULL)
+ if (sock != NULL) {
kernel_sock_shutdown(sock, SHUT_WR);
+ trace_rpc_socket_shutdown(xprt, sock);
+ }
}
/**
@@ -689,6 +699,7 @@ static int xs_tcp_send_request(struct rpc_task *task)
struct rpc_xprt *xprt = req->rq_xprt;
struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
struct xdr_buf *xdr = &req->rq_snd_buf;
+ bool zerocopy = true;
int status;
xs_encode_stream_record_marker(&req->rq_snd_buf);
@@ -696,13 +707,20 @@ static int xs_tcp_send_request(struct rpc_task *task)
xs_pktdump("packet data:",
req->rq_svec->iov_base,
req->rq_svec->iov_len);
+ /* Don't use zero copy if this is a resend. If the RPC call
+ * completes while the socket holds a reference to the pages,
+ * then we may end up resending corrupted data.
+ */
+ if (task->tk_flags & RPC_TASK_SENT)
+ zerocopy = false;
/* Continue transmitting the packet/record. We must be careful
* to cope with writespace callbacks arriving _after_ we have
* called sendmsg(). */
while (1) {
status = xs_sendpages(transport->sock,
- NULL, 0, xdr, req->rq_bytes_sent);
+ NULL, 0, xdr, req->rq_bytes_sent,
+ zerocopy);
dprintk("RPC: xs_tcp_send_request(%u) = %d\n",
xdr->len - req->rq_bytes_sent, status);
@@ -811,6 +829,7 @@ static void xs_reset_transport(struct sock_xprt *transport)
sk->sk_no_check = 0;
+ trace_rpc_socket_close(&transport->xprt, sock);
sock_release(sock);
}
@@ -830,6 +849,8 @@ static void xs_close(struct rpc_xprt *xprt)
dprintk("RPC: xs_close xprt %p\n", xprt);
+ cancel_delayed_work_sync(&transport->connect_worker);
+
xs_reset_transport(transport);
xprt->reestablish_timeout = 0;
@@ -849,14 +870,6 @@ static void xs_tcp_close(struct rpc_xprt *xprt)
xs_tcp_shutdown(xprt);
}
-static void xs_local_destroy(struct rpc_xprt *xprt)
-{
- xs_close(xprt);
- xs_free_peer_addresses(xprt);
- xprt_free(xprt);
- module_put(THIS_MODULE);
-}
-
/**
* xs_destroy - prepare to shutdown a transport
* @xprt: doomed transport
@@ -864,13 +877,12 @@ static void xs_local_destroy(struct rpc_xprt *xprt)
*/
static void xs_destroy(struct rpc_xprt *xprt)
{
- struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
-
dprintk("RPC: xs_destroy xprt %p\n", xprt);
- cancel_delayed_work_sync(&transport->connect_worker);
-
- xs_local_destroy(xprt);
+ xs_close(xprt);
+ xs_free_peer_addresses(xprt);
+ xprt_free(xprt);
+ module_put(THIS_MODULE);
}
static inline struct rpc_xprt *xprt_from_sock(struct sock *sk)
@@ -1492,6 +1504,7 @@ static void xs_tcp_state_change(struct sock *sk)
sock_flag(sk, SOCK_ZAPPED),
sk->sk_shutdown);
+ trace_rpc_socket_state_change(xprt, sk->sk_socket);
switch (sk->sk_state) {
case TCP_ESTABLISHED:
spin_lock(&xprt->transport_lock);
@@ -1505,6 +1518,7 @@ static void xs_tcp_state_change(struct sock *sk)
transport->tcp_copied = 0;
transport->tcp_flags =
TCP_RCV_COPY_FRAGHDR | TCP_RCV_COPY_XID;
+ xprt->connect_cookie++;
xprt_wake_pending_tasks(xprt, -EAGAIN);
}
@@ -1810,6 +1824,10 @@ static inline void xs_reclassify_socket(int family, struct socket *sock)
}
#endif
+static void xs_dummy_setup_socket(struct work_struct *work)
+{
+}
+
static struct socket *xs_create_sock(struct rpc_xprt *xprt,
struct sock_xprt *transport, int family, int type, int protocol)
{
@@ -1896,6 +1914,7 @@ static int xs_local_setup_socket(struct sock_xprt *transport)
xprt, xprt->address_strings[RPC_DISPLAY_ADDR]);
status = xs_local_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, status);
switch (status) {
case 0:
dprintk("RPC: xprt %p connected to %s\n",
@@ -2039,6 +2058,7 @@ static void xs_udp_setup_socket(struct work_struct *work)
xprt->address_strings[RPC_DISPLAY_PORT]);
xs_udp_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, 0);
status = 0;
out:
xprt_clear_connecting(xprt);
@@ -2064,6 +2084,8 @@ static void xs_abort_connection(struct sock_xprt *transport)
memset(&any, 0, sizeof(any));
any.sa_family = AF_UNSPEC;
result = kernel_connect(transport->sock, &any, sizeof(any), 0);
+ trace_rpc_socket_reset_connection(&transport->xprt,
+ transport->sock, result);
if (!result)
xs_sock_reset_connection_flags(&transport->xprt);
dprintk("RPC: AF_UNSPEC connect return code %d\n", result);
@@ -2102,6 +2124,19 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
if (!transport->inet) {
struct sock *sk = sock->sk;
+ unsigned int keepidle = xprt->timeout->to_initval / HZ;
+ unsigned int keepcnt = xprt->timeout->to_retries + 1;
+ unsigned int opt_on = 1;
+
+ /* TCP Keepalive options */
+ kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
+ (char *)&opt_on, sizeof(opt_on));
+ kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
+ (char *)&keepidle, sizeof(keepidle));
+ kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
+ (char *)&keepidle, sizeof(keepidle));
+ kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
+ (char *)&keepcnt, sizeof(keepcnt));
write_lock_bh(&sk->sk_callback_lock);
@@ -2141,7 +2176,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
case 0:
case -EINPROGRESS:
/* SYN_SENT! */
- xprt->connect_cookie++;
if (xprt->reestablish_timeout < XS_TCP_INIT_REEST_TO)
xprt->reestablish_timeout = XS_TCP_INIT_REEST_TO;
}
@@ -2194,6 +2228,7 @@ static void xs_tcp_setup_socket(struct work_struct *work)
xprt->address_strings[RPC_DISPLAY_PORT]);
status = xs_tcp_finish_connecting(xprt, sock);
+ trace_rpc_socket_connect(xprt, sock, status);
dprintk("RPC: %p connect status %d connected %d sock state %d\n",
xprt, -status, xprt_connected(xprt),
sock->sk->sk_state);
@@ -2487,7 +2522,7 @@ static struct rpc_xprt_ops xs_local_ops = {
.send_request = xs_local_send_request,
.set_retrans_timeout = xprt_set_retrans_timeout_def,
.close = xs_close,
- .destroy = xs_local_destroy,
+ .destroy = xs_destroy,
.print_stats = xs_local_print_stats,
};
@@ -2644,6 +2679,9 @@ static struct rpc_xprt *xs_setup_local(struct xprt_create *args)
xprt->ops = &xs_local_ops;
xprt->timeout = &xs_local_default_timeout;
+ INIT_DELAYED_WORK(&transport->connect_worker,
+ xs_dummy_setup_socket);
+
switch (sun->sun_family) {
case AF_LOCAL:
if (sun->sun_path[0] != '/') {
@@ -2848,8 +2886,8 @@ static struct rpc_xprt *xs_setup_bc_tcp(struct xprt_create *args)
if (args->bc_xprt->xpt_bc_xprt) {
/*
* This server connection already has a backchannel
- * export; we can't create a new one, as we wouldn't be
- * able to match replies based on xid any more. So,
+ * transport; we can't create a new one, as we wouldn't
+ * be able to match replies based on xid any more. So,
* reuse the already-existing one:
*/
return args->bc_xprt->xpt_bc_xprt;