From 69b92b5b7419846e2a0d61a097b11b17a089e046 Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 21 Jun 2017 13:40:12 -0700 Subject: rds: tcp: send handshake ping-probe from passive endpoint The RDS handshake ping probe added by commit 5916e2c1554f ("RDS: TCP: Enable multipath RDS for TCP") is sent from rds_sendmsg() before the first data packet is sent to a peer. If the conversation is not bidirectional (i.e., one side is always passive and never invokes rds_sendmsg()) and the passive side restarts its rds_tcp module, a new HS ping probe needs to be sent, so that the number of paths can be re-established. This patch achieves that by sending a HS ping probe from rds_tcp_accept_one() when c_npaths is 0 (i.e., we have not done a handshake probe with this peer yet). Signed-off-by: Sowmini Varadhan Tested-by: Jenny Xu Signed-off-by: David S. Miller --- net/rds/rds.h | 1 + net/rds/recv.c | 6 +++--- net/rds/send.c | 14 ++++++-------- net/rds/tcp_listen.c | 2 ++ 4 files changed, 12 insertions(+), 11 deletions(-) diff --git a/net/rds/rds.h b/net/rds/rds.h index d6a04a05eb79..aa696b361e20 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -827,6 +827,7 @@ void rds_send_drop_acked(struct rds_connection *conn, u64 ack, is_acked_func is_acked); void rds_send_path_drop_acked(struct rds_conn_path *cp, u64 ack, is_acked_func is_acked); +void rds_send_ping(struct rds_connection *conn, int cp_index); int rds_send_pong(struct rds_conn_path *cp, __be16 dport); /* rdma.c */ diff --git a/net/rds/recv.c b/net/rds/recv.c index 49493dbc43a1..373a6aa1d976 100644 --- a/net/rds/recv.c +++ b/net/rds/recv.c @@ -227,6 +227,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, } /* if RDS_EXTHDR_NPATHS was not found, default to a single-path */ conn->c_npaths = max_t(int, conn->c_npaths, 1); + conn->c_ping_triggered = 0; rds_conn_peer_gen_update(conn, new_peer_gen_num); } @@ -244,8 +245,7 @@ static void rds_recv_hs_exthdrs(struct rds_header *hdr, * called after reception of the probe-pong on all mprds_paths. * Otherwise (sender of probe-ping is not the smaller ip addr): just call * rds_conn_path_connect_if_down on the hashed path. (see rule 4) - * 4. when cp_index > 0, rds_connect_worker must only trigger - * a connection if laddr < faddr. + * 4. rds_connect_worker must only trigger a connection if laddr < faddr. * 5. sender may end up queuing the packet on the cp. will get sent out later. * when connection is completed. */ @@ -256,7 +256,7 @@ static void rds_start_mprds(struct rds_connection *conn) if (conn->c_npaths > 1 && IS_CANONICAL(conn->c_laddr, conn->c_faddr)) { - for (i = 1; i < conn->c_npaths; i++) { + for (i = 0; i < conn->c_npaths; i++) { cp = &conn->c_path[i]; rds_conn_path_connect_if_down(cp); } diff --git a/net/rds/send.c b/net/rds/send.c index 3652a50397c7..e81aa176f4e2 100644 --- a/net/rds/send.c +++ b/net/rds/send.c @@ -971,8 +971,6 @@ static int rds_cmsg_send(struct rds_sock *rs, struct rds_message *rm, return ret; } -static void rds_send_ping(struct rds_connection *conn); - static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) { int hash; @@ -982,7 +980,7 @@ static int rds_send_mprds_hash(struct rds_sock *rs, struct rds_connection *conn) else hash = RDS_MPATH_HASH(rs, conn->c_npaths); if (conn->c_npaths == 0 && hash != 0) { - rds_send_ping(conn); + rds_send_ping(conn, 0); if (conn->c_npaths == 0) { wait_event_interruptible(conn->c_hs_waitq, @@ -1282,11 +1280,11 @@ rds_send_pong(struct rds_conn_path *cp, __be16 dport) return rds_send_probe(cp, 0, dport, 0); } -static void -rds_send_ping(struct rds_connection *conn) +void +rds_send_ping(struct rds_connection *conn, int cp_index) { unsigned long flags; - struct rds_conn_path *cp = &conn->c_path[0]; + struct rds_conn_path *cp = &conn->c_path[cp_index]; spin_lock_irqsave(&cp->cp_lock, flags); if (conn->c_ping_triggered) { @@ -1295,6 +1293,6 @@ rds_send_ping(struct rds_connection *conn) } conn->c_ping_triggered = 1; spin_unlock_irqrestore(&cp->cp_lock, flags); - rds_send_probe(&conn->c_path[0], cpu_to_be16(RDS_FLAG_PROBE_PORT), - 0, 0); + rds_send_probe(cp, cpu_to_be16(RDS_FLAG_PROBE_PORT), 0, 0); } +EXPORT_SYMBOL_GPL(rds_send_ping); diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index df291ac245d6..6089e9a8e00a 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -192,6 +192,8 @@ int rds_tcp_accept_one(struct socket *sock) } new_sock = NULL; ret = 0; + if (conn->c_npaths == 0) + rds_send_ping(cp->cp_conn, cp->cp_index); goto out; rst_nsk: /* reset the newly returned accept sock and bail. -- cgit From c14b0366813a8f6df7de181d8a333af7db6fb84e Mon Sep 17 00:00:00 2001 From: Sowmini Varadhan Date: Wed, 21 Jun 2017 13:40:13 -0700 Subject: rds: tcp: set linger to 1 when unloading a rds-tcp If we are unloading the rds_tcp module, we can set linger to 1 and drop pending packets to accelerate reconnect. The peer will end up resetting the connection based on new generation numbers of the new incarnation, so hanging on to unsent TCP packets via linger is mostly pointless in this case. Signed-off-by: Sowmini Varadhan Tested-by: Jenny Xu Signed-off-by: David S. Miller --- net/rds/connection.c | 1 + net/rds/rds.h | 3 ++- net/rds/tcp.h | 1 + net/rds/tcp_connect.c | 2 ++ net/rds/tcp_listen.c | 2 +- 5 files changed, 7 insertions(+), 2 deletions(-) diff --git a/net/rds/connection.c b/net/rds/connection.c index 382443b060cb..50a3789ac23e 100644 --- a/net/rds/connection.c +++ b/net/rds/connection.c @@ -412,6 +412,7 @@ void rds_conn_destroy(struct rds_connection *conn) "%pI4\n", conn, &conn->c_laddr, &conn->c_faddr); + conn->c_destroy_in_prog = 1; /* Ensure conn will not be scheduled for reconnect */ spin_lock_irq(&rds_conn_lock); hlist_del_init_rcu(&conn->c_hash_node); diff --git a/net/rds/rds.h b/net/rds/rds.h index aa696b361e20..4a25db7075b1 100644 --- a/net/rds/rds.h +++ b/net/rds/rds.h @@ -137,7 +137,8 @@ struct rds_connection { __be32 c_faddr; unsigned int c_loopback:1, c_ping_triggered:1, - c_pad_to_32:30; + c_destroy_in_prog:1, + c_pad_to_32:29; int c_npaths; struct rds_connection *c_passive; struct rds_transport *c_trans; diff --git a/net/rds/tcp.h b/net/rds/tcp.h index 56ea6620fcf9..f8800b7ce79c 100644 --- a/net/rds/tcp.h +++ b/net/rds/tcp.h @@ -71,6 +71,7 @@ void rds_tcp_listen_data_ready(struct sock *sk); int rds_tcp_accept_one(struct socket *sock); int rds_tcp_keepalive(struct socket *sock); void *rds_tcp_listen_sock_def_readable(struct net *net); +void rds_tcp_set_linger(struct socket *sock); /* tcp_recv.c */ int rds_tcp_recv_init(void); diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c index 5a62a083bb5a..cbe08a1fa4c7 100644 --- a/net/rds/tcp_connect.c +++ b/net/rds/tcp_connect.c @@ -170,6 +170,8 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp) cp->cp_conn, tc, sock); if (sock) { + if (cp->cp_conn->c_destroy_in_prog) + rds_tcp_set_linger(sock); sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN); lock_sock(sock->sk); rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */ diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c index 6089e9a8e00a..c6dc8caaf5ca 100644 --- a/net/rds/tcp_listen.c +++ b/net/rds/tcp_listen.c @@ -112,7 +112,7 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn) return NULL; } -static void rds_tcp_set_linger(struct socket *sock) +void rds_tcp_set_linger(struct socket *sock) { struct linger no_linger = { .l_onoff = 1, -- cgit