diff options
author | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
---|---|---|
committer | Mark Brown <broonie@kernel.org> | 2016-11-04 12:16:38 -0600 |
commit | cc9b94029e9ef51787af908e9856b1eed314bc00 (patch) | |
tree | 9675310b89d0f6fb1f7bd9423f0638c4ee5226fd /net/rds/tcp.c | |
parent | 13bed58ce8748d430a26e353a09b89f9d613a71f (diff) | |
parent | 1b5b42216469b05ef4b5916cb40b127dfab1da88 (diff) |
Merge branch 'topic/error' of git://git.kernel.org/pub/scm/linux/kernel/git/broonie/regulator into regulator-fixed
Diffstat (limited to 'net/rds/tcp.c')
-rw-r--r-- | net/rds/tcp.c | 195 |
1 files changed, 148 insertions, 47 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 86187dad1440..fcddacc92e01 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -56,8 +56,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *fpos); -int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; -int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; +static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; +static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; static struct ctl_table rds_tcp_sysctl_table[] = { #define RDS_TCP_SNDBUF 0 @@ -126,13 +126,72 @@ void rds_tcp_restore_callbacks(struct socket *sock, } /* - * This is the only path that sets tc->t_sock. Send and receive trust that - * it is set. The RDS_CONN_UP bit protects those paths from being - * called while it isn't set. + * rds_tcp_reset_callbacks() switches the to the new sock and + * returns the existing tc->t_sock. + * + * The only functions that set tc->t_sock are rds_tcp_set_callbacks + * and rds_tcp_reset_callbacks. Send and receive trust that + * it is set. The absence of RDS_CONN_UP bit protects those paths + * from being called while it isn't set. */ -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +void rds_tcp_reset_callbacks(struct socket *sock, + struct rds_conn_path *cp) { - struct rds_tcp_connection *tc = conn->c_transport_data; + struct rds_tcp_connection *tc = cp->cp_transport_data; + struct socket *osock = tc->t_sock; + + if (!osock) + goto newsock; + + /* Need to resolve a duelling SYN between peers. + * We have an outstanding SYN to this peer, which may + * potentially have transitioned to the RDS_CONN_UP state, + * so we must quiesce any send threads before resetting + * cp_transport_data. We quiesce these threads by setting + * cp_state to something other than RDS_CONN_UP, and then + * waiting for any existing threads in rds_send_xmit to + * complete release_in_xmit(). (Subsequent threads entering + * rds_send_xmit() will bail on !rds_conn_up(). + * + * However an incoming syn-ack at this point would end up + * marking the conn as RDS_CONN_UP, and would again permit + * rds_send_xmi() threads through, so ideally we would + * synchronize on RDS_CONN_UP after lock_sock(), but cannot + * do that: waiting on !RDS_IN_XMIT after lock_sock() may + * end up deadlocking with tcp_sendmsg(), and the RDS_IN_XMIT + * would not get set. As a result, we set c_state to + * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change + * cannot mark rds_conn_path_up() in the window before lock_sock() + */ + atomic_set(&cp->cp_state, RDS_CONN_RESETTING); + wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags)); + lock_sock(osock->sk); + /* reset receive side state for rds_tcp_data_recv() for osock */ + cancel_delayed_work_sync(&cp->cp_send_w); + cancel_delayed_work_sync(&cp->cp_recv_w); + if (tc->t_tinc) { + rds_inc_put(&tc->t_tinc->ti_inc); + tc->t_tinc = NULL; + } + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; + rds_tcp_restore_callbacks(osock, tc); + release_sock(osock->sk); + sock_release(osock); +newsock: + rds_send_path_reset(cp); + lock_sock(sock->sk); + rds_tcp_set_callbacks(sock, cp); + release_sock(sock->sk); +} + +/* Add tc to rds_tcp_tc_list and set tc->t_sock. See comments + * above rds_tcp_reset_callbacks for notes about synchronization + * with data path + */ +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp) +{ + struct rds_tcp_connection *tc = cp->cp_transport_data; rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc); write_lock_bh(&sock->sk->sk_callback_lock); @@ -148,12 +207,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) sock->sk->sk_data_ready = sock->sk->sk_user_data; tc->t_sock = sock; - tc->conn = conn; + tc->t_cpath = cp; tc->t_orig_data_ready = sock->sk->sk_data_ready; tc->t_orig_write_space = sock->sk->sk_write_space; tc->t_orig_state_change = sock->sk->sk_state_change; - sock->sk->sk_user_data = conn; + sock->sk->sk_user_data = cp; sock->sk->sk_data_ready = rds_tcp_data_ready; sock->sk->sk_write_space = rds_tcp_write_space; sock->sk->sk_state_change = rds_tcp_state_change; @@ -211,24 +270,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr) static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp) { struct rds_tcp_connection *tc; + int i; - tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); - if (!tc) - return -ENOMEM; + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); + if (!tc) + return -ENOMEM; - mutex_init(&tc->t_conn_lock); - tc->t_sock = NULL; - tc->t_tinc = NULL; - tc->t_tinc_hdr_rem = sizeof(struct rds_header); - tc->t_tinc_data_rem = 0; + mutex_init(&tc->t_conn_path_lock); + tc->t_sock = NULL; + tc->t_tinc = NULL; + tc->t_tinc_hdr_rem = sizeof(struct rds_header); + tc->t_tinc_data_rem = 0; - conn->c_transport_data = tc; + conn->c_path[i].cp_transport_data = tc; + tc->t_cpath = &conn->c_path[i]; - spin_lock_irq(&rds_tcp_conn_lock); - list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); - spin_unlock_irq(&rds_tcp_conn_lock); + spin_lock_irq(&rds_tcp_conn_lock); + list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); + spin_unlock_irq(&rds_tcp_conn_lock); + rdsdebug("rds_conn_path [%d] tc %p\n", i, + conn->c_path[i].cp_transport_data); + } - rdsdebug("alloced tc %p\n", conn->c_transport_data); return 0; } @@ -245,6 +309,17 @@ static void rds_tcp_conn_free(void *arg) kmem_cache_free(rds_tcp_conn_slab, tc); } +static bool list_has_conn(struct list_head *list, struct rds_connection *conn) +{ + struct rds_tcp_connection *tc, *_tc; + + list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { + if (tc->t_cpath->cp_conn == conn) + return true; + } + return false; +} + static void rds_tcp_destroy_conns(void) { struct rds_tcp_connection *tc, *_tc; @@ -252,29 +327,28 @@ static void rds_tcp_destroy_conns(void) /* avoid calling conn_destroy with irqs off */ spin_lock_irq(&rds_tcp_conn_lock); - list_splice(&rds_tcp_conn_list, &tmp_list); - INIT_LIST_HEAD(&rds_tcp_conn_list); + list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); + } spin_unlock_irq(&rds_tcp_conn_lock); - list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); - rds_conn_destroy(tc->conn); - } + list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) + rds_conn_destroy(tc->t_cpath->cp_conn); } static void rds_tcp_exit(void); struct rds_transport rds_tcp_transport = { .laddr_check = rds_tcp_laddr_check, - .xmit_prepare = rds_tcp_xmit_prepare, - .xmit_complete = rds_tcp_xmit_complete, + .xmit_path_prepare = rds_tcp_xmit_path_prepare, + .xmit_path_complete = rds_tcp_xmit_path_complete, .xmit = rds_tcp_xmit, - .recv = rds_tcp_recv, + .recv_path = rds_tcp_recv_path, .conn_alloc = rds_tcp_conn_alloc, .conn_free = rds_tcp_conn_free, - .conn_connect = rds_tcp_conn_connect, - .conn_shutdown = rds_tcp_conn_shutdown, + .conn_path_connect = rds_tcp_conn_path_connect, + .conn_path_shutdown = rds_tcp_conn_path_shutdown, .inc_copy_to_user = rds_tcp_inc_copy_to_user, .inc_free = rds_tcp_inc_free, .stats_info_copy = rds_tcp_stats_info_copy, @@ -283,6 +357,7 @@ struct rds_transport rds_tcp_transport = { .t_name = "tcp", .t_type = RDS_TRANS_TCP, .t_prefer_loopback = 1, + .t_mp_capable = 1, }; static int rds_tcp_netid; @@ -416,10 +491,30 @@ static struct pernet_operations rds_tcp_net_ops = { .size = sizeof(struct rds_tcp_net), }; +/* explicitly send a RST on each socket, thereby releasing any socket refcnts + * that may otherwise hold up netns deletion. + */ +static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) +{ + struct rds_conn_path *cp; + struct rds_tcp_connection *tc; + int i; + struct sock *sk; + + for (i = 0; i < RDS_MPATH_WORKERS; i++) { + cp = &conn->c_path[i]; + tc = cp->cp_transport_data; + if (!tc->t_sock) + continue; + sk = tc->t_sock->sk; + sk->sk_prot->disconnect(sk, 0); + tcp_done(sk); + } +} + static void rds_tcp_kill_sock(struct net *net) { struct rds_tcp_connection *tc, *_tc; - struct sock *sk; LIST_HEAD(tmp_list); struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); @@ -428,23 +523,27 @@ static void rds_tcp_kill_sock(struct net *net) flush_work(&rtn->rds_tcp_accept_w); spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - list_move_tail(&tc->t_tcp_node, &tmp_list); + if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) + list_move_tail(&tc->t_tcp_node, &tmp_list); } spin_unlock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { - sk = tc->t_sock->sk; - sk->sk_prot->disconnect(sk, 0); - tcp_done(sk); - if (tc->conn->c_passive) - rds_conn_destroy(tc->conn->c_passive); - rds_conn_destroy(tc->conn); + rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); + rds_conn_destroy(tc->t_cpath->cp_conn); } } +void *rds_tcp_listen_sock_def_readable(struct net *net) +{ + struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + + return rtn->rds_tcp_listen_sock->sk->sk_user_data; +} + static int rds_tcp_dev_event(struct notifier_block *this, unsigned long event, void *ptr) { @@ -479,12 +578,13 @@ static void rds_tcp_sysctl_reset(struct net *net) spin_lock_irq(&rds_tcp_conn_lock); list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { - struct net *c_net = read_pnet(&tc->conn->c_net); + struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net); if (net != c_net || !tc->t_sock) continue; - rds_conn_drop(tc->conn); /* reconnect with new parameters */ + /* reconnect with new parameters */ + rds_conn_path_drop(tc->t_cpath); } spin_unlock_irq(&rds_tcp_conn_lock); } @@ -544,7 +644,7 @@ static int rds_tcp_init(void) ret = rds_tcp_recv_init(); if (ret) - goto out_slab; + goto out_pernet; ret = rds_trans_register(&rds_tcp_transport); if (ret) @@ -556,8 +656,9 @@ static int rds_tcp_init(void) out_recv: rds_tcp_recv_exit(); -out_slab: +out_pernet: unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab: kmem_cache_destroy(rds_tcp_conn_slab); out: return ret; |