diff options
Diffstat (limited to 'net/rds/tcp.c')
| -rw-r--r-- | net/rds/tcp.c | 165 | 
1 files changed, 97 insertions, 68 deletions
diff --git a/net/rds/tcp.c b/net/rds/tcp.c index 74ee126a6fe6..fcddacc92e01 100644 --- a/net/rds/tcp.c +++ b/net/rds/tcp.c @@ -56,8 +56,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,  				 void __user *buffer, size_t *lenp,  				 loff_t *fpos); -int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; -int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF; +static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF; +static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;  static struct ctl_table rds_tcp_sysctl_table[] = {  #define	RDS_TCP_SNDBUF	0 @@ -135,9 +135,9 @@ void rds_tcp_restore_callbacks(struct socket *sock,   * from being called while it isn't set.   */  void rds_tcp_reset_callbacks(struct socket *sock, -			     struct rds_connection *conn) +			     struct rds_conn_path *cp)  { -	struct rds_tcp_connection *tc = conn->c_transport_data; +	struct rds_tcp_connection *tc = cp->cp_transport_data;  	struct socket *osock = tc->t_sock;  	if (!osock) @@ -147,8 +147,8 @@ void rds_tcp_reset_callbacks(struct socket *sock,  	 * We have an outstanding SYN to this peer, which may  	 * potentially have transitioned to the RDS_CONN_UP state,  	 * so we must quiesce any send threads before resetting -	 * c_transport_data. We quiesce these threads by setting -	 * c_state to something other than RDS_CONN_UP, and then +	 * cp_transport_data. We quiesce these threads by setting +	 * cp_state to something other than RDS_CONN_UP, and then  	 * waiting for any existing threads in rds_send_xmit to  	 * complete release_in_xmit(). (Subsequent threads entering  	 * rds_send_xmit() will bail on !rds_conn_up(). @@ -163,38 +163,25 @@ void rds_tcp_reset_callbacks(struct socket *sock,  	 * RDS_CONN_RESETTTING, to ensure that rds_tcp_state_change  	 * cannot mark rds_conn_path_up() in the window before lock_sock()  	 */ -	atomic_set(&conn->c_state, RDS_CONN_RESETTING); -	wait_event(conn->c_waitq, !test_bit(RDS_IN_XMIT, &conn->c_flags)); +	atomic_set(&cp->cp_state, RDS_CONN_RESETTING); +	wait_event(cp->cp_waitq, !test_bit(RDS_IN_XMIT, &cp->cp_flags));  	lock_sock(osock->sk);  	/* reset receive side state for rds_tcp_data_recv() for osock  */ +	cancel_delayed_work_sync(&cp->cp_send_w); +	cancel_delayed_work_sync(&cp->cp_recv_w);  	if (tc->t_tinc) {  		rds_inc_put(&tc->t_tinc->ti_inc);  		tc->t_tinc = NULL;  	}  	tc->t_tinc_hdr_rem = sizeof(struct rds_header);  	tc->t_tinc_data_rem = 0; -	tc->t_sock = NULL; - -	write_lock_bh(&osock->sk->sk_callback_lock); - -	osock->sk->sk_user_data = NULL; -	osock->sk->sk_data_ready = tc->t_orig_data_ready; -	osock->sk->sk_write_space = tc->t_orig_write_space; -	osock->sk->sk_state_change = tc->t_orig_state_change; -	write_unlock_bh(&osock->sk->sk_callback_lock); +	rds_tcp_restore_callbacks(osock, tc);  	release_sock(osock->sk);  	sock_release(osock);  newsock: -	rds_send_reset(conn); +	rds_send_path_reset(cp);  	lock_sock(sock->sk); -	write_lock_bh(&sock->sk->sk_callback_lock); -	tc->t_sock = sock; -	sock->sk->sk_user_data = conn; -	sock->sk->sk_data_ready = rds_tcp_data_ready; -	sock->sk->sk_write_space = rds_tcp_write_space; -	sock->sk->sk_state_change = rds_tcp_state_change; - -	write_unlock_bh(&sock->sk->sk_callback_lock); +	rds_tcp_set_callbacks(sock, cp);  	release_sock(sock->sk);  } @@ -202,9 +189,9 @@ newsock:   * above rds_tcp_reset_callbacks for notes about synchronization   * with data path   */ -void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn) +void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp)  { -	struct rds_tcp_connection *tc = conn->c_transport_data; +	struct rds_tcp_connection *tc = cp->cp_transport_data;  	rdsdebug("setting sock %p callbacks to tc %p\n", sock, tc);  	write_lock_bh(&sock->sk->sk_callback_lock); @@ -220,12 +207,12 @@ void rds_tcp_set_callbacks(struct socket *sock, struct rds_connection *conn)  		sock->sk->sk_data_ready = sock->sk->sk_user_data;  	tc->t_sock = sock; -	tc->conn = conn; +	tc->t_cpath = cp;  	tc->t_orig_data_ready = sock->sk->sk_data_ready;  	tc->t_orig_write_space = sock->sk->sk_write_space;  	tc->t_orig_state_change = sock->sk->sk_state_change; -	sock->sk->sk_user_data = conn; +	sock->sk->sk_user_data = cp;  	sock->sk->sk_data_ready = rds_tcp_data_ready;  	sock->sk->sk_write_space = rds_tcp_write_space;  	sock->sk->sk_state_change = rds_tcp_state_change; @@ -283,24 +270,29 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)  static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)  {  	struct rds_tcp_connection *tc; +	int i; -	tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); -	if (!tc) -		return -ENOMEM; +	for (i = 0; i < RDS_MPATH_WORKERS; i++) { +		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp); +		if (!tc) +			return -ENOMEM; -	mutex_init(&tc->t_conn_lock); -	tc->t_sock = NULL; -	tc->t_tinc = NULL; -	tc->t_tinc_hdr_rem = sizeof(struct rds_header); -	tc->t_tinc_data_rem = 0; +		mutex_init(&tc->t_conn_path_lock); +		tc->t_sock = NULL; +		tc->t_tinc = NULL; +		tc->t_tinc_hdr_rem = sizeof(struct rds_header); +		tc->t_tinc_data_rem = 0; -	conn->c_transport_data = tc; +		conn->c_path[i].cp_transport_data = tc; +		tc->t_cpath = &conn->c_path[i]; -	spin_lock_irq(&rds_tcp_conn_lock); -	list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); -	spin_unlock_irq(&rds_tcp_conn_lock); +		spin_lock_irq(&rds_tcp_conn_lock); +		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list); +		spin_unlock_irq(&rds_tcp_conn_lock); +		rdsdebug("rds_conn_path [%d] tc %p\n", i, +			 conn->c_path[i].cp_transport_data); +	} -	rdsdebug("alloced tc %p\n", conn->c_transport_data);  	return 0;  } @@ -317,6 +309,17 @@ static void rds_tcp_conn_free(void *arg)  	kmem_cache_free(rds_tcp_conn_slab, tc);  } +static bool list_has_conn(struct list_head *list, struct rds_connection *conn) +{ +	struct rds_tcp_connection *tc, *_tc; + +	list_for_each_entry_safe(tc, _tc, list, t_tcp_node) { +		if (tc->t_cpath->cp_conn == conn) +			return true; +	} +	return false; +} +  static void rds_tcp_destroy_conns(void)  {  	struct rds_tcp_connection *tc, *_tc; @@ -324,29 +327,28 @@ static void rds_tcp_destroy_conns(void)  	/* avoid calling conn_destroy with irqs off */  	spin_lock_irq(&rds_tcp_conn_lock); -	list_splice(&rds_tcp_conn_list, &tmp_list); -	INIT_LIST_HEAD(&rds_tcp_conn_list); +	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { +		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) +			list_move_tail(&tc->t_tcp_node, &tmp_list); +	}  	spin_unlock_irq(&rds_tcp_conn_lock); -	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { -		if (tc->conn->c_passive) -			rds_conn_destroy(tc->conn->c_passive); -		rds_conn_destroy(tc->conn); -	} +	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) +		rds_conn_destroy(tc->t_cpath->cp_conn);  }  static void rds_tcp_exit(void);  struct rds_transport rds_tcp_transport = {  	.laddr_check		= rds_tcp_laddr_check, -	.xmit_prepare		= rds_tcp_xmit_prepare, -	.xmit_complete		= rds_tcp_xmit_complete, +	.xmit_path_prepare	= rds_tcp_xmit_path_prepare, +	.xmit_path_complete	= rds_tcp_xmit_path_complete,  	.xmit			= rds_tcp_xmit, -	.recv			= rds_tcp_recv, +	.recv_path		= rds_tcp_recv_path,  	.conn_alloc		= rds_tcp_conn_alloc,  	.conn_free		= rds_tcp_conn_free, -	.conn_connect		= rds_tcp_conn_connect, -	.conn_shutdown		= rds_tcp_conn_shutdown, +	.conn_path_connect	= rds_tcp_conn_path_connect, +	.conn_path_shutdown	= rds_tcp_conn_path_shutdown,  	.inc_copy_to_user	= rds_tcp_inc_copy_to_user,  	.inc_free		= rds_tcp_inc_free,  	.stats_info_copy	= rds_tcp_stats_info_copy, @@ -355,6 +357,7 @@ struct rds_transport rds_tcp_transport = {  	.t_name			= "tcp",  	.t_type			= RDS_TRANS_TCP,  	.t_prefer_loopback	= 1, +	.t_mp_capable		= 1,  };  static int rds_tcp_netid; @@ -488,10 +491,30 @@ static struct pernet_operations rds_tcp_net_ops = {  	.size = sizeof(struct rds_tcp_net),  }; +/* explicitly send a RST on each socket, thereby releasing any socket refcnts + * that may otherwise hold up netns deletion. + */ +static void rds_tcp_conn_paths_destroy(struct rds_connection *conn) +{ +	struct rds_conn_path *cp; +	struct rds_tcp_connection *tc; +	int i; +	struct sock *sk; + +	for (i = 0; i < RDS_MPATH_WORKERS; i++) { +		cp = &conn->c_path[i]; +		tc = cp->cp_transport_data; +		if (!tc->t_sock) +			continue; +		sk = tc->t_sock->sk; +		sk->sk_prot->disconnect(sk, 0); +		tcp_done(sk); +	} +} +  static void rds_tcp_kill_sock(struct net *net)  {  	struct rds_tcp_connection *tc, *_tc; -	struct sock *sk;  	LIST_HEAD(tmp_list);  	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); @@ -500,23 +523,27 @@ static void rds_tcp_kill_sock(struct net *net)  	flush_work(&rtn->rds_tcp_accept_w);  	spin_lock_irq(&rds_tcp_conn_lock);  	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { -		struct net *c_net = read_pnet(&tc->conn->c_net); +		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);  		if (net != c_net || !tc->t_sock)  			continue; -		list_move_tail(&tc->t_tcp_node, &tmp_list); +		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) +			list_move_tail(&tc->t_tcp_node, &tmp_list);  	}  	spin_unlock_irq(&rds_tcp_conn_lock);  	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) { -		sk = tc->t_sock->sk; -		sk->sk_prot->disconnect(sk, 0); -		tcp_done(sk); -		if (tc->conn->c_passive) -			rds_conn_destroy(tc->conn->c_passive); -		rds_conn_destroy(tc->conn); +		rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn); +		rds_conn_destroy(tc->t_cpath->cp_conn);  	}  } +void *rds_tcp_listen_sock_def_readable(struct net *net) +{ +	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid); + +	return rtn->rds_tcp_listen_sock->sk->sk_user_data; +} +  static int rds_tcp_dev_event(struct notifier_block *this,  			     unsigned long event, void *ptr)  { @@ -551,12 +578,13 @@ static void rds_tcp_sysctl_reset(struct net *net)  	spin_lock_irq(&rds_tcp_conn_lock);  	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) { -		struct net *c_net = read_pnet(&tc->conn->c_net); +		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);  		if (net != c_net || !tc->t_sock)  			continue; -		rds_conn_drop(tc->conn); /* reconnect with new parameters */ +		/* reconnect with new parameters */ +		rds_conn_path_drop(tc->t_cpath);  	}  	spin_unlock_irq(&rds_tcp_conn_lock);  } @@ -616,7 +644,7 @@ static int rds_tcp_init(void)  	ret = rds_tcp_recv_init();  	if (ret) -		goto out_slab; +		goto out_pernet;  	ret = rds_trans_register(&rds_tcp_transport);  	if (ret) @@ -628,8 +656,9 @@ static int rds_tcp_init(void)  out_recv:  	rds_tcp_recv_exit(); -out_slab: +out_pernet:  	unregister_pernet_subsys(&rds_tcp_net_ops); +out_slab:  	kmem_cache_destroy(rds_tcp_conn_slab);  out:  	return ret;  |