Diffstat (limited to 'net/rds')
-rw-r--r--   net/rds/af_rds.c       |  20
-rw-r--r--   net/rds/bind.c         |   1
-rw-r--r--   net/rds/cong.c         |  10
-rw-r--r--   net/rds/connection.c   |  32
-rw-r--r--   net/rds/ib.c           |  26
-rw-r--r--   net/rds/ib_cm.c        |   1
-rw-r--r--   net/rds/rds.h          |  17
-rw-r--r--   net/rds/send.c         |  37
-rw-r--r--   net/rds/tcp.c          | 113
-rw-r--r--   net/rds/tcp.h          |   1
-rw-r--r--   net/rds/tcp_connect.c  |   2
-rw-r--r--   net/rds/tcp_recv.c     |   8
-rw-r--r--   net/rds/tcp_send.c     |   5
-rw-r--r--   net/rds/threads.c      |  20

14 files changed, 196 insertions(+), 97 deletions(-)
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index b405f77d664c..744c637c86b0 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -137,27 +137,27 @@ static int rds_getname(struct socket *sock, struct sockaddr *uaddr,
 
 /*
  * RDS' poll is without a doubt the least intuitive part of the interface,
- * as POLLIN and POLLOUT do not behave entirely as you would expect from
+ * as EPOLLIN and EPOLLOUT do not behave entirely as you would expect from
  * a network protocol.
  *
- * POLLIN is asserted if
+ * EPOLLIN is asserted if
  *  -	there is data on the receive queue.
  *  -	to signal that a previously congested destination may have become
  *	uncongested
  *  -	A notification has been queued to the socket (this can be a congestion
  *	update, or a RDMA completion).
  *
- * POLLOUT is asserted if there is room on the send queue. This does not mean
+ * EPOLLOUT is asserted if there is room on the send queue. This does not mean
  * however, that the next sendmsg() call will succeed. If the application tries
  * to send to a congested destination, the system call may still fail (and
  * return ENOBUFS).
  */
-static unsigned int rds_poll(struct file *file, struct socket *sock,
+static __poll_t rds_poll(struct file *file, struct socket *sock,
 			     poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	struct rds_sock *rs = rds_sk_to_rs(sk);
-	unsigned int mask = 0;
+	__poll_t mask = 0;
 	unsigned long flags;
 
 	poll_wait(file, sk_sleep(sk), wait);
@@ -167,22 +167,22 @@ static unsigned int rds_poll(struct file *file, struct socket *sock,
 	read_lock_irqsave(&rs->rs_recv_lock, flags);
 	if (!rs->rs_cong_monitor) {
-		/* When a congestion map was updated, we signal POLLIN for
+		/* When a congestion map was updated, we signal EPOLLIN for
 		 * "historical" reasons. Applications can also poll for
 		 * WRBAND instead.
 		 */
 		if (rds_cong_updated_since(&rs->rs_cong_track))
-			mask |= (POLLIN | POLLRDNORM | POLLWRBAND);
+			mask |= (EPOLLIN | EPOLLRDNORM | EPOLLWRBAND);
 	} else {
 		spin_lock(&rs->rs_lock);
 		if (rs->rs_cong_notify)
-			mask |= (POLLIN | POLLRDNORM);
+			mask |= (EPOLLIN | EPOLLRDNORM);
 		spin_unlock(&rs->rs_lock);
 	}
 	if (!list_empty(&rs->rs_recv_queue) ||
 	    !list_empty(&rs->rs_notify_queue))
-		mask |= (POLLIN | POLLRDNORM);
+		mask |= (EPOLLIN | EPOLLRDNORM);
 	if (rs->rs_snd_bytes < rds_sk_sndbuf(rs))
-		mask |= (POLLOUT | POLLWRNORM);
+		mask |= (EPOLLOUT | EPOLLWRNORM);
 	read_unlock_irqrestore(&rs->rs_recv_lock, flags);
 
 	/* clear state any time we wake a seen-congested socket */
diff --git a/net/rds/bind.c b/net/rds/bind.c
index 75d43dc8e96b..5aa3a64aa4f0 100644
--- a/net/rds/bind.c
+++ b/net/rds/bind.c
@@ -114,6 +114,7 @@ static int rds_add_bound(struct rds_sock *rs, __be32 addr, __be16 *port)
 			  rs, &addr, (int)ntohs(*port));
 			break;
 		} else {
+			rs->rs_bound_addr = 0;
 			rds_sock_put(rs);
 			ret = -ENOMEM;
 			break;
diff --git a/net/rds/cong.c b/net/rds/cong.c
index 8398fee7c866..63da9d2f142d 100644
--- a/net/rds/cong.c
+++ b/net/rds/cong.c
@@ -219,7 +219,11 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 	spin_lock_irqsave(&rds_cong_lock, flags);
 
 	list_for_each_entry(conn, &map->m_conn_list, c_map_item) {
-		if (!test_and_set_bit(0, &conn->c_map_queued)) {
+		struct rds_conn_path *cp = &conn->c_path[0];
+
+		rcu_read_lock();
+		if (!test_and_set_bit(0, &conn->c_map_queued) &&
+		    !rds_destroy_pending(cp->cp_conn)) {
 			rds_stats_inc(s_cong_update_queued);
 			/* We cannot inline the call to rds_send_xmit() here
 			 * for two reasons (both pertaining to a TCP transport):
@@ -235,9 +239,9 @@ void rds_cong_queue_updates(struct rds_cong_map *map)
 			 *    therefore trigger warnings.
 			 * Defer the xmit to rds_send_worker() instead.
 			 */
-			queue_delayed_work(rds_wq,
-					   &conn->c_path[0].cp_send_w, 0);
+			queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
 		}
+		rcu_read_unlock();
 	}
 
 	spin_unlock_irqrestore(&rds_cong_lock, flags);
diff --git a/net/rds/connection.c b/net/rds/connection.c
index 7ee2d5d68b78..2da3176bf792 100644
--- a/net/rds/connection.c
+++ b/net/rds/connection.c
@@ -220,8 +220,13 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 				     is_outgoing);
 		conn->c_path[i].cp_index = i;
 	}
-	ret = trans->conn_alloc(conn, gfp);
+	rcu_read_lock();
+	if (rds_destroy_pending(conn))
+		ret = -ENETDOWN;
+	else
+		ret = trans->conn_alloc(conn, GFP_ATOMIC);
 	if (ret) {
+		rcu_read_unlock();
 		kfree(conn->c_path);
 		kmem_cache_free(rds_conn_slab, conn);
 		conn = ERR_PTR(ret);
@@ -230,8 +235,8 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 
 	rdsdebug("allocated conn %p for %pI4 -> %pI4 over %s %s\n",
 	  conn, &laddr, &faddr,
-	  trans->t_name ? trans->t_name : "[unknown]",
-	  is_outgoing ? "(outgoing)" : "");
+	  strnlen(trans->t_name, sizeof(trans->t_name)) ? trans->t_name :
+	  "[unknown]", is_outgoing ? "(outgoing)" : "");
 
 	/*
 	 * Since we ran without holding the conn lock, someone could
@@ -283,6 +288,7 @@ static struct rds_connection *__rds_conn_create(struct net *net,
 		}
 	}
 	spin_unlock_irqrestore(&rds_conn_lock, flags);
+	rcu_read_unlock();
 
 out:
 	return conn;
@@ -403,6 +409,11 @@ static void rds_conn_path_destroy(struct rds_conn_path *cp)
 	if (cp->cp_xmit_rm)
 		rds_message_put(cp->cp_xmit_rm);
 
+	WARN_ON(delayed_work_pending(&cp->cp_send_w));
+	WARN_ON(delayed_work_pending(&cp->cp_recv_w));
+	WARN_ON(delayed_work_pending(&cp->cp_conn_w));
+	WARN_ON(work_pending(&cp->cp_down_w));
+
 	cp->cp_conn->c_trans->conn_free(cp->cp_transport_data);
 }
 
@@ -424,7 +435,6 @@ void rds_conn_destroy(struct rds_connection *conn)
 		 "%pI4\n", conn, &conn->c_laddr,
 		 &conn->c_faddr);
 
-	conn->c_destroy_in_prog = 1;
 	/* Ensure conn will not be scheduled for reconnect */
 	spin_lock_irq(&rds_conn_lock);
 	hlist_del_init_rcu(&conn->c_hash_node);
@@ -445,7 +455,6 @@ void rds_conn_destroy(struct rds_connection *conn)
 	 */
 	rds_cong_remove_conn(conn);
 
-	put_net(conn->c_net);
 	kfree(conn->c_path);
 	kmem_cache_free(rds_conn_slab, conn);
 
@@ -684,10 +693,13 @@ void rds_conn_path_drop(struct rds_conn_path *cp, bool destroy)
 {
 	atomic_set(&cp->cp_state, RDS_CONN_ERROR);
 
-	if (!destroy && cp->cp_conn->c_destroy_in_prog)
+	rcu_read_lock();
+	if (!destroy && rds_destroy_pending(cp->cp_conn)) {
+		rcu_read_unlock();
 		return;
-
+	}
 	queue_work(rds_wq, &cp->cp_down_w);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_drop);
 
@@ -704,9 +716,15 @@ EXPORT_SYMBOL_GPL(rds_conn_drop);
  */
 void rds_conn_path_connect_if_down(struct rds_conn_path *cp)
 {
+	rcu_read_lock();
+	if (rds_destroy_pending(cp->cp_conn)) {
+		rcu_read_unlock();
+		return;
+	}
 	if (rds_conn_path_state(cp) == RDS_CONN_DOWN &&
 	    !test_and_set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags))
 		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_conn_path_connect_if_down);
 
diff --git a/net/rds/ib.c b/net/rds/ib.c
index 36dd2099048a..50a88f3e7e39 100644
--- a/net/rds/ib.c
+++ b/net/rds/ib.c
@@ -48,6 +48,7 @@
 static unsigned int rds_ib_mr_1m_pool_size = RDS_MR_1M_POOL_SIZE;
 static unsigned int rds_ib_mr_8k_pool_size = RDS_MR_8K_POOL_SIZE;
 unsigned int rds_ib_retry_count = RDS_IB_DEFAULT_RETRY_COUNT;
+static atomic_t rds_ib_unloading;
 
 module_param(rds_ib_mr_1m_pool_size, int, 0444);
 MODULE_PARM_DESC(rds_ib_mr_1m_pool_size, " Max number of 1M mr per HCA");
@@ -301,13 +302,11 @@ static int rds_ib_conn_info_visitor(struct rds_connection *conn,
 	memset(&iinfo->dst_gid, 0, sizeof(iinfo->dst_gid));
 
 	if (rds_conn_state(conn) == RDS_CONN_UP) {
 		struct rds_ib_device *rds_ibdev;
-		struct rdma_dev_addr *dev_addr;
 
 		ic = conn->c_transport_data;
-		dev_addr = &ic->i_cm_id->route.addr.dev_addr;
-		rdma_addr_get_sgid(dev_addr, (union ib_gid *) &iinfo->src_gid);
-		rdma_addr_get_dgid(dev_addr, (union ib_gid *) &iinfo->dst_gid);
+		rdma_read_gids(ic->i_cm_id, (union ib_gid *)&iinfo->src_gid,
+			       (union ib_gid *)&iinfo->dst_gid);
 
 		rds_ibdev = ic->rds_ibdev;
 		iinfo->max_send_wr = ic->i_send_ring.w_nr;
@@ -347,7 +346,8 @@ static int rds_ib_laddr_check(struct net *net, __be32 addr)
 	/* Create a CMA ID and try to bind it. This catches both
 	 * IB and iWARP capable NICs.
 	 */
-	cm_id = rdma_create_id(&init_net, NULL, NULL, RDMA_PS_TCP, IB_QPT_RC);
+	cm_id = rdma_create_id(&init_net, rds_rdma_cm_event_handler,
+			       NULL, RDMA_PS_TCP, IB_QPT_RC);
 	if (IS_ERR(cm_id))
 		return PTR_ERR(cm_id);
 
@@ -379,8 +379,23 @@ static void rds_ib_unregister_client(void)
 	flush_workqueue(rds_wq);
 }
 
+static void rds_ib_set_unloading(void)
+{
+	atomic_set(&rds_ib_unloading, 1);
+}
+
+static bool rds_ib_is_unloading(struct rds_connection *conn)
+{
+	struct rds_conn_path *cp = &conn->c_path[0];
+
+	return (test_bit(RDS_DESTROY_PENDING, &cp->cp_flags) ||
+		atomic_read(&rds_ib_unloading) != 0);
+}
+
 void rds_ib_exit(void)
 {
+	rds_ib_set_unloading();
+	synchronize_rcu();
 	rds_info_deregister_func(RDS_INFO_IB_CONNECTIONS, rds_ib_ic_info);
 	rds_ib_unregister_client();
 	rds_ib_destroy_nodev_conns();
@@ -414,6 +429,7 @@ struct rds_transport rds_ib_transport = {
 	.flush_mrs		= rds_ib_flush_mrs,
 	.t_owner		= THIS_MODULE,
 	.t_name			= "infiniband",
+	.t_unloading		= rds_ib_is_unloading,
 	.t_type			= RDS_TRANS_IB
 };
 
diff --git a/net/rds/ib_cm.c b/net/rds/ib_cm.c
index 80fb6f63e768..eea1d8611b20 100644
--- a/net/rds/ib_cm.c
+++ b/net/rds/ib_cm.c
@@ -117,6 +117,7 @@ void rds_ib_cm_connect_complete(struct rds_connection *conn, struct rdma_cm_even
 			  &conn->c_laddr, &conn->c_faddr,
 			  RDS_PROTOCOL_MAJOR(conn->c_version),
 			  RDS_PROTOCOL_MINOR(conn->c_version));
+		set_bit(RDS_DESTROY_PENDING, &conn->c_path[0].cp_flags);
 		rds_conn_destroy(conn);
 		return;
 	} else {
diff --git a/net/rds/rds.h b/net/rds/rds.h
index c349c71babff..7301b9b01890 100644
--- a/net/rds/rds.h
+++ b/net/rds/rds.h
@@ -88,6 +88,7 @@ enum {
 #define RDS_RECONNECT_PENDING	1
 #define RDS_IN_XMIT		2
 #define RDS_RECV_REFILL		3
+#define	RDS_DESTROY_PENDING	4
 
 /* Max number of multipaths per RDS connection. Must be a power of 2 */
 #define	RDS_MPATH_WORKERS	8
@@ -139,8 +140,7 @@ struct rds_connection {
 	__be32			c_faddr;
 	unsigned int		c_loopback:1,
 				c_ping_triggered:1,
-				c_destroy_in_prog:1,
-				c_pad_to_32:29;
+				c_pad_to_32:30;
 	int			c_npaths;
 	struct rds_connection	*c_passive;
 	struct rds_transport	*c_trans;
@@ -150,7 +150,7 @@ struct rds_connection {
 
 	/* Protocol version */
 	unsigned int		c_version;
-	struct net		*c_net;
+	possible_net_t		c_net;
 
 	struct list_head	c_map_item;
 	unsigned long		c_map_queued;
@@ -165,13 +165,13 @@
 static inline
 struct net *rds_conn_net(struct rds_connection *conn)
 {
-	return conn->c_net;
+	return read_pnet(&conn->c_net);
 }
 
 static inline
 void rds_conn_net_set(struct rds_connection *conn, struct net *net)
 {
-	conn->c_net = get_net(net);
+	write_pnet(&conn->c_net, net);
 }
 
 #define RDS_FLAG_CONG_BITMAP	0x01
@@ -518,6 +518,7 @@ struct rds_transport {
 	void (*sync_mr)(void *trans_private, int direction);
 	void (*free_mr)(void *trans_private, int invalidate);
 	void (*flush_mrs)(void);
+	bool (*t_unloading)(struct rds_connection *conn);
 };
 
 struct rds_sock {
@@ -862,6 +863,12 @@ static inline void rds_mr_put(struct rds_mr *mr)
 		__rds_put_mr_final(mr);
 }
 
+static inline bool rds_destroy_pending(struct rds_connection *conn)
+{
+	return !check_net(rds_conn_net(conn)) ||
+	       (conn->c_trans->t_unloading && conn->c_trans->t_unloading(conn));
+}
+
 /* stats.c */
 DECLARE_PER_CPU_SHARED_ALIGNED(struct rds_statistics, rds_stats);
 #define rds_stats_inc_which(which, member) do {		\
diff --git a/net/rds/send.c b/net/rds/send.c
index f72466c63f0c..b1b0022b8370 100644
--- a/net/rds/send.c
+++ b/net/rds/send.c
@@ -162,6 +162,12 @@ restart:
 		goto out;
 	}
 
+	if (rds_destroy_pending(cp->cp_conn)) {
+		release_in_xmit(cp);
+		ret = -ENETUNREACH; /* dont requeue send work */
+		goto out;
+	}
+
 	/*
 	 * we record the send generation after doing the xmit acquire.
 	 * if someone else manages to jump in and do some work, we'll use
@@ -437,7 +443,12 @@ over_batch:
 		    !list_empty(&cp->cp_send_queue)) && !raced) {
 			if (batch_count < send_batch_count)
 				goto restart;
-			queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+			rcu_read_lock();
+			if (rds_destroy_pending(cp->cp_conn))
+				ret = -ENETUNREACH;
+			else
+				queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+			rcu_read_unlock();
 		} else if (raced) {
 			rds_stats_inc(s_send_lock_queue_raced);
 		}
@@ -1151,6 +1162,11 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	else
 		cpath = &conn->c_path[0];
 
+	if (rds_destroy_pending(conn)) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
 	rds_conn_path_connect_if_down(cpath);
 
 	ret = rds_cong_wait(conn->c_fcong, dport, nonblock, rs);
@@ -1190,9 +1206,17 @@ int rds_sendmsg(struct socket *sock, struct msghdr *msg, size_t payload_len)
 	rds_stats_inc(s_send_queued);
 
 	ret = rds_send_xmit(cpath);
-	if (ret == -ENOMEM || ret == -EAGAIN)
-		queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
-
+	if (ret == -ENOMEM || ret == -EAGAIN) {
+		ret = 0;
+		rcu_read_lock();
+		if (rds_destroy_pending(cpath->cp_conn))
+			ret = -ENETUNREACH;
+		else
+			queue_delayed_work(rds_wq, &cpath->cp_send_w, 1);
+		rcu_read_unlock();
+	}
+	if (ret)
+		goto out;
 	rds_message_put(rm);
 	return payload_len;
 
@@ -1270,7 +1294,10 @@ rds_send_probe(struct rds_conn_path *cp, __be16 sport,
 	rds_stats_inc(s_send_pong);
 
 	/* schedule the send work on rds_wq */
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+	rcu_read_lock();
+	if (!rds_destroy_pending(cp->cp_conn))
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 1);
+	rcu_read_unlock();
 
 	rds_message_put(rm);
 	return 0;
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index ab7356e0ba83..44c4652721af 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -49,6 +49,7 @@ static unsigned int rds_tcp_tc_count;
 /* Track rds_tcp_connection structs so they can be cleaned up */
 static DEFINE_SPINLOCK(rds_tcp_conn_lock);
 static LIST_HEAD(rds_tcp_conn_list);
+static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
 
 static struct kmem_cache *rds_tcp_conn_slab;
 
@@ -271,16 +272,32 @@ static int rds_tcp_laddr_check(struct net *net, __be32 addr)
 	return -EADDRNOTAVAIL;
 }
 
+static void rds_tcp_conn_free(void *arg)
+{
+	struct rds_tcp_connection *tc = arg;
+
+	rdsdebug("freeing tc %p\n", tc);
+
+	spin_lock_bh(&rds_tcp_conn_lock);
+	if (!tc->t_tcp_node_detached)
+		list_del(&tc->t_tcp_node);
+	spin_unlock_bh(&rds_tcp_conn_lock);
+
+	kmem_cache_free(rds_tcp_conn_slab, tc);
+}
+
 static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 {
 	struct rds_tcp_connection *tc;
-	int i;
+	int i, j;
+	int ret = 0;
 
 	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
 		tc = kmem_cache_alloc(rds_tcp_conn_slab, gfp);
-		if (!tc)
-			return -ENOMEM;
-
+		if (!tc) {
+			ret = -ENOMEM;
+			goto fail;
+		}
 		mutex_init(&tc->t_conn_path_lock);
 		tc->t_sock = NULL;
 		tc->t_tinc = NULL;
@@ -289,28 +306,24 @@ static int rds_tcp_conn_alloc(struct rds_connection *conn, gfp_t gfp)
 
 		conn->c_path[i].cp_transport_data = tc;
 		tc->t_cpath = &conn->c_path[i];
+		tc->t_tcp_node_detached = true;
 
-		spin_lock_irq(&rds_tcp_conn_lock);
-		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
-		spin_unlock_irq(&rds_tcp_conn_lock);
 		rdsdebug("rds_conn_path [%d] tc %p\n", i,
 			 conn->c_path[i].cp_transport_data);
 	}
-
-	return 0;
-}
-
-static void rds_tcp_conn_free(void *arg)
-{
-	struct rds_tcp_connection *tc = arg;
-	unsigned long flags;
-	rdsdebug("freeing tc %p\n", tc);
-
-	spin_lock_irqsave(&rds_tcp_conn_lock, flags);
-	list_del(&tc->t_tcp_node);
-	spin_unlock_irqrestore(&rds_tcp_conn_lock, flags);
-
-	kmem_cache_free(rds_tcp_conn_slab, tc);
+	spin_lock_bh(&rds_tcp_conn_lock);
+	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
+		tc = conn->c_path[i].cp_transport_data;
+		tc->t_tcp_node_detached = false;
+		list_add_tail(&tc->t_tcp_node, &rds_tcp_conn_list);
+	}
+	spin_unlock_bh(&rds_tcp_conn_lock);
+fail:
+	if (ret) {
+		for (j = 0; j < i; j++)
+			rds_tcp_conn_free(conn->c_path[j].cp_transport_data);
+	}
+	return ret;
 }
 
 static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
@@ -324,6 +337,16 @@ static bool list_has_conn(struct list_head *list, struct rds_connection *conn)
 	}
 	return false;
 }
+static void rds_tcp_set_unloading(void)
+{
+	atomic_set(&rds_tcp_unloading, 1);
+}
+
+static bool rds_tcp_is_unloading(struct rds_connection *conn)
+{
+	return atomic_read(&rds_tcp_unloading) != 0;
+}
+
 static void rds_tcp_destroy_conns(void)
 {
 	struct rds_tcp_connection *tc, *_tc;
@@ -362,6 +385,7 @@ struct rds_transport rds_tcp_transport = {
 	.t_type			= RDS_TRANS_TCP,
 	.t_prefer_loopback	= 1,
 	.t_mp_capable		= 1,
+	.t_unloading		= rds_tcp_is_unloading,
 };
 
 static unsigned int rds_tcp_netid;
@@ -496,27 +520,6 @@ static struct pernet_operations rds_tcp_net_ops = {
 	.size = sizeof(struct rds_tcp_net),
 };
 
-/* explicitly send a RST on each socket, thereby releasing any socket refcnts
- * that may otherwise hold up netns deletion.
- */
-static void rds_tcp_conn_paths_destroy(struct rds_connection *conn)
-{
-	struct rds_conn_path *cp;
-	struct rds_tcp_connection *tc;
-	int i;
-	struct sock *sk;
-
-	for (i = 0; i < RDS_MPATH_WORKERS; i++) {
-		cp = &conn->c_path[i];
-		tc = cp->cp_transport_data;
-		if (!tc->t_sock)
-			continue;
-		sk = tc->t_sock->sk;
-		sk->sk_prot->disconnect(sk, 0);
-		tcp_done(sk);
-	}
-}
-
 static void rds_tcp_kill_sock(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
@@ -526,20 +529,22 @@ static void rds_tcp_kill_sock(struct net *net)
 
 	rtn->rds_tcp_listen_sock = NULL;
 	rds_tcp_listen_stop(lsock, &rtn->rds_tcp_accept_w);
-	spin_lock_irq(&rds_tcp_conn_lock);
+	spin_lock_bh(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = tc->t_cpath->cp_conn->c_net;
+		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
 		if (net != c_net || !tc->t_sock)
 			continue;
-		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn))
+		if (!list_has_conn(&tmp_list, tc->t_cpath->cp_conn)) {
 			list_move_tail(&tc->t_tcp_node, &tmp_list);
+		} else {
+			list_del(&tc->t_tcp_node);
+			tc->t_tcp_node_detached = true;
+		}
 	}
-	spin_unlock_irq(&rds_tcp_conn_lock);
-	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node) {
-		rds_tcp_conn_paths_destroy(tc->t_cpath->cp_conn);
+	spin_unlock_bh(&rds_tcp_conn_lock);
+	list_for_each_entry_safe(tc, _tc, &tmp_list, t_tcp_node)
 		rds_conn_destroy(tc->t_cpath->cp_conn);
-	}
 }
 
 void *rds_tcp_listen_sock_def_readable(struct net *net)
@@ -585,9 +590,9 @@ static void rds_tcp_sysctl_reset(struct net *net)
 {
 	struct rds_tcp_connection *tc, *_tc;
 
-	spin_lock_irq(&rds_tcp_conn_lock);
+	spin_lock_bh(&rds_tcp_conn_lock);
 	list_for_each_entry_safe(tc, _tc, &rds_tcp_conn_list, t_tcp_node) {
-		struct net *c_net = tc->t_cpath->cp_conn->c_net;
+		struct net *c_net = read_pnet(&tc->t_cpath->cp_conn->c_net);
 
 		if (net != c_net || !tc->t_sock)
 			continue;
@@ -595,7 +600,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 		/* reconnect with new parameters */
 		rds_conn_path_drop(tc->t_cpath, false);
 	}
-	spin_unlock_irq(&rds_tcp_conn_lock);
+	spin_unlock_bh(&rds_tcp_conn_lock);
 }
 
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
@@ -618,6 +623,8 @@ static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
 
 static void rds_tcp_exit(void)
 {
+	rds_tcp_set_unloading();
+	synchronize_rcu();
 	rds_info_deregister_func(RDS_INFO_TCP_SOCKETS, rds_tcp_tc_info);
 	unregister_pernet_subsys(&rds_tcp_net_ops);
 	if (unregister_netdevice_notifier(&rds_tcp_dev_notifier))
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 864ca7d8f019..c6fa080e9b6d 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -12,6 +12,7 @@ struct rds_tcp_incoming {
 
 struct rds_tcp_connection {
 	struct list_head	t_tcp_node;
+	bool			t_tcp_node_detached;
 	struct rds_conn_path	*t_cpath;
 	/* t_conn_path_lock synchronizes the connection establishment between
 	 * rds_tcp_accept_one and rds_tcp_conn_path_connect
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 46f74dad0e16..d999e7075645 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -170,7 +170,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 		 cp->cp_conn, tc, sock);
 
 	if (sock) {
-		if (cp->cp_conn->c_destroy_in_prog)
+		if (rds_destroy_pending(cp->cp_conn))
 			rds_tcp_set_linger(sock);
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
 		lock_sock(sock->sk);
diff --git a/net/rds/tcp_recv.c b/net/rds/tcp_recv.c
index e006ef8e6d40..b9fbd2ee74ef 100644
--- a/net/rds/tcp_recv.c
+++ b/net/rds/tcp_recv.c
@@ -321,8 +321,12 @@ void rds_tcp_data_ready(struct sock *sk)
 	ready = tc->t_orig_data_ready;
 	rds_tcp_stats_inc(s_tcp_data_ready_calls);
 
-	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM)
-		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	if (rds_tcp_read_sock(cp, GFP_ATOMIC) == -ENOMEM) {
+		rcu_read_lock();
+		if (!rds_destroy_pending(cp->cp_conn))
+			queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+		rcu_read_unlock();
+	}
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
 	ready(sk);
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 9b76e0fa1722..7df869d37afd 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -202,8 +202,11 @@ void rds_tcp_write_space(struct sock *sk)
 	tc->t_last_seen_una = rds_tcp_snd_una(tc);
 	rds_send_path_drop_acked(cp, rds_tcp_snd_una(tc), rds_tcp_is_acked);
 
-	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf)
+	rcu_read_lock();
+	if ((refcount_read(&sk->sk_wmem_alloc) << 1) <= sk->sk_sndbuf &&
+	    !rds_destroy_pending(cp->cp_conn))
 		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+	rcu_read_unlock();
 
 out:
 	read_unlock_bh(&sk->sk_callback_lock);
diff --git a/net/rds/threads.c b/net/rds/threads.c
index f121daa402c8..c52861d77a59 100644
--- a/net/rds/threads.c
+++ b/net/rds/threads.c
@@ -87,8 +87,12 @@ void rds_connect_path_complete(struct rds_conn_path *cp, int curr)
 
 	cp->cp_reconnect_jiffies = 0;
 	set_bit(0, &cp->cp_conn->c_map_queued);
-	queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
-	queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	rcu_read_lock();
+	if (!rds_destroy_pending(cp->cp_conn)) {
+		queue_delayed_work(rds_wq, &cp->cp_send_w, 0);
+		queue_delayed_work(rds_wq, &cp->cp_recv_w, 0);
+	}
+	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(rds_connect_path_complete);
 
@@ -133,7 +137,10 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	set_bit(RDS_RECONNECT_PENDING, &cp->cp_flags);
 	if (cp->cp_reconnect_jiffies == 0) {
 		cp->cp_reconnect_jiffies = rds_sysctl_reconnect_min_jiffies;
-		queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_lock();
+		if (!rds_destroy_pending(cp->cp_conn))
+			queue_delayed_work(rds_wq, &cp->cp_conn_w, 0);
+		rcu_read_unlock();
 		return;
 	}
 
@@ -141,8 +148,11 @@ void rds_queue_reconnect(struct rds_conn_path *cp)
 	rdsdebug("%lu delay %lu ceil conn %p for %pI4 -> %pI4\n",
 		 rand % cp->cp_reconnect_jiffies, cp->cp_reconnect_jiffies,
 		 conn, &conn->c_laddr, &conn->c_faddr);
-	queue_delayed_work(rds_wq, &cp->cp_conn_w,
-			   rand % cp->cp_reconnect_jiffies);
+	rcu_read_lock();
+	if (!rds_destroy_pending(cp->cp_conn))
+		queue_delayed_work(rds_wq, &cp->cp_conn_w,
+				   rand % cp->cp_reconnect_jiffies);
+	rcu_read_unlock();
 
 	cp->cp_reconnect_jiffies = min(cp->cp_reconnect_jiffies * 2,
 					rds_sysctl_reconnect_max_jiffies);
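Note: every call site touched above follows one shape: take rcu_read_lock(), test rds_destroy_pending() before queue_delayed_work()/queue_work(), then rcu_read_unlock(); teardown (rds_conn_destroy(), rds_ib_exit(), rds_tcp_exit()) sets RDS_DESTROY_PENDING or the module-unload atomic and calls synchronize_rcu() before freeing, so no new work can be queued against a dying connection. Below is a minimal user-space sketch of that idea only, with C11 atomics standing in for RCU; the names queue_work_if_alive() and begin_destroy() are hypothetical stand-ins, not taken from the kernel sources.

/* Illustrative sketch: in the kernel, the flag is RDS_DESTROY_PENDING,
 * the check is rds_destroy_pending(), and the grace period is
 * synchronize_rcu() rather than a plain atomic flag. */
#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct conn {
	atomic_bool destroy_pending;	/* stands in for RDS_DESTROY_PENDING */
	int queued;			/* stands in for work queued on rds_wq */
};

/* Hypothetical stand-in for the guarded queue_delayed_work() call sites. */
static bool queue_work_if_alive(struct conn *c)
{
	/* the kernel brackets this read with rcu_read_lock()/unlock() */
	if (atomic_load(&c->destroy_pending))
		return false;		/* teardown has begun; do not queue */
	c->queued++;
	return true;
}

/* Mirrors rds_conn_destroy()/rds_tcp_exit(): flag first, then tear down. */
static void begin_destroy(struct conn *c)
{
	atomic_store(&c->destroy_pending, true);
	/* kernel: synchronize_rcu() here waits out in-flight enqueuers,
	 * which is what makes the WARN_ON(delayed_work_pending(...))
	 * assertions in rds_conn_path_destroy() safe to add. */
}

int main(void)
{
	struct conn c = { .destroy_pending = false, .queued = 0 };

	printf("queued before destroy: %d\n", queue_work_if_alive(&c)); /* 1 */
	begin_destroy(&c);
	printf("queued after destroy:  %d\n", queue_work_if_alive(&c)); /* 0 */
	return 0;
}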