Diffstat (limited to 'net/unix/garbage.c')
-rw-r--r-- | net/unix/garbage.c | 601
1 file changed, 419 insertions, 182 deletions
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index fa39b6265238..dfe94a90ece4 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -101,261 +101,498 @@ struct unix_sock *unix_get_socket(struct file *filp)
 	return NULL;
 }
 
-DEFINE_SPINLOCK(unix_gc_lock);
+static struct unix_vertex *unix_edge_successor(struct unix_edge *edge)
+{
+	/* If an embryo socket has a fd,
+	 * the listener indirectly holds the fd's refcnt.
+	 */
+	if (edge->successor->listener)
+		return unix_sk(edge->successor->listener)->vertex;
+
+	return edge->successor->vertex;
+}
+
+static bool unix_graph_maybe_cyclic;
+static bool unix_graph_grouped;
+
+static void unix_update_graph(struct unix_vertex *vertex)
+{
+	/* If the receiver socket is not inflight, no cyclic
+	 * reference could be formed.
+	 */
+	if (!vertex)
+		return;
+
+	unix_graph_maybe_cyclic = true;
+	unix_graph_grouped = false;
+}
+
+static LIST_HEAD(unix_unvisited_vertices);
+
+enum unix_vertex_index {
+	UNIX_VERTEX_INDEX_MARK1,
+	UNIX_VERTEX_INDEX_MARK2,
+	UNIX_VERTEX_INDEX_START,
+};
+
+static unsigned long unix_vertex_unvisited_index = UNIX_VERTEX_INDEX_MARK1;
+
+static void unix_add_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
+{
+	struct unix_vertex *vertex = edge->predecessor->vertex;
+
+	if (!vertex) {
+		vertex = list_first_entry(&fpl->vertices, typeof(*vertex), entry);
+		vertex->index = unix_vertex_unvisited_index;
+		vertex->out_degree = 0;
+		INIT_LIST_HEAD(&vertex->edges);
+		INIT_LIST_HEAD(&vertex->scc_entry);
+
+		list_move_tail(&vertex->entry, &unix_unvisited_vertices);
+		edge->predecessor->vertex = vertex;
+	}
+
+	vertex->out_degree++;
+	list_add_tail(&edge->vertex_entry, &vertex->edges);
+
+	unix_update_graph(unix_edge_successor(edge));
+}
+
+static void unix_del_edge(struct scm_fp_list *fpl, struct unix_edge *edge)
+{
+	struct unix_vertex *vertex = edge->predecessor->vertex;
+
+	if (!fpl->dead)
+		unix_update_graph(unix_edge_successor(edge));
+
+	list_del(&edge->vertex_entry);
+	vertex->out_degree--;
+
+	if (!vertex->out_degree) {
+		edge->predecessor->vertex = NULL;
+		list_move_tail(&vertex->entry, &fpl->vertices);
+	}
+}
+
+static void unix_free_vertices(struct scm_fp_list *fpl)
+{
+	struct unix_vertex *vertex, *next_vertex;
+
+	list_for_each_entry_safe(vertex, next_vertex, &fpl->vertices, entry) {
+		list_del(&vertex->entry);
+		kfree(vertex);
+	}
+}
+
+static DEFINE_SPINLOCK(unix_gc_lock);
 unsigned int unix_tot_inflight;
-static LIST_HEAD(gc_candidates);
-static LIST_HEAD(gc_inflight_list);
 
-/* Keep the number of times in flight count for the file
- * descriptor if it is for an AF_UNIX socket.
- */
-void unix_inflight(struct user_struct *user, struct file *filp)
+void unix_add_edges(struct scm_fp_list *fpl, struct unix_sock *receiver)
 {
-	struct unix_sock *u = unix_get_socket(filp);
+	int i = 0, j = 0;
 
 	spin_lock(&unix_gc_lock);
 
-	if (u) {
-		if (!u->inflight) {
-			WARN_ON_ONCE(!list_empty(&u->link));
-			list_add_tail(&u->link, &gc_inflight_list);
-		} else {
-			WARN_ON_ONCE(list_empty(&u->link));
-		}
-		u->inflight++;
+	if (!fpl->count_unix)
+		goto out;
 
-		/* Paired with READ_ONCE() in wait_for_unix_gc() */
-		WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + 1);
-	}
+	do {
+		struct unix_sock *inflight = unix_get_socket(fpl->fp[j++]);
+		struct unix_edge *edge;
+
+		if (!inflight)
+			continue;
 
-	WRITE_ONCE(user->unix_inflight, user->unix_inflight + 1);
+		edge = fpl->edges + i++;
+		edge->predecessor = inflight;
+		edge->successor = receiver;
+
+		unix_add_edge(fpl, edge);
+	} while (i < fpl->count_unix);
+
+	receiver->scm_stat.nr_unix_fds += fpl->count_unix;
+	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight + fpl->count_unix);
+out:
+	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight + fpl->count);
 
 	spin_unlock(&unix_gc_lock);
+
+	fpl->inflight = true;
+
+	unix_free_vertices(fpl);
 }
 
-void unix_notinflight(struct user_struct *user, struct file *filp)
+void unix_del_edges(struct scm_fp_list *fpl)
 {
-	struct unix_sock *u = unix_get_socket(filp);
+	struct unix_sock *receiver;
+	int i = 0;
 
 	spin_lock(&unix_gc_lock);
 
-	if (u) {
-		WARN_ON_ONCE(!u->inflight);
-		WARN_ON_ONCE(list_empty(&u->link));
+	if (!fpl->count_unix)
+		goto out;
 
-		u->inflight--;
-		if (!u->inflight)
-			list_del_init(&u->link);
+	do {
+		struct unix_edge *edge = fpl->edges + i++;
 
-		/* Paired with READ_ONCE() in wait_for_unix_gc() */
-		WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - 1);
-	}
+		unix_del_edge(fpl, edge);
+	} while (i < fpl->count_unix);
 
-	WRITE_ONCE(user->unix_inflight, user->unix_inflight - 1);
+	if (!fpl->dead) {
+		receiver = fpl->edges[0].successor;
+		receiver->scm_stat.nr_unix_fds -= fpl->count_unix;
+	}
+	WRITE_ONCE(unix_tot_inflight, unix_tot_inflight - fpl->count_unix);
+out:
+	WRITE_ONCE(fpl->user->unix_inflight, fpl->user->unix_inflight - fpl->count);
 
 	spin_unlock(&unix_gc_lock);
+
+	fpl->inflight = false;
 }
 
-static void scan_inflight(struct sock *x, void (*func)(struct unix_sock *),
-			  struct sk_buff_head *hitlist)
+void unix_update_edges(struct unix_sock *receiver)
 {
-	struct sk_buff *skb;
-	struct sk_buff *next;
-
-	spin_lock(&x->sk_receive_queue.lock);
-	skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
-		/* Do we have file descriptors ? */
-		if (UNIXCB(skb).fp) {
-			bool hit = false;
-			/* Process the descriptors of this socket */
-			int nfd = UNIXCB(skb).fp->count;
-			struct file **fp = UNIXCB(skb).fp->fp;
-
-			while (nfd--) {
-				/* Get the socket the fd matches if it indeed does so */
-				struct unix_sock *u = unix_get_socket(*fp++);
-
-				/* Ignore non-candidates, they could have been added
-				 * to the queues after starting the garbage collection
-				 */
-				if (u && test_bit(UNIX_GC_CANDIDATE, &u->gc_flags)) {
-					hit = true;
-
-					func(u);
-				}
-			}
-			if (hit && hitlist != NULL) {
-				__skb_unlink(skb, &x->sk_receive_queue);
-				__skb_queue_tail(hitlist, skb);
-			}
-		}
+	/* nr_unix_fds is only updated under unix_state_lock().
+	 * If it's 0 here, the embryo socket is not part of the
+	 * inflight graph, and GC will not see it, so no lock needed.
+	 */
+	if (!receiver->scm_stat.nr_unix_fds) {
+		receiver->listener = NULL;
+	} else {
+		spin_lock(&unix_gc_lock);
+		unix_update_graph(unix_sk(receiver->listener)->vertex);
+		receiver->listener = NULL;
+		spin_unlock(&unix_gc_lock);
 	}
-	spin_unlock(&x->sk_receive_queue.lock);
 }
 
-static void scan_children(struct sock *x, void (*func)(struct unix_sock *),
-			  struct sk_buff_head *hitlist)
+int unix_prepare_fpl(struct scm_fp_list *fpl)
 {
-	if (x->sk_state != TCP_LISTEN) {
-		scan_inflight(x, func, hitlist);
-	} else {
-		struct sk_buff *skb;
-		struct sk_buff *next;
-		struct unix_sock *u;
-		LIST_HEAD(embryos);
+	struct unix_vertex *vertex;
+	int i;
 
-		/* For a listening socket collect the queued embryos
-		 * and perform a scan on them as well.
-		 */
-		spin_lock(&x->sk_receive_queue.lock);
-		skb_queue_walk_safe(&x->sk_receive_queue, skb, next) {
-			u = unix_sk(skb->sk);
+	if (!fpl->count_unix)
+		return 0;
 
-			/* An embryo cannot be in-flight, so it's safe
-			 * to use the list link.
-			 */
-			WARN_ON_ONCE(!list_empty(&u->link));
-			list_add_tail(&u->link, &embryos);
-		}
-		spin_unlock(&x->sk_receive_queue.lock);
+	for (i = 0; i < fpl->count_unix; i++) {
+		vertex = kmalloc(sizeof(*vertex), GFP_KERNEL);
+		if (!vertex)
+			goto err;
 
-		while (!list_empty(&embryos)) {
-			u = list_entry(embryos.next, struct unix_sock, link);
-			scan_inflight(&u->sk, func, hitlist);
-			list_del_init(&u->link);
-		}
+		list_add(&vertex->entry, &fpl->vertices);
 	}
+
+	fpl->edges = kvmalloc_array(fpl->count_unix, sizeof(*fpl->edges),
+				    GFP_KERNEL_ACCOUNT);
+	if (!fpl->edges)
+		goto err;
+
+	return 0;
+
+err:
+	unix_free_vertices(fpl);
+	return -ENOMEM;
 }
 
-static void dec_inflight(struct unix_sock *usk)
+void unix_destroy_fpl(struct scm_fp_list *fpl)
 {
-	usk->inflight--;
+	if (fpl->inflight)
+		unix_del_edges(fpl);
+
+	kvfree(fpl->edges);
+	unix_free_vertices(fpl);
 }
 
-static void inc_inflight(struct unix_sock *usk)
+static bool unix_vertex_dead(struct unix_vertex *vertex)
 {
-	usk->inflight++;
+	struct unix_edge *edge;
+	struct unix_sock *u;
+	long total_ref;
+
+	list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+		struct unix_vertex *next_vertex = unix_edge_successor(edge);
+
+		/* The vertex's fd can be received by a non-inflight socket. */
+		if (!next_vertex)
+			return false;
+
+		/* The vertex's fd can be received by an inflight socket in
+		 * another SCC.
+		 */
+		if (next_vertex->scc_index != vertex->scc_index)
+			return false;
+	}
+
+	/* No receiver exists out of the same SCC. */
+
+	edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+	u = edge->predecessor;
+	total_ref = file_count(u->sk.sk_socket->file);
+
+	/* If not close()d, total_ref > out_degree. */
+	if (total_ref != vertex->out_degree)
+		return false;
+
+	return true;
 }
 
-static void inc_inflight_move_tail(struct unix_sock *u)
+enum unix_recv_queue_lock_class {
+	U_RECVQ_LOCK_NORMAL,
+	U_RECVQ_LOCK_EMBRYO,
+};
+
+static void unix_collect_queue(struct unix_sock *u, struct sk_buff_head *hitlist)
 {
-	u->inflight++;
+	skb_queue_splice_init(&u->sk.sk_receive_queue, hitlist);
 
-	/* If this still might be part of a cycle, move it to the end
-	 * of the list, so that it's checked even if it was already
-	 * passed over
-	 */
-	if (test_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags))
-		list_move_tail(&u->link, &gc_candidates);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+	if (u->oob_skb) {
+		WARN_ON_ONCE(skb_unref(u->oob_skb));
+		u->oob_skb = NULL;
+	}
+#endif
 }
 
-static bool gc_in_progress;
-
-static void __unix_gc(struct work_struct *work)
+static void unix_collect_skb(struct list_head *scc, struct sk_buff_head *hitlist)
 {
-	struct sk_buff_head hitlist;
-	struct unix_sock *u, *next;
-	LIST_HEAD(not_cycle_list);
-	struct list_head cursor;
+	struct unix_vertex *vertex;
 
-	spin_lock(&unix_gc_lock);
+	list_for_each_entry_reverse(vertex, scc, scc_entry) {
+		struct sk_buff_head *queue;
+		struct unix_edge *edge;
+		struct unix_sock *u;
 
-	/* First, select candidates for garbage collection.  Only
-	 * in-flight sockets are considered, and from those only ones
-	 * which don't have any external reference.
-	 *
-	 * Holding unix_gc_lock will protect these candidates from
-	 * being detached, and hence from gaining an external
-	 * reference.  Since there are no possible receivers, all
-	 * buffers currently on the candidates' queues stay there
-	 * during the garbage collection.
-	 *
-	 * We also know that no new candidate can be added onto the
-	 * receive queues.  Other, non candidate sockets _can_ be
-	 * added to queue, so we must make sure only to touch
-	 * candidates.
-	 */
-	list_for_each_entry_safe(u, next, &gc_inflight_list, link) {
-		long total_refs;
+		edge = list_first_entry(&vertex->edges, typeof(*edge), vertex_entry);
+		u = edge->predecessor;
+		queue = &u->sk.sk_receive_queue;
+
+		spin_lock(&queue->lock);
+
+		if (u->sk.sk_state == TCP_LISTEN) {
+			struct sk_buff *skb;
 
-		total_refs = file_count(u->sk.sk_socket->file);
+			skb_queue_walk(queue, skb) {
+				struct sk_buff_head *embryo_queue = &skb->sk->sk_receive_queue;
 
-		WARN_ON_ONCE(!u->inflight);
-		WARN_ON_ONCE(total_refs < u->inflight);
-		if (total_refs == u->inflight) {
-			list_move_tail(&u->link, &gc_candidates);
-			__set_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
-			__set_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
+				/* listener -> embryo order, the inversion never happens. */
+				spin_lock_nested(&embryo_queue->lock, U_RECVQ_LOCK_EMBRYO);
+				unix_collect_queue(unix_sk(skb->sk), hitlist);
+				spin_unlock(&embryo_queue->lock);
+			}
+		} else {
+			unix_collect_queue(u, hitlist);
 		}
+
+		spin_unlock(&queue->lock);
 	}
+}
 
-	/* Now remove all internal in-flight reference to children of
-	 * the candidates.
-	 */
-	list_for_each_entry(u, &gc_candidates, link)
-		scan_children(&u->sk, dec_inflight, NULL);
+static bool unix_scc_cyclic(struct list_head *scc)
+{
+	struct unix_vertex *vertex;
+	struct unix_edge *edge;
 
-	/* Restore the references for children of all candidates,
-	 * which have remaining references.  Do this recursively, so
-	 * only those remain, which form cyclic references.
-	 *
-	 * Use a "cursor" link, to make the list traversal safe, even
-	 * though elements might be moved about.
+	/* SCC containing multiple vertices ? */
+	if (!list_is_singular(scc))
+		return true;
+
+	vertex = list_first_entry(scc, typeof(*vertex), scc_entry);
+
+	/* Self-reference or a embryo-listener circle ? */
+	list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+		if (unix_edge_successor(edge) == vertex)
+			return true;
+	}
+
+	return false;
+}
+
+static LIST_HEAD(unix_visited_vertices);
+static unsigned long unix_vertex_grouped_index = UNIX_VERTEX_INDEX_MARK2;
+
+static void __unix_walk_scc(struct unix_vertex *vertex, unsigned long *last_index,
+			    struct sk_buff_head *hitlist)
+{
+	LIST_HEAD(vertex_stack);
+	struct unix_edge *edge;
+	LIST_HEAD(edge_stack);
+
+next_vertex:
+	/* Push vertex to vertex_stack and mark it as on-stack
+	 * (index >= UNIX_VERTEX_INDEX_START).
+	 * The vertex will be popped when finalising SCC later.
 	 */
-	list_add(&cursor, &gc_candidates);
-	while (cursor.next != &gc_candidates) {
-		u = list_entry(cursor.next, struct unix_sock, link);
+	list_add(&vertex->scc_entry, &vertex_stack);
+
+	vertex->index = *last_index;
+	vertex->scc_index = *last_index;
+	(*last_index)++;
+
+	/* Explore neighbour vertices (receivers of the current vertex's fd). */
+	list_for_each_entry(edge, &vertex->edges, vertex_entry) {
+		struct unix_vertex *next_vertex = unix_edge_successor(edge);
+
+		if (!next_vertex)
+			continue;
+
+		if (next_vertex->index == unix_vertex_unvisited_index) {
+			/* Iterative deepening depth first search
+			 *
+			 *   1. Push a forward edge to edge_stack and set
+			 *      the successor to vertex for the next iteration.
+			 */
+			list_add(&edge->stack_entry, &edge_stack);
+
+			vertex = next_vertex;
+			goto next_vertex;
+
+			/*   2. Pop the edge directed to the current vertex
+			 *      and restore the ancestor for backtracking.
+			 */
+prev_vertex:
+			edge = list_first_entry(&edge_stack, typeof(*edge), stack_entry);
+			list_del_init(&edge->stack_entry);
 
-		/* Move cursor to after the current position. */
-		list_move(&cursor, &u->link);
+			next_vertex = vertex;
+			vertex = edge->predecessor->vertex;
 
-		if (u->inflight) {
-			list_move_tail(&u->link, &not_cycle_list);
-			__clear_bit(UNIX_GC_MAYBE_CYCLE, &u->gc_flags);
-			scan_children(&u->sk, inc_inflight_move_tail, NULL);
+			/* If the successor has a smaller scc_index, two vertices
+			 * are in the same SCC, so propagate the smaller scc_index
+			 * to skip SCC finalisation.
+			 */
+			vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+		} else if (next_vertex->index != unix_vertex_grouped_index) {
+			/* Loop detected by a back/cross edge.
+			 *
+			 * The successor is on vertex_stack, so two vertices are in
+			 * the same SCC.  If the successor has a smaller *scc_index*,
+			 * propagate it to skip SCC finalisation.
+			 */
+			vertex->scc_index = min(vertex->scc_index, next_vertex->scc_index);
+		} else {
+			/* The successor was already grouped as another SCC */
 		}
 	}
-	list_del(&cursor);
 
-	/* Now gc_candidates contains only garbage.  Restore original
-	 * inflight counters for these as well, and remove the skbuffs
-	 * which are creating the cycle(s).
-	 */
-	skb_queue_head_init(&hitlist);
-	list_for_each_entry(u, &gc_candidates, link) {
-		scan_children(&u->sk, inc_inflight, &hitlist);
+	if (vertex->index == vertex->scc_index) {
+		struct list_head scc;
+		bool scc_dead = true;
 
-#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
-		if (u->oob_skb) {
-			kfree_skb(u->oob_skb);
-			u->oob_skb = NULL;
+		/* SCC finalised.
+		 *
+		 * If the scc_index was not updated, all the vertices above on
+		 * vertex_stack are in the same SCC.  Group them using scc_entry.
+		 */
+		__list_cut_position(&scc, &vertex_stack, &vertex->scc_entry);
+
+		list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+			/* Don't restart DFS from this vertex in unix_walk_scc(). */
+			list_move_tail(&vertex->entry, &unix_visited_vertices);
+
+			/* Mark vertex as off-stack. */
+			vertex->index = unix_vertex_grouped_index;
+
+			if (scc_dead)
+				scc_dead = unix_vertex_dead(vertex);
 		}
-#endif
+
+		if (scc_dead)
+			unix_collect_skb(&scc, hitlist);
+		else if (!unix_graph_maybe_cyclic)
+			unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+		list_del(&scc);
 	}
 
-	/* not_cycle_list contains those sockets which do not make up a
-	 * cycle.  Restore these to the inflight list.
+	/* Need backtracking ? */
+	if (!list_empty(&edge_stack))
+		goto prev_vertex;
+}
+
+static void unix_walk_scc(struct sk_buff_head *hitlist)
+{
+	unsigned long last_index = UNIX_VERTEX_INDEX_START;
+
+	unix_graph_maybe_cyclic = false;
+
+	/* Visit every vertex exactly once.
+	 * __unix_walk_scc() moves visited vertices to unix_visited_vertices.
 	 */
-	while (!list_empty(&not_cycle_list)) {
-		u = list_entry(not_cycle_list.next, struct unix_sock, link);
-		__clear_bit(UNIX_GC_CANDIDATE, &u->gc_flags);
-		list_move_tail(&u->link, &gc_inflight_list);
+	while (!list_empty(&unix_unvisited_vertices)) {
+		struct unix_vertex *vertex;
+
+		vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+		__unix_walk_scc(vertex, &last_index, hitlist);
	}
 
-	spin_unlock(&unix_gc_lock);
+	list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+	swap(unix_vertex_unvisited_index, unix_vertex_grouped_index);
 
-	/* Here we are. Hitlist is filled. Die. */
-	__skb_queue_purge(&hitlist);
+	unix_graph_grouped = true;
+}
+
+static void unix_walk_scc_fast(struct sk_buff_head *hitlist)
+{
+	unix_graph_maybe_cyclic = false;
+
+	while (!list_empty(&unix_unvisited_vertices)) {
+		struct unix_vertex *vertex;
+		struct list_head scc;
+		bool scc_dead = true;
+
+		vertex = list_first_entry(&unix_unvisited_vertices, typeof(*vertex), entry);
+		list_add(&scc, &vertex->scc_entry);
+
+		list_for_each_entry_reverse(vertex, &scc, scc_entry) {
+			list_move_tail(&vertex->entry, &unix_visited_vertices);
+
+			if (scc_dead)
+				scc_dead = unix_vertex_dead(vertex);
+		}
+
+		if (scc_dead)
+			unix_collect_skb(&scc, hitlist);
+		else if (!unix_graph_maybe_cyclic)
+			unix_graph_maybe_cyclic = unix_scc_cyclic(&scc);
+
+		list_del(&scc);
+	}
+
+	list_replace_init(&unix_visited_vertices, &unix_unvisited_vertices);
+}
+
+static bool gc_in_progress;
+
+static void __unix_gc(struct work_struct *work)
+{
+	struct sk_buff_head hitlist;
+	struct sk_buff *skb;
 
 	spin_lock(&unix_gc_lock);
 
-	/* All candidates should have been detached by now. */
-	WARN_ON_ONCE(!list_empty(&gc_candidates));
+	if (!unix_graph_maybe_cyclic) {
+		spin_unlock(&unix_gc_lock);
+		goto skip_gc;
+	}
 
-	/* Paired with READ_ONCE() in wait_for_unix_gc(). */
-	WRITE_ONCE(gc_in_progress, false);
+	__skb_queue_head_init(&hitlist);
+
+	if (unix_graph_grouped)
+		unix_walk_scc_fast(&hitlist);
+	else
+		unix_walk_scc(&hitlist);
 
 	spin_unlock(&unix_gc_lock);
+
+	skb_queue_walk(&hitlist, skb) {
+		if (UNIXCB(skb).fp)
+			UNIXCB(skb).fp->dead = true;
+	}
+
+	__skb_queue_purge(&hitlist);
+skip_gc:
+	WRITE_ONCE(gc_in_progress, false);
 }
 
 static DECLARE_WORK(unix_gc_work, __unix_gc);
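The diff above replaces the old per-socket inflight counting with an explicit graph of in-flight fds: unix_add_edges()/unix_del_edges() maintain vertices and edges, __unix_walk_scc() groups vertices into strongly connected components with a Tarjan-style depth-first walk, and unix_vertex_dead() declares an SCC collectable when no fd escapes it and every file's reference count is fully accounted for by in-flight edges. The following is a minimal, self-contained userspace sketch of that idea, not kernel code: the toy graph, the refcount values and the recursive walk (the kernel walks iteratively with an explicit edge stack) are illustrative assumptions only.

/* Sketch: Tarjan-style SCC grouping over a toy "inflight fd" graph.
 * A vertex stands for an in-flight AF_UNIX socket; an edge A -> B means
 * "an fd of A is queued on B".  An SCC is garbage when no edge leaves it
 * and each vertex's refcount equals its out-degree.
 */
#include <stdbool.h>
#include <stdio.h>

#define NV 4

struct vertex {
	int index, lowlink, scc;   /* Tarjan bookkeeping + SCC id */
	bool on_stack;
	int out_degree;
	long total_ref;            /* stand-in for file_count() */
};

static struct vertex v[NV];
static int adj[NV][NV], nadj[NV];
static int stack[NV], top, next_index, nscc;

static void walk(int u)
{
	v[u].index = v[u].lowlink = next_index++;
	stack[top++] = u;
	v[u].on_stack = true;

	for (int i = 0; i < nadj[u]; i++) {
		int w = adj[u][i];

		if (v[w].index < 0) {               /* unvisited: forward edge */
			walk(w);
			if (v[w].lowlink < v[u].lowlink)
				v[u].lowlink = v[w].lowlink;
		} else if (v[w].on_stack) {         /* back/cross edge inside the stack */
			if (v[w].index < v[u].lowlink)
				v[u].lowlink = v[w].index;
		}
	}

	if (v[u].lowlink == v[u].index) {           /* u is the root: finalise the SCC */
		int w;

		do {
			w = stack[--top];
			v[w].on_stack = false;
			v[w].scc = nscc;
		} while (w != u);
		nscc++;
	}
}

static bool scc_dead(int id)
{
	for (int u = 0; u < NV; u++) {
		if (v[u].scc != id)
			continue;
		if (v[u].total_ref != v[u].out_degree)   /* still open somewhere */
			return false;
		for (int i = 0; i < nadj[u]; i++)
			if (v[adj[u][i]].scc != id)      /* an fd escapes the cycle */
				return false;
	}
	return true;
}

int main(void)
{
	/* 0 <-> 1 is a closed cycle; 2 -> 3 is still held open by a user. */
	int edges[][2] = { {0, 1}, {1, 0}, {2, 3} };

	for (int i = 0; i < NV; i++)
		v[i].index = -1;
	for (int i = 0; i < 3; i++) {
		adj[edges[i][0]][nadj[edges[i][0]]++] = edges[i][1];
		v[edges[i][0]].out_degree++;
	}
	v[0].total_ref = 1;  /* only ref is the fd queued on 1 */
	v[1].total_ref = 1;  /* only ref is the fd queued on 0 */
	v[2].total_ref = 2;  /* queued on 3 and still open in userspace */
	v[3].total_ref = 1;  /* open in userspace, nothing queued */

	for (int u = 0; u < NV; u++)
		if (v[u].index < 0)
			walk(u);

	for (int id = 0; id < nscc; id++)
		printf("SCC %d: %s\n", id, scc_dead(id) ? "dead, collect" : "alive");
	return 0;
}

Built with any C99 compiler, the sketch reports the 0 <-> 1 cycle as dead and the user-held sockets as alive, which mirrors the decision unix_walk_scc() and unix_collect_skb() make for real sockets in the patch.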