diff options
Diffstat (limited to 'net/unix/af_unix.c')
-rw-r--r-- | net/unix/af_unix.c | 182 |
1 files changed, 107 insertions, 75 deletions
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 0748e7ea5210..e4af6616e1df 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -118,8 +118,6 @@ #include <linux/btf_ids.h> #include <linux/bpf-cgroup.h> -#include "scm.h" - static atomic_long_t unix_nr_socks; static struct hlist_head bsd_socket_buckets[UNIX_HASH_SIZE / 2]; static spinlock_t bsd_socket_locks[UNIX_HASH_SIZE / 2]; @@ -548,7 +546,7 @@ static void unix_write_space(struct sock *sk) if (skwq_has_sleeper(wq)) wake_up_interruptible_sync_poll(&wq->wait, EPOLLOUT | EPOLLWRNORM | EPOLLWRBAND); - sk_wake_async(sk, SOCK_WAKE_SPACE, POLL_OUT); + sk_wake_async_rcu(sk, SOCK_WAKE_SPACE, POLL_OUT); } rcu_read_unlock(); } @@ -757,7 +755,7 @@ static int unix_bind(struct socket *, struct sockaddr *, int); static int unix_stream_connect(struct socket *, struct sockaddr *, int addr_len, int flags); static int unix_socketpair(struct socket *, struct socket *); -static int unix_accept(struct socket *, struct socket *, int, bool); +static int unix_accept(struct socket *, struct socket *, struct proto_accept_arg *arg); static int unix_getname(struct socket *, struct sockaddr *, int); static __poll_t unix_poll(struct file *, struct socket *, poll_table *); static __poll_t unix_dgram_poll(struct file *, struct socket *, @@ -980,12 +978,12 @@ static struct sock *unix_create1(struct net *net, struct socket *sock, int kern, sk->sk_write_space = unix_write_space; sk->sk_max_ack_backlog = net->unx.sysctl_max_dgram_qlen; sk->sk_destruct = unix_sock_destructor; - u = unix_sk(sk); + u = unix_sk(sk); + u->listener = NULL; + u->vertex = NULL; u->path.dentry = NULL; u->path.mnt = NULL; spin_lock_init(&u->lock); - atomic_long_set(&u->inflight, 0); - INIT_LIST_HEAD(&u->link); mutex_init(&u->iolock); /* single task reading lock */ mutex_init(&u->bindlock); /* single task binding lock */ init_waitqueue_head(&u->peer_wait); @@ -1599,6 +1597,7 @@ restart: newsk->sk_type = sk->sk_type; init_peercred(newsk); newu = unix_sk(newsk); + newu->listener = other; RCU_INIT_POINTER(newsk->sk_wq, &newu->peer_wq); otheru = unix_sk(other); @@ -1690,19 +1689,18 @@ static void unix_sock_inherit_flags(const struct socket *old, set_bit(SOCK_PASSSEC, &new->flags); } -static int unix_accept(struct socket *sock, struct socket *newsock, int flags, - bool kern) +static int unix_accept(struct socket *sock, struct socket *newsock, + struct proto_accept_arg *arg) { struct sock *sk = sock->sk; - struct sock *tsk; struct sk_buff *skb; - int err; + struct sock *tsk; - err = -EOPNOTSUPP; + arg->err = -EOPNOTSUPP; if (sock->type != SOCK_STREAM && sock->type != SOCK_SEQPACKET) goto out; - err = -EINVAL; + arg->err = -EINVAL; if (sk->sk_state != TCP_LISTEN) goto out; @@ -1710,12 +1708,12 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, * so that no locks are necessary. */ - skb = skb_recv_datagram(sk, (flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, - &err); + skb = skb_recv_datagram(sk, (arg->flags & O_NONBLOCK) ? MSG_DONTWAIT : 0, + &arg->err); if (!skb) { /* This means receive shutdown. */ - if (err == 0) - err = -EINVAL; + if (arg->err == 0) + arg->err = -EINVAL; goto out; } @@ -1725,6 +1723,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, /* attach accepted sock to socket */ unix_state_lock(tsk); + unix_update_edges(unix_sk(tsk)); newsock->state = SS_CONNECTED; unix_sock_inherit_flags(sock, newsock); sock_graft(tsk, newsock); @@ -1732,7 +1731,7 @@ static int unix_accept(struct socket *sock, struct socket *newsock, int flags, return 0; out: - return err; + return arg->err; } @@ -1775,51 +1774,60 @@ out: return err; } +/* The "user->unix_inflight" variable is protected by the garbage + * collection lock, and we just read it locklessly here. If you go + * over the limit, there might be a tiny race in actually noticing + * it across threads. Tough. + */ +static inline bool too_many_unix_fds(struct task_struct *p) +{ + struct user_struct *user = current_user(); + + if (unlikely(READ_ONCE(user->unix_inflight) > task_rlimit(p, RLIMIT_NOFILE))) + return !capable(CAP_SYS_RESOURCE) && !capable(CAP_SYS_ADMIN); + return false; +} + +static int unix_attach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + if (too_many_unix_fds(current)) + return -ETOOMANYREFS; + + UNIXCB(skb).fp = scm->fp; + scm->fp = NULL; + + if (unix_prepare_fpl(UNIXCB(skb).fp)) + return -ENOMEM; + + return 0; +} + +static void unix_detach_fds(struct scm_cookie *scm, struct sk_buff *skb) +{ + scm->fp = UNIXCB(skb).fp; + UNIXCB(skb).fp = NULL; + + unix_destroy_fpl(scm->fp); +} + static void unix_peek_fds(struct scm_cookie *scm, struct sk_buff *skb) { scm->fp = scm_fp_dup(UNIXCB(skb).fp); +} - /* - * Garbage collection of unix sockets starts by selecting a set of - * candidate sockets which have reference only from being in flight - * (total_refs == inflight_refs). This condition is checked once during - * the candidate collection phase, and candidates are marked as such, so - * that non-candidates can later be ignored. While inflight_refs is - * protected by unix_gc_lock, total_refs (file count) is not, hence this - * is an instantaneous decision. - * - * Once a candidate, however, the socket must not be reinstalled into a - * file descriptor while the garbage collection is in progress. - * - * If the above conditions are met, then the directed graph of - * candidates (*) does not change while unix_gc_lock is held. - * - * Any operations that changes the file count through file descriptors - * (dup, close, sendmsg) does not change the graph since candidates are - * not installed in fds. - * - * Dequeing a candidate via recvmsg would install it into an fd, but - * that takes unix_gc_lock to decrement the inflight count, so it's - * serialized with garbage collection. - * - * MSG_PEEK is special in that it does not change the inflight count, - * yet does install the socket into an fd. The following lock/unlock - * pair is to ensure serialization with garbage collection. It must be - * done between incrementing the file count and installing the file into - * an fd. - * - * If garbage collection starts after the barrier provided by the - * lock/unlock, then it will see the elevated refcount and not mark this - * as a candidate. If a garbage collection is already in progress - * before the file count was incremented, then the lock/unlock pair will - * ensure that garbage collection is finished before progressing to - * installing the fd. - * - * (*) A -> B where B is on the queue of A or B is on the queue of C - * which is on the queue of listening socket A. - */ - spin_lock(&unix_gc_lock); - spin_unlock(&unix_gc_lock); +static void unix_destruct_scm(struct sk_buff *skb) +{ + struct scm_cookie scm; + + memset(&scm, 0, sizeof(scm)); + scm.pid = UNIXCB(skb).pid; + if (UNIXCB(skb).fp) + unix_detach_fds(&scm, skb); + + /* Alas, it calls VFS */ + /* So fscking what? fput() had been SMP-safe since the last Summer */ + scm_destroy(&scm); + sock_wfree(skb); } static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool send_fds) @@ -1878,8 +1886,10 @@ static void scm_stat_add(struct sock *sk, struct sk_buff *skb) struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); - if (unlikely(fp && fp->count)) + if (unlikely(fp && fp->count)) { atomic_add(fp->count, &u->scm_stat.nr_fds); + unix_add_edges(fp, u); + } } static void scm_stat_del(struct sock *sk, struct sk_buff *skb) @@ -1887,8 +1897,10 @@ static void scm_stat_del(struct sock *sk, struct sk_buff *skb) struct scm_fp_list *fp = UNIXCB(skb).fp; struct unix_sock *u = unix_sk(sk); - if (unlikely(fp && fp->count)) + if (unlikely(fp && fp->count)) { atomic_sub(fp->count, &u->scm_stat.nr_fds); + unix_del_edges(fp); + } } /* @@ -1908,11 +1920,12 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, long timeo; int err; - wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; + wait_for_unix_gc(scm.fp); + err = -EOPNOTSUPP; if (msg->msg_flags&MSG_OOB) goto out; @@ -2157,13 +2170,15 @@ static int queue_oob(struct socket *sock, struct msghdr *msg, struct sock *other maybe_add_creds(skb, sock, other); skb_get(skb); + scm_stat_add(other, skb); + + spin_lock(&other->sk_receive_queue.lock); if (ousk->oob_skb) consume_skb(ousk->oob_skb); - WRITE_ONCE(ousk->oob_skb, skb); + __skb_queue_tail(&other->sk_receive_queue, skb); + spin_unlock(&other->sk_receive_queue.lock); - scm_stat_add(other, skb); - skb_queue_tail(&other->sk_receive_queue, skb); sk_send_sigurg(other); unix_state_unlock(other); other->sk_data_ready(other); @@ -2184,11 +2199,12 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, bool fds_sent = false; int data_len; - wait_for_unix_gc(); err = scm_send(sock, msg, &scm, false); if (err < 0) return err; + wait_for_unix_gc(scm.fp); + err = -EOPNOTSUPP; if (msg->msg_flags & MSG_OOB) { #if IS_ENABLED(CONFIG_AF_UNIX_OOB) @@ -2209,7 +2225,7 @@ static int unix_stream_sendmsg(struct socket *sock, struct msghdr *msg, goto out_err; } - if (sk->sk_shutdown & SEND_SHUTDOWN) + if (READ_ONCE(sk->sk_shutdown) & SEND_SHUTDOWN) goto pipe_err; while (sent < len) { @@ -2553,8 +2569,10 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) mutex_lock(&u->iolock); unix_state_lock(sk); + spin_lock(&sk->sk_receive_queue.lock); if (sock_flag(sk, SOCK_URGINLINE) || !u->oob_skb) { + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); mutex_unlock(&u->iolock); return -EINVAL; @@ -2566,6 +2584,8 @@ static int unix_stream_recv_urg(struct unix_stream_read_state *state) WRITE_ONCE(u->oob_skb, NULL); else skb_get(oob_skb); + + spin_unlock(&sk->sk_receive_queue.lock); unix_state_unlock(sk); chunk = state->recv_actor(oob_skb, 0, chunk, state); @@ -2594,6 +2614,10 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, consume_skb(skb); skb = NULL; } else { + struct sk_buff *unlinked_skb = NULL; + + spin_lock(&sk->sk_receive_queue.lock); + if (skb == u->oob_skb) { if (copied) { skb = NULL; @@ -2602,12 +2626,22 @@ static struct sk_buff *manage_oob(struct sk_buff *skb, struct sock *sk, WRITE_ONCE(u->oob_skb, NULL); consume_skb(skb); } - } else if (!(flags & MSG_PEEK)) { - skb_unlink(skb, &sk->sk_receive_queue); - consume_skb(skb); + } else if (flags & MSG_PEEK) { + skb = NULL; + } else { + __skb_unlink(skb, &sk->sk_receive_queue); + WRITE_ONCE(u->oob_skb, NULL); + unlinked_skb = skb; skb = skb_peek(&sk->sk_receive_queue); } } + + spin_unlock(&sk->sk_receive_queue.lock); + + if (unlinked_skb) { + WARN_ON_ONCE(skb_unref(unlinked_skb)); + kfree_skb(unlinked_skb); + } } return skb; } @@ -2678,18 +2712,16 @@ redo: last = skb = skb_peek(&sk->sk_receive_queue); last_len = last ? last->len : 0; +again: #if IS_ENABLED(CONFIG_AF_UNIX_OOB) if (skb) { skb = manage_oob(skb, sk, flags, copied); - if (!skb) { + if (!skb && copied) { unix_state_unlock(sk); - if (copied) - break; - goto redo; + break; } } #endif -again: if (skb == NULL) { if (copied >= target) goto unlock; |