From 11aa9c28b4209242a9de0a661a7b3405adb568a0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 8 May 2015 21:09:13 -0500 Subject: net: Pass kern from net_proto_family.create to sk_alloc In preparation for changing how struct net is refcounted on kernel sockets pass the knowledge that we are creating a kernel socket from sock_create_kern through to sk_alloc. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- net/unix/af_unix.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 5266ea7b922b..941b3d26e3bf 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -620,7 +620,7 @@ static struct proto unix_proto = { */ static struct lock_class_key af_unix_sk_receive_queue_lock_key; -static struct sock *unix_create1(struct net *net, struct socket *sock) +static struct sock *unix_create1(struct net *net, struct socket *sock, int kern) { struct sock *sk = NULL; struct unix_sock *u; @@ -629,7 +629,7 @@ static struct sock *unix_create1(struct net *net, struct socket *sock) if (atomic_long_read(&unix_nr_socks) > 2 * get_max_files()) goto out; - sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto); + sk = sk_alloc(net, PF_UNIX, GFP_KERNEL, &unix_proto, kern); if (!sk) goto out; @@ -688,7 +688,7 @@ static int unix_create(struct net *net, struct socket *sock, int protocol, return -ESOCKTNOSUPPORT; } - return unix_create1(net, sock) ? 0 : -ENOMEM; + return unix_create1(net, sock, kern) ? 0 : -ENOMEM; } static int unix_release(struct socket *sock) @@ -1088,7 +1088,7 @@ static int unix_stream_connect(struct socket *sock, struct sockaddr *uaddr, err = -ENOMEM; /* create new sock for complete connection */ - newsk = unix_create1(sock_net(sk), NULL); + newsk = unix_create1(sock_net(sk), NULL, 0); if (newsk == NULL) goto out; -- cgit From 869e7c62486ec0e170a9771acaa251d1a33b5871 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 16:59:59 +0200 Subject: net: af_unix: implement stream sendpage support This patch implements sendpage support for AF_UNIX SOCK_STREAM sockets. This is also required for a complete splice implementation. The implementation is a bit tricky because we append to already existing skbs and so have to hold unix_sk->readlock to protect the reading side from either advancing UNIXCB.consumed or freeing the skb at the socket receive tail. Signed-off-by: Hannes Frederic Sowa Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- net/unix/af_unix.c | 99 +++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 98 insertions(+), 1 deletion(-) (limited to 'net/unix/af_unix.c') diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 941b3d26e3bf..7762c0b46721 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -518,6 +518,8 @@ static int unix_ioctl(struct socket *, unsigned int, unsigned long); static int unix_shutdown(struct socket *, int); static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); +static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, + size_t size, int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, @@ -558,7 +560,7 @@ static const struct proto_ops unix_stream_ops = { .sendmsg = unix_stream_sendmsg, .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, - .sendpage = sock_no_sendpage, + .sendpage = unix_stream_sendpage, .set_peek_off = unix_set_peek_off, }; @@ -1720,6 +1722,101 @@ out_err: return sent ? : err; } +static ssize_t unix_stream_sendpage(struct socket *socket, struct page *page, + int offset, size_t size, int flags) +{ + int err = 0; + bool send_sigpipe = true; + struct sock *other, *sk = socket->sk; + struct sk_buff *skb, *newskb = NULL, *tail = NULL; + + if (flags & MSG_OOB) + return -EOPNOTSUPP; + + other = unix_peer(sk); + if (!other || sk->sk_state != TCP_ESTABLISHED) + return -ENOTCONN; + + if (false) { +alloc_skb: + unix_state_unlock(other); + mutex_unlock(&unix_sk(other)->readlock); + newskb = sock_alloc_send_pskb(sk, 0, 0, flags & MSG_DONTWAIT, + &err, 0); + if (!newskb) + return err; + } + + /* we must acquire readlock as we modify already present + * skbs in the sk_receive_queue and mess with skb->len + */ + err = mutex_lock_interruptible(&unix_sk(other)->readlock); + if (err) { + err = flags & MSG_DONTWAIT ? -EAGAIN : -ERESTARTSYS; + send_sigpipe = false; + goto err; + } + + if (sk->sk_shutdown & SEND_SHUTDOWN) { + err = -EPIPE; + goto err_unlock; + } + + unix_state_lock(other); + + if (sock_flag(other, SOCK_DEAD) || + other->sk_shutdown & RCV_SHUTDOWN) { + err = -EPIPE; + goto err_state_unlock; + } + + skb = skb_peek_tail(&other->sk_receive_queue); + if (tail && tail == skb) { + skb = newskb; + } else if (!skb) { + if (newskb) + skb = newskb; + else + goto alloc_skb; + } else if (newskb) { + /* this is fast path, we don't necessarily need to + * call to kfree_skb even though with newskb == NULL + * this - does no harm + */ + consume_skb(newskb); + } + + if (skb_append_pagefrags(skb, page, offset, size)) { + tail = skb; + goto alloc_skb; + } + + skb->len += size; + skb->data_len += size; + skb->truesize += size; + atomic_add(size, &sk->sk_wmem_alloc); + + if (newskb) + __skb_queue_tail(&other->sk_receive_queue, newskb); + + unix_state_unlock(other); + mutex_unlock(&unix_sk(other)->readlock); + + other->sk_data_ready(other); + + return size; + +err_state_unlock: + unix_state_unlock(other); +err_unlock: + mutex_unlock(&unix_sk(other)->readlock); +err: + kfree_skb(newskb); + if (send_sigpipe && !(flags & MSG_NOSIGNAL)) + send_sig(SIGPIPE, current, 0); + return err; +} + static int unix_seqpacket_sendmsg(struct socket *sock, struct msghdr *msg, size_t len) { -- cgit From 2b514574f7e88c8498027ee366fd6e7aae5aa4b5 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 21 May 2015 17:00:01 +0200 Subject: net: af_unix: implement splice for stream af_unix sockets unix_stream_recvmsg is refactored to unix_stream_read_generic in this patch and enhanced to deal with pipe splicing. The refactoring is inneglible, we mostly have to deal with a non-existing struct msghdr argument. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- fs/splice.c | 1 + net/core/skbuff.c | 1 + net/unix/af_unix.c | 140 +++++++++++++++++++++++++++++++++++++++++++++-------- 3 files changed, 121 insertions(+), 21 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/fs/splice.c b/fs/splice.c index bfe62ae40f40..4f355a1c1a9e 100644 --- a/fs/splice.c +++ b/fs/splice.c @@ -261,6 +261,7 @@ ssize_t splice_to_pipe(struct pipe_inode_info *pipe, return ret; } +EXPORT_SYMBOL_GPL(splice_to_pipe); void spd_release_page(struct splice_pipe_desc *spd, unsigned int i) { diff --git a/net/core/skbuff.c b/net/core/skbuff.c index 02769fa4f5c8..9bac0e6f8dfa 100644 --- a/net/core/skbuff.c +++ b/net/core/skbuff.c @@ -1942,6 +1942,7 @@ done: return ret; } +EXPORT_SYMBOL_GPL(skb_splice_bits); /** * skb_store_bits - store bits from kernel buffer to skb diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index 7762c0b46721..b8c44076c776 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -520,6 +520,9 @@ static int unix_stream_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_stream_recvmsg(struct socket *, struct msghdr *, size_t, int); static ssize_t unix_stream_sendpage(struct socket *, struct page *, int offset, size_t size, int flags); +static ssize_t unix_stream_splice_read(struct socket *, loff_t *ppos, + struct pipe_inode_info *, size_t size, + unsigned int flags); static int unix_dgram_sendmsg(struct socket *, struct msghdr *, size_t); static int unix_dgram_recvmsg(struct socket *, struct msghdr *, size_t, int); static int unix_dgram_connect(struct socket *, struct sockaddr *, @@ -561,6 +564,7 @@ static const struct proto_ops unix_stream_ops = { .recvmsg = unix_stream_recvmsg, .mmap = sock_no_mmap, .sendpage = unix_stream_sendpage, + .splice_read = unix_stream_splice_read, .set_peek_off = unix_set_peek_off, }; @@ -1957,8 +1961,9 @@ out: * Sleep until more data has arrived. But check for races.. */ static long unix_stream_data_wait(struct sock *sk, long timeo, - struct sk_buff *last) + struct sk_buff *last, unsigned int last_len) { + struct sk_buff *tail; DEFINE_WAIT(wait); unix_state_lock(sk); @@ -1966,7 +1971,9 @@ static long unix_stream_data_wait(struct sock *sk, long timeo, for (;;) { prepare_to_wait(sk_sleep(sk), &wait, TASK_INTERRUPTIBLE); - if (skb_peek_tail(&sk->sk_receive_queue) != last || + tail = skb_peek_tail(&sk->sk_receive_queue); + if (tail != last || + (tail && tail->len != last_len) || sk->sk_err || (sk->sk_shutdown & RCV_SHUTDOWN) || signal_pending(current) || @@ -1990,38 +1997,50 @@ static unsigned int unix_skb_len(const struct sk_buff *skb) return skb->len - UNIXCB(skb).consumed; } -static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, - size_t size, int flags) +struct unix_stream_read_state { + int (*recv_actor)(struct sk_buff *, int, int, + struct unix_stream_read_state *); + struct socket *socket; + struct msghdr *msg; + struct pipe_inode_info *pipe; + size_t size; + int flags; + unsigned int splice_flags; +}; + +static int unix_stream_read_generic(struct unix_stream_read_state *state) { struct scm_cookie scm; + struct socket *sock = state->socket; struct sock *sk = sock->sk; struct unix_sock *u = unix_sk(sk); - DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, msg->msg_name); int copied = 0; + int flags = state->flags; int noblock = flags & MSG_DONTWAIT; - int check_creds = 0; + bool check_creds = false; int target; int err = 0; long timeo; int skip; + size_t size = state->size; + unsigned int last_len; err = -EINVAL; if (sk->sk_state != TCP_ESTABLISHED) goto out; err = -EOPNOTSUPP; - if (flags&MSG_OOB) + if (flags & MSG_OOB) goto out; - target = sock_rcvlowat(sk, flags&MSG_WAITALL, size); + target = sock_rcvlowat(sk, flags & MSG_WAITALL, size); timeo = sock_rcvtimeo(sk, noblock); + memset(&scm, 0, sizeof(scm)); + /* Lock the socket to prevent queue disordering * while sleeps in memcpy_tomsg */ - - memset(&scm, 0, sizeof(scm)); - err = mutex_lock_interruptible(&u->readlock); if (unlikely(err)) { /* recvmsg() in non blocking mode is supposed to return -EAGAIN @@ -2037,6 +2056,7 @@ static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, unix_state_lock(sk); last = skb = skb_peek(&sk->sk_receive_queue); + last_len = last ? last->len : 0; again: if (skb == NULL) { unix_sk(sk)->recursion_level = 0; @@ -2059,16 +2079,17 @@ again: break; mutex_unlock(&u->readlock); - timeo = unix_stream_data_wait(sk, timeo, last); + timeo = unix_stream_data_wait(sk, timeo, last, + last_len); - if (signal_pending(current) - || mutex_lock_interruptible(&u->readlock)) { + if (signal_pending(current) || + mutex_lock_interruptible(&u->readlock)) { err = sock_intr_errno(timeo); goto out; } continue; - unlock: +unlock: unix_state_unlock(sk); break; } @@ -2077,6 +2098,7 @@ again: while (skip >= unix_skb_len(skb)) { skip -= unix_skb_len(skb); last = skb; + last_len = skb->len; skb = skb_peek_next(skb, &sk->sk_receive_queue); if (!skb) goto again; @@ -2093,18 +2115,20 @@ again: } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); - check_creds = 1; + check_creds = true; } /* Copy address just once */ - if (sunaddr) { - unix_copy_addr(msg, skb->sk); + if (state->msg && state->msg->msg_name) { + DECLARE_SOCKADDR(struct sockaddr_un *, sunaddr, + state->msg->msg_name); + unix_copy_addr(state->msg, skb->sk); sunaddr = NULL; } chunk = min_t(unsigned int, unix_skb_len(skb) - skip, size); - if (skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, - msg, chunk)) { + chunk = state->recv_actor(skb, skip, chunk, state); + if (chunk < 0) { if (copied == 0) copied = -EFAULT; break; @@ -2142,11 +2166,85 @@ again: } while (size); mutex_unlock(&u->readlock); - scm_recv(sock, msg, &scm, flags); + if (state->msg) + scm_recv(sock, state->msg, &scm, flags); + else + scm_destroy(&scm); out: return copied ? : err; } +static int unix_stream_read_actor(struct sk_buff *skb, + int skip, int chunk, + struct unix_stream_read_state *state) +{ + int ret; + + ret = skb_copy_datagram_msg(skb, UNIXCB(skb).consumed + skip, + state->msg, chunk); + return ret ?: chunk; +} + +static int unix_stream_recvmsg(struct socket *sock, struct msghdr *msg, + size_t size, int flags) +{ + struct unix_stream_read_state state = { + .recv_actor = unix_stream_read_actor, + .socket = sock, + .msg = msg, + .size = size, + .flags = flags + }; + + return unix_stream_read_generic(&state); +} + +static ssize_t skb_unix_socket_splice(struct sock *sk, + struct pipe_inode_info *pipe, + struct splice_pipe_desc *spd) +{ + int ret; + struct unix_sock *u = unix_sk(sk); + + mutex_unlock(&u->readlock); + ret = splice_to_pipe(pipe, spd); + mutex_lock(&u->readlock); + + return ret; +} + +static int unix_stream_splice_actor(struct sk_buff *skb, + int skip, int chunk, + struct unix_stream_read_state *state) +{ + return skb_splice_bits(skb, state->socket->sk, + UNIXCB(skb).consumed + skip, + state->pipe, chunk, state->splice_flags, + skb_unix_socket_splice); +} + +static ssize_t unix_stream_splice_read(struct socket *sock, loff_t *ppos, + struct pipe_inode_info *pipe, + size_t size, unsigned int flags) +{ + struct unix_stream_read_state state = { + .recv_actor = unix_stream_splice_actor, + .socket = sock, + .pipe = pipe, + .size = size, + .splice_flags = flags, + }; + + if (unlikely(*ppos)) + return -ESPIPE; + + if (sock->file->f_flags & O_NONBLOCK || + flags & SPLICE_F_NONBLOCK) + state.flags = MSG_DONTWAIT; + + return unix_stream_read_generic(&state); +} + static int unix_shutdown(struct socket *sock, int mode) { struct sock *sk = sock->sk; -- cgit From 37a9a8df8ce9de6ea73349c9ac8bdf6ba4ec4f70 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Wed, 10 Jun 2015 08:44:59 -0400 Subject: net/unix: support SCM_SECURITY for stream sockets SCM_SECURITY was originally only implemented for datagram sockets, not for stream sockets. However, SCM_CREDENTIALS is supported on Unix stream sockets. For consistency, implement Unix stream support for SCM_SECURITY as well. Also clean up the existing code and get rid of the superfluous UNIXSID macro. Motivated by https://bugzilla.redhat.com/show_bug.cgi?id=1224211, where systemd was using SCM_CREDENTIALS and assumed wrongly that SCM_SECURITY was also supported on Unix stream sockets. Signed-off-by: Stephen Smalley Acked-by: Paul Moore Signed-off-by: David S. Miller --- include/net/af_unix.h | 1 - net/unix/af_unix.c | 20 ++++++++++++++++---- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'net/unix/af_unix.c') diff --git a/include/net/af_unix.h b/include/net/af_unix.h index a175ba4a7adb..4a167b30a12f 100644 --- a/include/net/af_unix.h +++ b/include/net/af_unix.h @@ -39,7 +39,6 @@ struct unix_skb_parms { }; #define UNIXCB(skb) (*(struct unix_skb_parms *)&((skb)->cb)) -#define UNIXSID(skb) (&UNIXCB((skb)).secid) #define unix_state_lock(s) spin_lock(&unix_sk(s)->lock) #define unix_state_unlock(s) spin_unlock(&unix_sk(s)->lock) diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c index f25e1675b865..03ee4d359f6a 100644 --- a/net/unix/af_unix.c +++ b/net/unix/af_unix.c @@ -140,12 +140,17 @@ static struct hlist_head *unix_sockets_unbound(void *addr) #ifdef CONFIG_SECURITY_NETWORK static void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - memcpy(UNIXSID(skb), &scm->secid, sizeof(u32)); + UNIXCB(skb).secid = scm->secid; } static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { - scm->secid = *UNIXSID(skb); + scm->secid = UNIXCB(skb).secid; +} + +static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) +{ + return (scm->secid == UNIXCB(skb).secid); } #else static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) @@ -153,6 +158,11 @@ static inline void unix_get_secdata(struct scm_cookie *scm, struct sk_buff *skb) static inline void unix_set_secdata(struct scm_cookie *scm, struct sk_buff *skb) { } + +static inline bool unix_secdata_eq(struct scm_cookie *scm, struct sk_buff *skb) +{ + return true; +} #endif /* CONFIG_SECURITY_NETWORK */ /* @@ -1414,6 +1424,7 @@ static int unix_scm_to_skb(struct scm_cookie *scm, struct sk_buff *skb, bool sen UNIXCB(skb).uid = scm->creds.uid; UNIXCB(skb).gid = scm->creds.gid; UNIXCB(skb).fp = NULL; + unix_get_secdata(scm, skb); if (scm->fp && send_fds) err = unix_attach_fds(scm, skb); @@ -1509,7 +1520,6 @@ static int unix_dgram_sendmsg(struct socket *sock, struct msghdr *msg, if (err < 0) goto out_free; max_level = err + 1; - unix_get_secdata(&scm, skb); skb_put(skb, len - data_len); skb->data_len = data_len; @@ -2118,11 +2128,13 @@ unlock: /* Never glue messages from different writers */ if ((UNIXCB(skb).pid != scm.pid) || !uid_eq(UNIXCB(skb).uid, scm.creds.uid) || - !gid_eq(UNIXCB(skb).gid, scm.creds.gid)) + !gid_eq(UNIXCB(skb).gid, scm.creds.gid) || + !unix_secdata_eq(&scm, skb)) break; } else if (test_bit(SOCK_PASSCRED, &sock->flags)) { /* Copy credentials */ scm_set_cred(&scm, UNIXCB(skb).pid, UNIXCB(skb).uid, UNIXCB(skb).gid); + unix_set_secdata(&scm, skb); check_creds = true; } -- cgit