diff options
Diffstat (limited to 'net/socket.c')
-rw-r--r-- | net/socket.c | 128 |
1 files changed, 87 insertions, 41 deletions
diff --git a/net/socket.c b/net/socket.c index 2270b941bcc7..ac2219f90d5d 100644 --- a/net/socket.c +++ b/net/socket.c @@ -156,7 +156,7 @@ static const struct file_operations socket_file_ops = { */ static DEFINE_SPINLOCK(net_family_lock); -static const struct net_proto_family *net_families[NPROTO] __read_mostly; +static const struct net_proto_family __rcu *net_families[NPROTO] __read_mostly; /* * Statistics counters of the socket lists @@ -209,8 +209,8 @@ int move_addr_to_kernel(void __user *uaddr, int ulen, struct sockaddr *kaddr) * specified. Zero is returned for a success. */ -int move_addr_to_user(struct sockaddr *kaddr, int klen, void __user *uaddr, - int __user *ulen) +static int move_addr_to_user(struct sockaddr *kaddr, int klen, + void __user *uaddr, int __user *ulen) { int err; int len; @@ -262,6 +262,7 @@ static struct inode *sock_alloc_inode(struct super_block *sb) } + static void wq_free_rcu(struct rcu_head *head) { struct socket_wq *wq = container_of(head, struct socket_wq, rcu); @@ -305,22 +306,6 @@ static const struct super_operations sockfs_ops = { .statfs = simple_statfs, }; -static int sockfs_get_sb(struct file_system_type *fs_type, - int flags, const char *dev_name, void *data, - struct vfsmount *mnt) -{ - return get_sb_pseudo(fs_type, "socket:", &sockfs_ops, SOCKFS_MAGIC, - mnt); -} - -static struct vfsmount *sock_mnt __read_mostly; - -static struct file_system_type sock_fs_type = { - .name = "sockfs", - .get_sb = sockfs_get_sb, - .kill_sb = kill_anon_super, -}; - /* * sockfs_dname() is called from d_path(). */ @@ -334,6 +319,21 @@ static const struct dentry_operations sockfs_dentry_operations = { .d_dname = sockfs_dname, }; +static struct dentry *sockfs_mount(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data) +{ + return mount_pseudo(fs_type, "socket:", &sockfs_ops, + &sockfs_dentry_operations, SOCKFS_MAGIC); +} + +static struct vfsmount *sock_mnt __read_mostly; + +static struct file_system_type sock_fs_type = { + .name = "sockfs", + .mount = sockfs_mount, + .kill_sb = kill_anon_super, +}; + /* * Obtains the first available file descriptor and sets it up for use. * @@ -362,14 +362,13 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) if (unlikely(fd < 0)) return fd; - path.dentry = d_alloc(sock_mnt->mnt_sb->s_root, &name); + path.dentry = d_alloc_pseudo(sock_mnt->mnt_sb, &name); if (unlikely(!path.dentry)) { put_unused_fd(fd); return -ENOMEM; } path.mnt = mntget(sock_mnt); - path.dentry->d_op = &sockfs_dentry_operations; d_instantiate(path.dentry, SOCK_INODE(sock)); SOCK_INODE(sock)->i_fop = &socket_file_ops; @@ -377,7 +376,7 @@ static int sock_alloc_file(struct socket *sock, struct file **f, int flags) &socket_file_ops); if (unlikely(!file)) { /* drop dentry, keep inode */ - atomic_inc(&path.dentry->d_inode->i_count); + ihold(path.dentry->d_inode); path_put(&path); put_unused_fd(fd); return -ENFILE; @@ -480,6 +479,7 @@ static struct socket *sock_alloc(void) sock = SOCKET_I(inode); kmemcheck_annotate_bitfield(sock, type); + inode->i_ino = get_next_ino(); inode->i_mode = S_IFSOCK | S_IRWXUGO; inode->i_uid = current_fsuid(); inode->i_gid = current_fsgid(); @@ -502,6 +502,7 @@ static int sock_no_open(struct inode *irrelevant, struct file *dontcare) const struct file_operations bad_sock_fops = { .owner = THIS_MODULE, .open = sock_no_open, + .llseek = noop_llseek, }; /** @@ -535,14 +536,13 @@ void sock_release(struct socket *sock) } EXPORT_SYMBOL(sock_release); -int sock_tx_timestamp(struct msghdr *msg, struct sock *sk, - union skb_shared_tx *shtx) +int sock_tx_timestamp(struct sock *sk, __u8 *tx_flags) { - shtx->flags = 0; + *tx_flags = 0; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_HARDWARE)) - shtx->hardware = 1; + *tx_flags |= SKBTX_HW_TSTAMP; if (sock_flag(sk, SOCK_TIMESTAMPING_TX_SOFTWARE)) - shtx->software = 1; + *tx_flags |= SKBTX_SW_TSTAMP; return 0; } EXPORT_SYMBOL(sock_tx_timestamp); @@ -662,7 +662,8 @@ void __sock_recv_timestamp(struct msghdr *msg, struct sock *sk, } EXPORT_SYMBOL_GPL(__sock_recv_timestamp); -inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, struct sk_buff *skb) +static inline void sock_recv_drops(struct msghdr *msg, struct sock *sk, + struct sk_buff *skb) { if (sock_flag(sk, SOCK_RXQ_OVFL) && skb && skb->dropcount) put_cmsg(msg, SOL_SOCKET, SO_RXQ_OVFL, @@ -732,6 +733,21 @@ static int sock_recvmsg_nosec(struct socket *sock, struct msghdr *msg, return ret; } +/** + * kernel_recvmsg - Receive a message from a socket (kernel space) + * @sock: The socket to receive the message from + * @msg: Received message + * @vec: Input s/g array for message data + * @num: Size of input s/g array + * @size: Number of bytes to read + * @flags: Message flags (MSG_DONTWAIT, etc...) + * + * On return the msg structure contains the scatter/gather array passed in the + * vec argument. The array is modified so that it consists of the unfilled + * portion of the original array. + * + * The returned value is the total number of bytes received, or an error. + */ int kernel_recvmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t size, int flags) { @@ -1144,7 +1160,7 @@ call_kill: } EXPORT_SYMBOL(sock_wake_async); -static int __sock_create(struct net *net, int family, int type, int protocol, +int __sock_create(struct net *net, int family, int type, int protocol, struct socket **res, int kern) { int err; @@ -1200,7 +1216,7 @@ static int __sock_create(struct net *net, int family, int type, int protocol, * requested real, full-featured networking support upon configuration. * Otherwise module support will break! */ - if (net_families[family] == NULL) + if (rcu_access_pointer(net_families[family]) == NULL) request_module("net-pf-%d", family); #endif @@ -1256,6 +1272,7 @@ out_release: rcu_read_unlock(); goto out_sock_release; } +EXPORT_SYMBOL(__sock_create); int sock_create(int family, int type, int protocol, struct socket **res) { @@ -1651,6 +1668,8 @@ SYSCALL_DEFINE6(sendto, int, fd, void __user *, buff, size_t, len, struct iovec iov; int fput_needed; + if (len > INT_MAX) + len = INT_MAX; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1708,6 +1727,8 @@ SYSCALL_DEFINE6(recvfrom, int, fd, void __user *, ubuf, size_t, size, int err, err2; int fput_needed; + if (size > INT_MAX) + size = INT_MAX; sock = sockfd_lookup_light(fd, &err, &fput_needed); if (!sock) goto out; @@ -1919,7 +1940,8 @@ SYSCALL_DEFINE3(sendmsg, int, fd, struct msghdr __user *, msg, unsigned, flags) * Afterwards, it will be a kernel pointer. Thus the compiler-assisted * checking falls down on this. */ - if (copy_from_user(ctl_buf, (void __user *)msg_sys.msg_control, + if (copy_from_user(ctl_buf, + (void __user __force *)msg_sys.msg_control, ctl_len)) goto out_freectl; msg_sys.msg_control = ctl_buf; @@ -2326,10 +2348,11 @@ int sock_register(const struct net_proto_family *ops) } spin_lock(&net_family_lock); - if (net_families[ops->family]) + if (rcu_dereference_protected(net_families[ops->family], + lockdep_is_held(&net_family_lock))) err = -EEXIST; else { - net_families[ops->family] = ops; + rcu_assign_pointer(net_families[ops->family], ops); err = 0; } spin_unlock(&net_family_lock); @@ -2357,7 +2380,7 @@ void sock_unregister(int family) BUG_ON(family < 0 || family >= NPROTO); spin_lock(&net_family_lock); - net_families[family] = NULL; + rcu_assign_pointer(net_families[family], NULL); spin_unlock(&net_family_lock); synchronize_rcu(); @@ -2368,6 +2391,8 @@ EXPORT_SYMBOL(sock_unregister); static int __init sock_init(void) { + int err; + /* * Initialize sock SLAB cache. */ @@ -2384,8 +2409,15 @@ static int __init sock_init(void) */ init_inodecache(); - register_filesystem(&sock_fs_type); + + err = register_filesystem(&sock_fs_type); + if (err) + goto out_fs; sock_mnt = kern_mount(&sock_fs_type); + if (IS_ERR(sock_mnt)) { + err = PTR_ERR(sock_mnt); + goto out_mount; + } /* The real protocol initialization is performed in later initcalls. */ @@ -2398,7 +2430,13 @@ static int __init sock_init(void) skb_timestamping_init(); #endif - return 0; +out: + return err; + +out_mount: + unregister_filesystem(&sock_fs_type); +out_fs: + goto out; } core_initcall(sock_init); /* early initcall */ @@ -3054,14 +3092,19 @@ int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval, int *optlen) { mm_segment_t oldfs = get_fs(); + char __user *uoptval; + int __user *uoptlen; int err; + uoptval = (char __user __force *) optval; + uoptlen = (int __user __force *) optlen; + set_fs(KERNEL_DS); if (level == SOL_SOCKET) - err = sock_getsockopt(sock, level, optname, optval, optlen); + err = sock_getsockopt(sock, level, optname, uoptval, uoptlen); else - err = sock->ops->getsockopt(sock, level, optname, optval, - optlen); + err = sock->ops->getsockopt(sock, level, optname, uoptval, + uoptlen); set_fs(oldfs); return err; } @@ -3071,13 +3114,16 @@ int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval, unsigned int optlen) { mm_segment_t oldfs = get_fs(); + char __user *uoptval; int err; + uoptval = (char __user __force *) optval; + set_fs(KERNEL_DS); if (level == SOL_SOCKET) - err = sock_setsockopt(sock, level, optname, optval, optlen); + err = sock_setsockopt(sock, level, optname, uoptval, optlen); else - err = sock->ops->setsockopt(sock, level, optname, optval, + err = sock->ops->setsockopt(sock, level, optname, uoptval, optlen); set_fs(oldfs); return err; |