aboutsummaryrefslogtreecommitdiff
path: root/net/netlink/af_netlink.c
diff options
context:
space:
mode:
Diffstat (limited to 'net/netlink/af_netlink.c')
-rw-r--r--net/netlink/af_netlink.c354
1 files changed, 228 insertions, 126 deletions
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 05919bf3f670..d8e2e3918ce2 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -76,20 +76,21 @@ struct listeners {
};
/* state bits */
-#define NETLINK_CONGESTED 0x0
+#define NETLINK_S_CONGESTED 0x0
/* flags */
-#define NETLINK_KERNEL_SOCKET 0x1
-#define NETLINK_RECV_PKTINFO 0x2
-#define NETLINK_BROADCAST_SEND_ERROR 0x4
-#define NETLINK_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_KERNEL_SOCKET 0x1
+#define NETLINK_F_RECV_PKTINFO 0x2
+#define NETLINK_F_BROADCAST_SEND_ERROR 0x4
+#define NETLINK_F_RECV_NO_ENOBUFS 0x8
+#define NETLINK_F_LISTEN_ALL_NSID 0x10
static inline int netlink_is_kernel(struct sock *sk)
{
- return nlk_sk(sk)->flags & NETLINK_KERNEL_SOCKET;
+ return nlk_sk(sk)->flags & NETLINK_F_KERNEL_SOCKET;
}
-struct netlink_table *nl_table;
+struct netlink_table *nl_table __read_mostly;
EXPORT_SYMBOL_GPL(nl_table);
static DECLARE_WAIT_QUEUE_HEAD(nl_table_wait);
@@ -116,6 +117,8 @@ static ATOMIC_NOTIFIER_HEAD(netlink_chain);
static DEFINE_SPINLOCK(netlink_tap_lock);
static struct list_head netlink_tap_all __read_mostly;
+static const struct rhashtable_params netlink_rhashtable_params;
+
static inline u32 netlink_group_mask(u32 group)
{
return group ? 1 << (group - 1) : 0;
@@ -155,7 +158,7 @@ static int __netlink_remove_tap(struct netlink_tap *nt)
out:
spin_unlock(&netlink_tap_lock);
- if (found && nt->module)
+ if (found)
module_put(nt->module);
return found ? 0 : -ENODEV;
@@ -254,8 +257,9 @@ static void netlink_overrun(struct sock *sk)
{
struct netlink_sock *nlk = nlk_sk(sk);
- if (!(nlk->flags & NETLINK_RECV_NO_ENOBUFS)) {
- if (!test_and_set_bit(NETLINK_CONGESTED, &nlk_sk(sk)->state)) {
+ if (!(nlk->flags & NETLINK_F_RECV_NO_ENOBUFS)) {
+ if (!test_and_set_bit(NETLINK_S_CONGESTED,
+ &nlk_sk(sk)->state)) {
sk->sk_err = ENOBUFS;
sk->sk_error_report(sk);
}
@@ -268,8 +272,8 @@ static void netlink_rcv_wake(struct sock *sk)
struct netlink_sock *nlk = nlk_sk(sk);
if (skb_queue_empty(&sk->sk_receive_queue))
- clear_bit(NETLINK_CONGESTED, &nlk->state);
- if (!test_bit(NETLINK_CONGESTED, &nlk->state))
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
+ if (!test_bit(NETLINK_S_CONGESTED, &nlk->state))
wake_up_interruptible(&nlk->wait);
}
@@ -353,25 +357,52 @@ err1:
return NULL;
}
+
+static void
+__netlink_set_ring(struct sock *sk, struct nl_mmap_req *req, bool tx_ring, void **pg_vec,
+ unsigned int order)
+{
+ struct netlink_sock *nlk = nlk_sk(sk);
+ struct sk_buff_head *queue;
+ struct netlink_ring *ring;
+
+ queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
+ ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
+
+ spin_lock_bh(&queue->lock);
+
+ ring->frame_max = req->nm_frame_nr - 1;
+ ring->head = 0;
+ ring->frame_size = req->nm_frame_size;
+ ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
+
+ swap(ring->pg_vec_len, req->nm_block_nr);
+ swap(ring->pg_vec_order, order);
+ swap(ring->pg_vec, pg_vec);
+
+ __skb_queue_purge(queue);
+ spin_unlock_bh(&queue->lock);
+
+ WARN_ON(atomic_read(&nlk->mapped));
+
+ if (pg_vec)
+ free_pg_vec(pg_vec, order, req->nm_block_nr);
+}
+
static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
- bool closing, bool tx_ring)
+ bool tx_ring)
{
struct netlink_sock *nlk = nlk_sk(sk);
struct netlink_ring *ring;
- struct sk_buff_head *queue;
void **pg_vec = NULL;
unsigned int order = 0;
- int err;
ring = tx_ring ? &nlk->tx_ring : &nlk->rx_ring;
- queue = tx_ring ? &sk->sk_write_queue : &sk->sk_receive_queue;
- if (!closing) {
- if (atomic_read(&nlk->mapped))
- return -EBUSY;
- if (atomic_read(&ring->pending))
- return -EBUSY;
- }
+ if (atomic_read(&nlk->mapped))
+ return -EBUSY;
+ if (atomic_read(&ring->pending))
+ return -EBUSY;
if (req->nm_block_nr) {
if (ring->pg_vec != NULL)
@@ -403,31 +434,19 @@ static int netlink_set_ring(struct sock *sk, struct nl_mmap_req *req,
return -EINVAL;
}
- err = -EBUSY;
mutex_lock(&nlk->pg_vec_lock);
- if (closing || atomic_read(&nlk->mapped) == 0) {
- err = 0;
- spin_lock_bh(&queue->lock);
-
- ring->frame_max = req->nm_frame_nr - 1;
- ring->head = 0;
- ring->frame_size = req->nm_frame_size;
- ring->pg_vec_pages = req->nm_block_size / PAGE_SIZE;
-
- swap(ring->pg_vec_len, req->nm_block_nr);
- swap(ring->pg_vec_order, order);
- swap(ring->pg_vec, pg_vec);
-
- __skb_queue_purge(queue);
- spin_unlock_bh(&queue->lock);
-
- WARN_ON(atomic_read(&nlk->mapped));
+ if (atomic_read(&nlk->mapped) == 0) {
+ __netlink_set_ring(sk, req, tx_ring, pg_vec, order);
+ mutex_unlock(&nlk->pg_vec_lock);
+ return 0;
}
+
mutex_unlock(&nlk->pg_vec_lock);
if (pg_vec)
free_pg_vec(pg_vec, order, req->nm_block_nr);
- return err;
+
+ return -EBUSY;
}
static void netlink_mm_open(struct vm_area_struct *vma)
@@ -896,10 +915,10 @@ static void netlink_sock_destruct(struct sock *sk)
memset(&req, 0, sizeof(req));
if (nlk->rx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, false);
+ __netlink_set_ring(sk, &req, false, NULL, 0);
memset(&req, 0, sizeof(req));
if (nlk->tx_ring.pg_vec)
- netlink_set_ring(sk, &req, true, true);
+ __netlink_set_ring(sk, &req, true, NULL, 0);
}
#endif /* CONFIG_NETLINK_MMAP */
@@ -970,41 +989,50 @@ netlink_unlock_table(void)
struct netlink_compare_arg
{
- struct net *net;
+ possible_net_t pnet;
u32 portid;
};
-static bool netlink_compare(void *ptr, void *arg)
+/* Doing sizeof directly may yield 4 extra bytes on 64-bit. */
+#define netlink_compare_arg_len \
+ (offsetof(struct netlink_compare_arg, portid) + sizeof(u32))
+
+static inline int netlink_compare(struct rhashtable_compare_arg *arg,
+ const void *ptr)
{
- struct netlink_compare_arg *x = arg;
- struct sock *sk = ptr;
+ const struct netlink_compare_arg *x = arg->key;
+ const struct netlink_sock *nlk = ptr;
+
+ return nlk->portid != x->portid ||
+ !net_eq(sock_net(&nlk->sk), read_pnet(&x->pnet));
+}
- return nlk_sk(sk)->portid == x->portid &&
- net_eq(sock_net(sk), x->net);
+static void netlink_compare_arg_init(struct netlink_compare_arg *arg,
+ struct net *net, u32 portid)
+{
+ memset(arg, 0, sizeof(*arg));
+ write_pnet(&arg->pnet, net);
+ arg->portid = portid;
}
static struct sock *__netlink_lookup(struct netlink_table *table, u32 portid,
struct net *net)
{
- struct netlink_compare_arg arg = {
- .net = net,
- .portid = portid,
- };
+ struct netlink_compare_arg arg;
- return rhashtable_lookup_compare(&table->hash, &portid,
- &netlink_compare, &arg);
+ netlink_compare_arg_init(&arg, net, portid);
+ return rhashtable_lookup_fast(&table->hash, &arg,
+ netlink_rhashtable_params);
}
-static bool __netlink_insert(struct netlink_table *table, struct sock *sk)
+static int __netlink_insert(struct netlink_table *table, struct sock *sk)
{
- struct netlink_compare_arg arg = {
- .net = sock_net(sk),
- .portid = nlk_sk(sk)->portid,
- };
+ struct netlink_compare_arg arg;
- return rhashtable_lookup_compare_insert(&table->hash,
- &nlk_sk(sk)->node,
- &netlink_compare, &arg);
+ netlink_compare_arg_init(&arg, sock_net(sk), nlk_sk(sk)->portid);
+ return rhashtable_lookup_insert_key(&table->hash, &arg,
+ &nlk_sk(sk)->node,
+ netlink_rhashtable_params);
}
static struct sock *netlink_lookup(struct net *net, int protocol, u32 portid)
@@ -1066,9 +1094,11 @@ static int netlink_insert(struct sock *sk, u32 portid)
nlk_sk(sk)->portid = portid;
sock_hold(sk);
- err = 0;
- if (!__netlink_insert(table, sk)) {
- err = -EADDRINUSE;
+ err = __netlink_insert(table, sk);
+ if (err) {
+ if (err == -EEXIST)
+ err = -EADDRINUSE;
+ nlk_sk(sk)->portid = 0;
sock_put(sk);
}
@@ -1082,7 +1112,8 @@ static void netlink_remove(struct sock *sk)
struct netlink_table *table;
table = &nl_table[sk->sk_protocol];
- if (rhashtable_remove(&table->hash, &nlk_sk(sk)->node)) {
+ if (!rhashtable_remove_fast(&table->hash, &nlk_sk(sk)->node,
+ netlink_rhashtable_params)) {
WARN_ON(atomic_read(&sk->sk_refcnt) == 1);
__sock_put(sk);
}
@@ -1104,14 +1135,15 @@ static struct proto netlink_proto = {
};
static int __netlink_create(struct net *net, struct socket *sock,
- struct mutex *cb_mutex, int protocol)
+ struct mutex *cb_mutex, int protocol,
+ int kern)
{
struct sock *sk;
struct netlink_sock *nlk;
sock->ops = &netlink_ops;
- sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto);
+ sk = sk_alloc(net, PF_NETLINK, GFP_KERNEL, &netlink_proto, kern);
if (!sk)
return -ENOMEM;
@@ -1173,7 +1205,7 @@ static int netlink_create(struct net *net, struct socket *sock, int protocol,
if (err < 0)
goto out;
- err = __netlink_create(net, sock, cb_mutex, protocol);
+ err = __netlink_create(net, sock, cb_mutex, protocol, kern);
if (err < 0)
goto out_module;
@@ -1283,20 +1315,24 @@ static int netlink_autobind(struct socket *sock)
struct netlink_table *table = &nl_table[sk->sk_protocol];
s32 portid = task_tgid_vnr(current);
int err;
- static s32 rover = -4097;
+ s32 rover = -4096;
+ bool ok;
retry:
cond_resched();
rcu_read_lock();
- if (__netlink_lookup(table, portid, net)) {
+ ok = !__netlink_lookup(table, portid, net);
+ rcu_read_unlock();
+ if (!ok) {
/* Bind collision, search negative portid values. */
- portid = rover--;
- if (rover > -4097)
+ if (rover == -4096)
+ /* rover will be in range [S32_MIN, -4097] */
+ rover = S32_MIN + prandom_u32_max(-4096 - S32_MIN);
+ else if (rover >= -4096)
rover = -4097;
- rcu_read_unlock();
+ portid = rover--;
goto retry;
}
- rcu_read_unlock();
err = netlink_insert(sk, portid);
if (err == -EADDRINUSE)
@@ -1616,13 +1652,11 @@ static struct sk_buff *netlink_alloc_large_skb(unsigned int size,
if (data == NULL)
return NULL;
- skb = build_skb(data, size);
+ skb = __build_skb(data, size);
if (skb == NULL)
vfree(data);
- else {
- skb->head_frag = 0;
+ else
skb->destructor = netlink_skb_destructor;
- }
return skb;
}
@@ -1645,7 +1679,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
nlk = nlk_sk(sk);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!netlink_skb_is_mmaped(skb)) {
DECLARE_WAITQUEUE(wait, current);
if (!*timeo) {
@@ -1660,7 +1694,7 @@ int netlink_attachskb(struct sock *sk, struct sk_buff *skb,
add_wait_queue(&nlk->wait, &wait);
if ((atomic_read(&sk->sk_rmem_alloc) > sk->sk_rcvbuf ||
- test_bit(NETLINK_CONGESTED, &nlk->state)) &&
+ test_bit(NETLINK_S_CONGESTED, &nlk->state)) &&
!sock_flag(sk, SOCK_DEAD))
*timeo = schedule_timeout(*timeo);
@@ -1884,7 +1918,7 @@ static int netlink_broadcast_deliver(struct sock *sk, struct sk_buff *skb)
struct netlink_sock *nlk = nlk_sk(sk);
if (atomic_read(&sk->sk_rmem_alloc) <= sk->sk_rcvbuf &&
- !test_bit(NETLINK_CONGESTED, &nlk->state)) {
+ !test_bit(NETLINK_S_CONGESTED, &nlk->state)) {
netlink_skb_set_owner_r(skb, sk);
__netlink_sendskb(sk, skb);
return atomic_read(&sk->sk_rmem_alloc) > (sk->sk_rcvbuf >> 1);
@@ -1920,8 +1954,17 @@ static void do_one_broadcast(struct sock *sk,
!test_bit(p->group - 1, nlk->groups))
return;
- if (!net_eq(sock_net(sk), p->net))
- return;
+ if (!net_eq(sock_net(sk), p->net)) {
+ if (!(nlk->flags & NETLINK_F_LISTEN_ALL_NSID))
+ return;
+
+ if (!peernet_has_id(sock_net(sk), p->net))
+ return;
+
+ if (!file_ns_capable(sk->sk_socket->file, p->net->user_ns,
+ CAP_NET_BROADCAST))
+ return;
+ }
if (p->failure) {
netlink_overrun(sk);
@@ -1945,23 +1988,33 @@ static void do_one_broadcast(struct sock *sk,
netlink_overrun(sk);
/* Clone failed. Notify ALL listeners. */
p->failure = 1;
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
- } else if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
+ goto out;
+ }
+ if (p->tx_filter && p->tx_filter(sk, p->skb2, p->tx_data)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if (sk_filter(sk, p->skb2)) {
+ goto out;
+ }
+ if (sk_filter(sk, p->skb2)) {
kfree_skb(p->skb2);
p->skb2 = NULL;
- } else if ((val = netlink_broadcast_deliver(sk, p->skb2)) < 0) {
+ goto out;
+ }
+ NETLINK_CB(p->skb2).nsid = peernet2id(sock_net(sk), p->net);
+ NETLINK_CB(p->skb2).nsid_is_set = true;
+ val = netlink_broadcast_deliver(sk, p->skb2);
+ if (val < 0) {
netlink_overrun(sk);
- if (nlk->flags & NETLINK_BROADCAST_SEND_ERROR)
+ if (nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR)
p->delivery_failure = 1;
} else {
p->congested |= val;
p->delivered = 1;
p->skb2 = NULL;
}
+out:
sock_put(sk);
}
@@ -2046,7 +2099,7 @@ static int do_one_set_err(struct sock *sk, struct netlink_set_err_data *p)
!test_bit(p->group - 1, nlk->groups))
goto out;
- if (p->code == ENOBUFS && nlk->flags & NETLINK_RECV_NO_ENOBUFS) {
+ if (p->code == ENOBUFS && nlk->flags & NETLINK_F_RECV_NO_ENOBUFS) {
ret = 1;
goto out;
}
@@ -2065,7 +2118,7 @@ out:
* @code: error code, must be negative (as usual in kernelspace)
*
* This function returns the number of broadcast listeners that have set the
- * NETLINK_RECV_NO_ENOBUFS socket option.
+ * NETLINK_NO_ENOBUFS socket option.
*/
int netlink_set_err(struct sock *ssk, u32 portid, u32 group, int code)
{
@@ -2125,9 +2178,9 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
switch (optname) {
case NETLINK_PKTINFO:
if (val)
- nlk->flags |= NETLINK_RECV_PKTINFO;
+ nlk->flags |= NETLINK_F_RECV_PKTINFO;
else
- nlk->flags &= ~NETLINK_RECV_PKTINFO;
+ nlk->flags &= ~NETLINK_F_RECV_PKTINFO;
err = 0;
break;
case NETLINK_ADD_MEMBERSHIP:
@@ -2156,18 +2209,18 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
}
case NETLINK_BROADCAST_ERROR:
if (val)
- nlk->flags |= NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags |= NETLINK_F_BROADCAST_SEND_ERROR;
else
- nlk->flags &= ~NETLINK_BROADCAST_SEND_ERROR;
+ nlk->flags &= ~NETLINK_F_BROADCAST_SEND_ERROR;
err = 0;
break;
case NETLINK_NO_ENOBUFS:
if (val) {
- nlk->flags |= NETLINK_RECV_NO_ENOBUFS;
- clear_bit(NETLINK_CONGESTED, &nlk->state);
+ nlk->flags |= NETLINK_F_RECV_NO_ENOBUFS;
+ clear_bit(NETLINK_S_CONGESTED, &nlk->state);
wake_up_interruptible(&nlk->wait);
} else {
- nlk->flags &= ~NETLINK_RECV_NO_ENOBUFS;
+ nlk->flags &= ~NETLINK_F_RECV_NO_ENOBUFS;
}
err = 0;
break;
@@ -2185,11 +2238,21 @@ static int netlink_setsockopt(struct socket *sock, int level, int optname,
return -EINVAL;
if (copy_from_user(&req, optval, sizeof(req)))
return -EFAULT;
- err = netlink_set_ring(sk, &req, false,
+ err = netlink_set_ring(sk, &req,
optname == NETLINK_TX_RING);
break;
}
#endif /* CONFIG_NETLINK_MMAP */
+ case NETLINK_LISTEN_ALL_NSID:
+ if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_BROADCAST))
+ return -EPERM;
+
+ if (val)
+ nlk->flags |= NETLINK_F_LISTEN_ALL_NSID;
+ else
+ nlk->flags &= ~NETLINK_F_LISTEN_ALL_NSID;
+ err = 0;
+ break;
default:
err = -ENOPROTOOPT;
}
@@ -2216,7 +2279,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_PKTINFO ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_PKTINFO ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2226,7 +2289,7 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_BROADCAST_SEND_ERROR ? 1 : 0;
+ val = nlk->flags & NETLINK_F_BROADCAST_SEND_ERROR ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
@@ -2236,12 +2299,34 @@ static int netlink_getsockopt(struct socket *sock, int level, int optname,
if (len < sizeof(int))
return -EINVAL;
len = sizeof(int);
- val = nlk->flags & NETLINK_RECV_NO_ENOBUFS ? 1 : 0;
+ val = nlk->flags & NETLINK_F_RECV_NO_ENOBUFS ? 1 : 0;
if (put_user(len, optlen) ||
put_user(val, optval))
return -EFAULT;
err = 0;
break;
+ case NETLINK_LIST_MEMBERSHIPS: {
+ int pos, idx, shift;
+
+ err = 0;
+ netlink_table_grab();
+ for (pos = 0; pos * 8 < nlk->ngroups; pos += sizeof(u32)) {
+ if (len - pos < sizeof(u32))
+ break;
+
+ idx = pos / sizeof(unsigned long);
+ shift = (pos % sizeof(unsigned long)) * 8;
+ if (put_user((u32)(nlk->groups[idx] >> shift),
+ (u32 __user *)(optval + pos))) {
+ err = -EFAULT;
+ break;
+ }
+ }
+ if (put_user(ALIGN(nlk->ngroups / 8, sizeof(u32)), optlen))
+ err = -EFAULT;
+ netlink_table_ungrab();
+ break;
+ }
default:
err = -ENOPROTOOPT;
}
@@ -2256,8 +2341,17 @@ static void netlink_cmsg_recv_pktinfo(struct msghdr *msg, struct sk_buff *skb)
put_cmsg(msg, SOL_NETLINK, NETLINK_PKTINFO, sizeof(info), &info);
}
-static int netlink_sendmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len)
+static void netlink_cmsg_listen_all_nsid(struct sock *sk, struct msghdr *msg,
+ struct sk_buff *skb)
+{
+ if (!NETLINK_CB(skb).nsid_is_set)
+ return;
+
+ put_cmsg(msg, SOL_NETLINK, NETLINK_LISTEN_ALL_NSID, sizeof(int),
+ &NETLINK_CB(skb).nsid);
+}
+
+static int netlink_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
struct sock *sk = sock->sk;
struct netlink_sock *nlk = nlk_sk(sk);
@@ -2346,8 +2440,7 @@ out:
return err;
}
-static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
- struct msghdr *msg, size_t len,
+static int netlink_recvmsg(struct socket *sock, struct msghdr *msg, size_t len,
int flags)
{
struct scm_cookie scm;
@@ -2409,8 +2502,10 @@ static int netlink_recvmsg(struct kiocb *kiocb, struct socket *sock,
msg->msg_namelen = sizeof(*addr);
}
- if (nlk->flags & NETLINK_RECV_PKTINFO)
+ if (nlk->flags & NETLINK_F_RECV_PKTINFO)
netlink_cmsg_recv_pktinfo(msg, skb);
+ if (nlk->flags & NETLINK_F_LISTEN_ALL_NSID)
+ netlink_cmsg_listen_all_nsid(sk, msg, skb);
memset(&scm, 0, sizeof(scm));
scm.creds = *NETLINK_CREDS(skb);
@@ -2464,17 +2559,10 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
if (sock_create_lite(PF_NETLINK, SOCK_DGRAM, unit, &sock))
return NULL;
- /*
- * We have to just have a reference on the net from sk, but don't
- * get_net it. Besides, we cannot get and then put the net here.
- * So we create one inside init_net and the move it to net.
- */
-
- if (__netlink_create(&init_net, sock, cb_mutex, unit) < 0)
+ if (__netlink_create(net, sock, cb_mutex, unit, 1) < 0)
goto out_sock_release_nosk;
sk = sock->sk;
- sk_change_net(sk, net);
if (!cfg || cfg->groups < 32)
groups = 32;
@@ -2493,7 +2581,7 @@ __netlink_kernel_create(struct net *net, int unit, struct module *module,
goto out_sock_release;
nlk = nlk_sk(sk);
- nlk->flags |= NETLINK_KERNEL_SOCKET;
+ nlk->flags |= NETLINK_F_KERNEL_SOCKET;
netlink_table_grab();
if (!nl_table[unit].registered) {
@@ -2530,7 +2618,10 @@ EXPORT_SYMBOL(__netlink_kernel_create);
void
netlink_kernel_release(struct sock *sk)
{
- sk_release_kernel(sk);
+ if (sk == NULL || sk->sk_socket == NULL)
+ return;
+
+ sock_release(sk->sk_socket);
}
EXPORT_SYMBOL(netlink_kernel_release);
@@ -3116,17 +3207,27 @@ static struct pernet_operations __net_initdata netlink_net_ops = {
.exit = netlink_net_exit,
};
+static inline u32 netlink_hash(const void *data, u32 len, u32 seed)
+{
+ const struct netlink_sock *nlk = data;
+ struct netlink_compare_arg arg;
+
+ netlink_compare_arg_init(&arg, sock_net(&nlk->sk), nlk->portid);
+ return jhash2((u32 *)&arg, netlink_compare_arg_len / sizeof(u32), seed);
+}
+
+static const struct rhashtable_params netlink_rhashtable_params = {
+ .head_offset = offsetof(struct netlink_sock, node),
+ .key_len = netlink_compare_arg_len,
+ .obj_hashfn = netlink_hash,
+ .obj_cmpfn = netlink_compare,
+ .automatic_shrinking = true,
+};
+
static int __init netlink_proto_init(void)
{
int i;
int err = proto_register(&netlink_proto, 0);
- struct rhashtable_params ht_params = {
- .head_offset = offsetof(struct netlink_sock, node),
- .key_offset = offsetof(struct netlink_sock, portid),
- .key_len = sizeof(u32), /* portid */
- .hashfn = jhash,
- .max_shift = 16, /* 64K */
- };
if (err != 0)
goto out;
@@ -3138,7 +3239,8 @@ static int __init netlink_proto_init(void)
goto panic;
for (i = 0; i < MAX_LINKS; i++) {
- if (rhashtable_init(&nl_table[i].hash, &ht_params) < 0) {
+ if (rhashtable_init(&nl_table[i].hash,
+ &netlink_rhashtable_params) < 0) {
while (--i > 0)
rhashtable_destroy(&nl_table[i].hash);
kfree(nl_table);