aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2024-11-14 10:05:33 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2024-11-14 10:05:33 -0800
commitcfaaa7d010d1fc58f9717fcc8591201e741d2d49 (patch)
treed2dda42587f7d6c5f714aafe1f262321a135291c /net
parent4abcd80f23357808b0444d261ed08e5a77dbaa9a (diff)
parentca34aceb322bfcd6ab498884f1805ee12f983259 (diff)
Merge tag 'net-6.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net
Pull networking fixes from Paolo Abeni: "Including fixes from bluetooth. Quite calm week. No new regression under investigation. Current release - regressions: - eth: revert "igb: Disable threaded IRQ for igb_msix_other" Current release - new code bugs: - bluetooth: btintel: direct exception event to bluetooth stack Previous releases - regressions: - core: fix data-races around sk->sk_forward_alloc - netlink: terminate outstanding dump on socket close - mptcp: error out earlier on disconnect - vsock: fix accept_queue memory leak - phylink: ensure PHY momentary link-fails are handled - eth: mlx5: - fix null-ptr-deref in add rule err flow - lock FTE when checking if active - eth: dwmac-mediatek: fix inverted handling of mediatek,mac-wol Previous releases - always broken: - sched: fix u32's systematic failure to free IDR entries for hnodes. - sctp: fix possible UAF in sctp_v6_available() - eth: bonding: add ns target multicast address to slave device - eth: mlx5: fix msix vectors to respect platform limit - eth: icssg-prueth: fix 1 PPS sync" * tag 'net-6.12-rc8' of git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net: (38 commits) net: sched: u32: Add test case for systematic hnode IDR leaks selftests: bonding: add ns multicast group testing bonding: add ns target multicast address to slave device net: ti: icssg-prueth: Fix 1 PPS sync stmmac: dwmac-intel-plat: fix call balance of tx_clk handling routines net: Make copy_safe_from_sockptr() match documentation net: stmmac: dwmac-mediatek: Fix inverted handling of mediatek,mac-wol ipmr: Fix access to mfc_cache_list without lock held samples: pktgen: correct dev to DEV net: phylink: ensure PHY momentary link-fails are handled mptcp: pm: use _rcu variant under rcu_read_lock mptcp: hold pm lock when deleting entry mptcp: update local address flags when setting it net: sched: cls_u32: Fix u32's systematic failure to free IDR entries for hnodes. MAINTAINERS: Re-add cancelled Renesas driver sections Revert "igb: Disable threaded IRQ for igb_msix_other" Bluetooth: btintel: Direct exception event to bluetooth stack Bluetooth: hci_core: Fix calling mgmt_device_connected virtio/vsock: Improve MSG_ZEROCOPY error handling vsock: Fix sk_error_queue memory leak ...
Diffstat (limited to 'net')
-rw-r--r--net/bluetooth/hci_core.c2
-rw-r--r--net/core/sock.c42
-rw-r--r--net/dccp/ipv6.c2
-rw-r--r--net/ipv4/ipmr_base.c3
-rw-r--r--net/ipv6/tcp_ipv6.c4
-rw-r--r--net/mptcp/pm_netlink.c3
-rw-r--r--net/mptcp/pm_userspace.c15
-rw-r--r--net/mptcp/protocol.c16
-rw-r--r--net/netlink/af_netlink.c31
-rw-r--r--net/netlink/af_netlink.h2
-rw-r--r--net/sched/cls_u32.c18
-rw-r--r--net/sctp/ipv6.c19
-rw-r--r--net/vmw_vsock/af_vsock.c3
-rw-r--r--net/vmw_vsock/virtio_transport_common.c9
14 files changed, 103 insertions, 66 deletions
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 96d097b21d13..0ac354db8177 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3788,8 +3788,6 @@ static void hci_acldata_packet(struct hci_dev *hdev, struct sk_buff *skb)
hci_dev_lock(hdev);
conn = hci_conn_hash_lookup_handle(hdev, handle);
- if (conn && hci_dev_test_flag(hdev, HCI_MGMT))
- mgmt_device_connected(hdev, conn, NULL, 0);
hci_dev_unlock(hdev);
if (conn) {
diff --git a/net/core/sock.c b/net/core/sock.c
index 039be95c40cf..da50df485090 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1052,32 +1052,34 @@ static int sock_reserve_memory(struct sock *sk, int bytes)
#ifdef CONFIG_PAGE_POOL
-/* This is the number of tokens that the user can SO_DEVMEM_DONTNEED in
- * 1 syscall. The limit exists to limit the amount of memory the kernel
- * allocates to copy these tokens.
+/* This is the number of tokens and frags that the user can SO_DEVMEM_DONTNEED
+ * in 1 syscall. The limit exists to limit the amount of memory the kernel
+ * allocates to copy these tokens, and to prevent looping over the frags for
+ * too long.
*/
#define MAX_DONTNEED_TOKENS 128
+#define MAX_DONTNEED_FRAGS 1024
static noinline_for_stack int
sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
{
unsigned int num_tokens, i, j, k, netmem_num = 0;
struct dmabuf_token *tokens;
+ int ret = 0, num_frags = 0;
netmem_ref netmems[16];
- int ret = 0;
if (!sk_is_tcp(sk))
return -EBADF;
- if (optlen % sizeof(struct dmabuf_token) ||
+ if (optlen % sizeof(*tokens) ||
optlen > sizeof(*tokens) * MAX_DONTNEED_TOKENS)
return -EINVAL;
- tokens = kvmalloc_array(optlen, sizeof(*tokens), GFP_KERNEL);
+ num_tokens = optlen / sizeof(*tokens);
+ tokens = kvmalloc_array(num_tokens, sizeof(*tokens), GFP_KERNEL);
if (!tokens)
return -ENOMEM;
- num_tokens = optlen / sizeof(struct dmabuf_token);
if (copy_from_sockptr(tokens, optval, optlen)) {
kvfree(tokens);
return -EFAULT;
@@ -1086,24 +1088,28 @@ sock_devmem_dontneed(struct sock *sk, sockptr_t optval, unsigned int optlen)
xa_lock_bh(&sk->sk_user_frags);
for (i = 0; i < num_tokens; i++) {
for (j = 0; j < tokens[i].token_count; j++) {
+ if (++num_frags > MAX_DONTNEED_FRAGS)
+ goto frag_limit_reached;
+
netmem_ref netmem = (__force netmem_ref)__xa_erase(
&sk->sk_user_frags, tokens[i].token_start + j);
- if (netmem &&
- !WARN_ON_ONCE(!netmem_is_net_iov(netmem))) {
- netmems[netmem_num++] = netmem;
- if (netmem_num == ARRAY_SIZE(netmems)) {
- xa_unlock_bh(&sk->sk_user_frags);
- for (k = 0; k < netmem_num; k++)
- WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
- netmem_num = 0;
- xa_lock_bh(&sk->sk_user_frags);
- }
- ret++;
+ if (!netmem || WARN_ON_ONCE(!netmem_is_net_iov(netmem)))
+ continue;
+
+ netmems[netmem_num++] = netmem;
+ if (netmem_num == ARRAY_SIZE(netmems)) {
+ xa_unlock_bh(&sk->sk_user_frags);
+ for (k = 0; k < netmem_num; k++)
+ WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
+ netmem_num = 0;
+ xa_lock_bh(&sk->sk_user_frags);
}
+ ret++;
}
}
+frag_limit_reached:
xa_unlock_bh(&sk->sk_user_frags);
for (k = 0; k < netmem_num; k++)
WARN_ON_ONCE(!napi_pp_put_page(netmems[k]));
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index da5dba120bc9..d6649246188d 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -618,7 +618,7 @@ static int dccp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != DCCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == DCCP_OPEN) { /* Fast path */
diff --git a/net/ipv4/ipmr_base.c b/net/ipv4/ipmr_base.c
index 271dc03fc6db..f0af12a2f70b 100644
--- a/net/ipv4/ipmr_base.c
+++ b/net/ipv4/ipmr_base.c
@@ -310,7 +310,8 @@ int mr_table_dump(struct mr_table *mrt, struct sk_buff *skb,
if (filter->filter_set)
flags |= NLM_F_DUMP_FILTERED;
- list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list) {
+ list_for_each_entry_rcu(mfc, &mrt->mfc_cache_list, list,
+ lockdep_rtnl_is_held()) {
if (e < s_e)
goto next_entry;
if (filter->dev &&
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index d71ab4e1efe1..c9de5ef8f267 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -1618,7 +1618,7 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
by tcp. Feel free to propose better solution.
--ANK (980728)
*/
- if (np->rxopt.all)
+ if (np->rxopt.all && sk->sk_state != TCP_LISTEN)
opt_skb = skb_clone_and_charge_r(skb, sk);
if (sk->sk_state == TCP_ESTABLISHED) { /* Fast path */
@@ -1656,8 +1656,6 @@ int tcp_v6_do_rcv(struct sock *sk, struct sk_buff *skb)
if (reason)
goto reset;
}
- if (opt_skb)
- __kfree_skb(opt_skb);
return 0;
}
} else
diff --git a/net/mptcp/pm_netlink.c b/net/mptcp/pm_netlink.c
index db586a5b3866..45a2b5f05d38 100644
--- a/net/mptcp/pm_netlink.c
+++ b/net/mptcp/pm_netlink.c
@@ -524,7 +524,8 @@ __lookup_addr(struct pm_nl_pernet *pernet, const struct mptcp_addr_info *info)
{
struct mptcp_pm_addr_entry *entry;
- list_for_each_entry(entry, &pernet->local_addr_list, list) {
+ list_for_each_entry_rcu(entry, &pernet->local_addr_list, list,
+ lockdep_is_held(&pernet->lock)) {
if (mptcp_addresses_equal(&entry->addr, info, entry->addr.port))
return entry;
}
diff --git a/net/mptcp/pm_userspace.c b/net/mptcp/pm_userspace.c
index 56dfea9862b7..e35178f5205f 100644
--- a/net/mptcp/pm_userspace.c
+++ b/net/mptcp/pm_userspace.c
@@ -308,14 +308,17 @@ int mptcp_pm_nl_remove_doit(struct sk_buff *skb, struct genl_info *info)
lock_sock(sk);
+ spin_lock_bh(&msk->pm.lock);
match = mptcp_userspace_pm_lookup_addr_by_id(msk, id_val);
if (!match) {
GENL_SET_ERR_MSG(info, "address with specified id not found");
+ spin_unlock_bh(&msk->pm.lock);
release_sock(sk);
goto out;
}
list_move(&match->list, &free_list);
+ spin_unlock_bh(&msk->pm.lock);
mptcp_pm_remove_addrs(msk, &free_list);
@@ -560,6 +563,7 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
struct nlattr *token = info->attrs[MPTCP_PM_ATTR_TOKEN];
struct nlattr *attr = info->attrs[MPTCP_PM_ATTR_ADDR];
struct net *net = sock_net(skb->sk);
+ struct mptcp_pm_addr_entry *entry;
struct mptcp_sock *msk;
int ret = -EINVAL;
struct sock *sk;
@@ -601,6 +605,17 @@ int mptcp_userspace_pm_set_flags(struct sk_buff *skb, struct genl_info *info)
if (loc.flags & MPTCP_PM_ADDR_FLAG_BACKUP)
bkup = 1;
+ spin_lock_bh(&msk->pm.lock);
+ list_for_each_entry(entry, &msk->pm.userspace_pm_local_addr_list, list) {
+ if (mptcp_addresses_equal(&entry->addr, &loc.addr, false)) {
+ if (bkup)
+ entry->flags |= MPTCP_PM_ADDR_FLAG_BACKUP;
+ else
+ entry->flags &= ~MPTCP_PM_ADDR_FLAG_BACKUP;
+ }
+ }
+ spin_unlock_bh(&msk->pm.lock);
+
lock_sock(sk);
ret = mptcp_pm_nl_mp_prio_send_ack(msk, &loc.addr, &rem.addr, bkup);
release_sock(sk);
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index d263091659e0..48d480982b78 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2082,7 +2082,8 @@ static void mptcp_rcv_space_adjust(struct mptcp_sock *msk, int copied)
slow = lock_sock_fast(ssk);
WRITE_ONCE(ssk->sk_rcvbuf, rcvbuf);
WRITE_ONCE(tcp_sk(ssk)->window_clamp, window_clamp);
- tcp_cleanup_rbuf(ssk, 1);
+ if (tcp_can_send_ack(ssk))
+ tcp_cleanup_rbuf(ssk, 1);
unlock_sock_fast(ssk, slow);
}
}
@@ -2205,7 +2206,7 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
cmsg_flags = MPTCP_CMSG_INQ;
while (copied < len) {
- int bytes_read;
+ int err, bytes_read;
bytes_read = __mptcp_recvmsg_mskq(msk, msg, len - copied, flags, &tss, &cmsg_flags);
if (unlikely(bytes_read < 0)) {
@@ -2267,9 +2268,16 @@ static int mptcp_recvmsg(struct sock *sk, struct msghdr *msg, size_t len,
}
pr_debug("block timeout %ld\n", timeo);
- sk_wait_data(sk, &timeo, NULL);
+ mptcp_rcv_space_adjust(msk, copied);
+ err = sk_wait_data(sk, &timeo, NULL);
+ if (err < 0) {
+ err = copied ? : err;
+ goto out_err;
+ }
}
+ mptcp_rcv_space_adjust(msk, copied);
+
out_err:
if (cmsg_flags && copied >= 0) {
if (cmsg_flags & MPTCP_CMSG_TS)
@@ -2285,8 +2293,6 @@ out_err:
pr_debug("msk=%p rx queue empty=%d:%d copied=%d\n",
msk, skb_queue_empty_lockless(&sk->sk_receive_queue),
skb_queue_empty(&msk->receive_queue), copied);
- if (!(flags & MSG_PEEK))
- mptcp_rcv_space_adjust(msk, copied);
release_sock(sk);
return copied;
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 0a9287fadb47..f84aad420d44 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -393,15 +393,6 @@ static void netlink_skb_set_owner_r(struct sk_buff *skb, struct sock *sk)
static void netlink_sock_destruct(struct sock *sk)
{
- struct netlink_sock *nlk = nlk_sk(sk);
-
- if (nlk->cb_running) {
- if (nlk->cb.done)
- nlk->cb.done(&nlk->cb);
- module_put(nlk->cb.module);
- kfree_skb(nlk->cb.skb);
- }
-
skb_queue_purge(&sk->sk_receive_queue);
if (!sock_flag(sk, SOCK_DEAD)) {
@@ -414,14 +405,6 @@ static void netlink_sock_destruct(struct sock *sk)
WARN_ON(nlk_sk(sk)->groups);
}
-static void netlink_sock_destruct_work(struct work_struct *work)
-{
- struct netlink_sock *nlk = container_of(work, struct netlink_sock,
- work);
-
- sk_free(&nlk->sk);
-}
-
/* This lock without WQ_FLAG_EXCLUSIVE is good on UP and it is _very_ bad on
* SMP. Look, when several writers sleep and reader wakes them up, all but one
* immediately hit write lock and grab all the cpus. Exclusive sleep solves
@@ -731,12 +714,6 @@ static void deferred_put_nlk_sk(struct rcu_head *head)
if (!refcount_dec_and_test(&sk->sk_refcnt))
return;
- if (nlk->cb_running && nlk->cb.done) {
- INIT_WORK(&nlk->work, netlink_sock_destruct_work);
- schedule_work(&nlk->work);
- return;
- }
-
sk_free(sk);
}
@@ -788,6 +765,14 @@ static int netlink_release(struct socket *sock)
NETLINK_URELEASE, &n);
}
+ /* Terminate any outstanding dump */
+ if (nlk->cb_running) {
+ if (nlk->cb.done)
+ nlk->cb.done(&nlk->cb);
+ module_put(nlk->cb.module);
+ kfree_skb(nlk->cb.skb);
+ }
+
module_put(nlk->module);
if (netlink_is_kernel(sk)) {
diff --git a/net/netlink/af_netlink.h b/net/netlink/af_netlink.h
index 5b0e4e62ab8b..778a3809361f 100644
--- a/net/netlink/af_netlink.h
+++ b/net/netlink/af_netlink.h
@@ -4,7 +4,6 @@
#include <linux/rhashtable.h>
#include <linux/atomic.h>
-#include <linux/workqueue.h>
#include <net/sock.h>
/* flags */
@@ -50,7 +49,6 @@ struct netlink_sock {
struct rhash_head node;
struct rcu_head rcu;
- struct work_struct work;
};
static inline struct netlink_sock *nlk_sk(struct sock *sk)
diff --git a/net/sched/cls_u32.c b/net/sched/cls_u32.c
index 9412d88a99bc..d3a03c57545b 100644
--- a/net/sched/cls_u32.c
+++ b/net/sched/cls_u32.c
@@ -92,6 +92,16 @@ struct tc_u_common {
long knodes;
};
+static u32 handle2id(u32 h)
+{
+ return ((h & 0x80000000) ? ((h >> 20) & 0x7FF) : h);
+}
+
+static u32 id2handle(u32 id)
+{
+ return (id | 0x800U) << 20;
+}
+
static inline unsigned int u32_hash_fold(__be32 key,
const struct tc_u32_sel *sel,
u8 fshift)
@@ -310,7 +320,7 @@ static u32 gen_new_htid(struct tc_u_common *tp_c, struct tc_u_hnode *ptr)
int id = idr_alloc_cyclic(&tp_c->handle_idr, ptr, 1, 0x7FF, GFP_KERNEL);
if (id < 0)
return 0;
- return (id | 0x800U) << 20;
+ return id2handle(id);
}
static struct hlist_head *tc_u_common_hash;
@@ -360,7 +370,7 @@ static int u32_init(struct tcf_proto *tp)
return -ENOBUFS;
refcount_set(&root_ht->refcnt, 1);
- root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : 0x80000000;
+ root_ht->handle = tp_c ? gen_new_htid(tp_c, root_ht) : id2handle(0);
root_ht->prio = tp->prio;
root_ht->is_root = true;
idr_init(&root_ht->handle_idr);
@@ -612,7 +622,7 @@ static int u32_destroy_hnode(struct tcf_proto *tp, struct tc_u_hnode *ht,
if (phn == ht) {
u32_clear_hw_hnode(tp, ht, extack);
idr_destroy(&ht->handle_idr);
- idr_remove(&tp_c->handle_idr, ht->handle);
+ idr_remove(&tp_c->handle_idr, handle2id(ht->handle));
RCU_INIT_POINTER(*hn, ht->next);
kfree_rcu(ht, rcu);
return 0;
@@ -989,7 +999,7 @@ static int u32_change(struct net *net, struct sk_buff *in_skb,
err = u32_replace_hw_hnode(tp, ht, userflags, extack);
if (err) {
- idr_remove(&tp_c->handle_idr, handle);
+ idr_remove(&tp_c->handle_idr, handle2id(handle));
kfree(ht);
return err;
}
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index f7b809c0d142..38e2fbdcbeac 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -683,7 +683,7 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
struct sock *sk = &sp->inet.sk;
struct net *net = sock_net(sk);
struct net_device *dev = NULL;
- int type;
+ int type, res, bound_dev_if;
type = ipv6_addr_type(in6);
if (IPV6_ADDR_ANY == type)
@@ -697,14 +697,21 @@ static int sctp_v6_available(union sctp_addr *addr, struct sctp_sock *sp)
if (!(type & IPV6_ADDR_UNICAST))
return 0;
- if (sk->sk_bound_dev_if) {
- dev = dev_get_by_index_rcu(net, sk->sk_bound_dev_if);
+ rcu_read_lock();
+ bound_dev_if = READ_ONCE(sk->sk_bound_dev_if);
+ if (bound_dev_if) {
+ res = 0;
+ dev = dev_get_by_index_rcu(net, bound_dev_if);
if (!dev)
- return 0;
+ goto out;
}
- return ipv6_can_nonlocal_bind(net, &sp->inet) ||
- ipv6_chk_addr(net, in6, dev, 0);
+ res = ipv6_can_nonlocal_bind(net, &sp->inet) ||
+ ipv6_chk_addr(net, in6, dev, 0);
+
+out:
+ rcu_read_unlock();
+ return res;
}
/* This function checks if the address is a valid address to be used for
diff --git a/net/vmw_vsock/af_vsock.c b/net/vmw_vsock/af_vsock.c
index 35681adedd9a..dfd29160fe11 100644
--- a/net/vmw_vsock/af_vsock.c
+++ b/net/vmw_vsock/af_vsock.c
@@ -836,6 +836,9 @@ static void vsock_sk_destruct(struct sock *sk)
{
struct vsock_sock *vsk = vsock_sk(sk);
+ /* Flush MSG_ZEROCOPY leftovers. */
+ __skb_queue_purge(&sk->sk_error_queue);
+
vsock_deassign_transport(vsk);
/* When clearing these addresses, there's no need to set the family and
diff --git a/net/vmw_vsock/virtio_transport_common.c b/net/vmw_vsock/virtio_transport_common.c
index fc5666c8298f..9acc13ab3f82 100644
--- a/net/vmw_vsock/virtio_transport_common.c
+++ b/net/vmw_vsock/virtio_transport_common.c
@@ -400,6 +400,7 @@ static int virtio_transport_send_pkt_info(struct vsock_sock *vsk,
if (virtio_transport_init_zcopy_skb(vsk, skb,
info->msg,
can_zcopy)) {
+ kfree_skb(skb);
ret = -ENOMEM;
break;
}
@@ -1513,6 +1514,14 @@ virtio_transport_recv_listen(struct sock *sk, struct sk_buff *skb,
return -ENOMEM;
}
+ /* __vsock_release() might have already flushed accept_queue.
+ * Subsequent enqueues would lead to a memory leak.
+ */
+ if (sk->sk_shutdown == SHUTDOWN_MASK) {
+ virtio_transport_reset_no_sock(t, skb);
+ return -ESHUTDOWN;
+ }
+
child = vsock_create_connected(sk);
if (!child) {
virtio_transport_reset_no_sock(t, skb);