aboutsummaryrefslogtreecommitdiff
path: root/net
diff options
context:
space:
mode:
Diffstat (limited to 'net')
-rw-r--r--net/8021q/vlan_netlink.c4
-rw-r--r--net/batman-adv/multicast.c3
-rw-r--r--net/bridge/br_multicast.c20
-rw-r--r--net/bridge/br_private.h4
-rw-r--r--net/ceph/messenger_v1.c33
-rw-r--r--net/ceph/messenger_v2.c4
-rw-r--r--net/ceph/osd_client.c27
-rw-r--r--net/core/datagram.c2
-rw-r--r--net/core/dev.c9
-rw-r--r--net/core/dev.h3
-rw-r--r--net/core/filter.c44
-rw-r--r--net/core/request_sock.c3
-rw-r--r--net/core/sock.c11
-rw-r--r--net/devlink/core.c12
-rw-r--r--net/devlink/port.c2
-rw-r--r--net/hsr/hsr_device.c4
-rw-r--r--net/ipv4/af_inet.c9
-rw-r--r--net/ipv4/inet_connection_sock.c4
-rw-r--r--net/ipv4/ip_output.c12
-rw-r--r--net/ipv4/ip_sockglue.c6
-rw-r--r--net/ipv4/ip_tunnel_core.c2
-rw-r--r--net/ipv4/ipmr.c2
-rw-r--r--net/ipv4/raw.c2
-rw-r--r--net/ipv4/tcp.c13
-rw-r--r--net/ipv4/udp.c2
-rw-r--r--net/ipv6/addrconf_core.c21
-rw-r--r--net/ipv6/af_inet6.c3
-rw-r--r--net/ipv6/ip6_tunnel.c21
-rw-r--r--net/llc/af_llc.c26
-rw-r--r--net/llc/llc_core.c7
-rw-r--r--net/mac80211/Kconfig1
-rw-r--r--net/mac80211/cfg.c16
-rw-r--r--net/mac80211/debugfs_netdev.c4
-rw-r--r--net/mac80211/debugfs_netdev.h5
-rw-r--r--net/mac80211/iface.c2
-rw-r--r--net/mac80211/mlme.c114
-rw-r--r--net/mac80211/scan.c30
-rw-r--r--net/mac80211/sta_info.c7
-rw-r--r--net/mac80211/tx.c9
-rw-r--r--net/mac80211/wbrf.c2
-rw-r--r--net/mptcp/protocol.c3
-rw-r--r--net/netfilter/ipset/ip_set_bitmap_gen.h14
-rw-r--r--net/netfilter/ipset/ip_set_core.c39
-rw-r--r--net/netfilter/ipset/ip_set_hash_gen.h19
-rw-r--r--net/netfilter/ipset/ip_set_list_set.c13
-rw-r--r--net/netfilter/nf_conntrack_netlink.c12
-rw-r--r--net/netfilter/nf_conntrack_proto_sctp.c2
-rw-r--r--net/netfilter/nf_conntrack_proto_tcp.c10
-rw-r--r--net/netfilter/nf_log.c7
-rw-r--r--net/netfilter/nf_tables_api.c38
-rw-r--r--net/netfilter/nfnetlink_queue.c13
-rw-r--r--net/netfilter/nft_chain_filter.c11
-rw-r--r--net/netfilter/nft_compat.c29
-rw-r--r--net/netfilter/nft_ct.c27
-rw-r--r--net/netfilter/nft_flow_offload.c5
-rw-r--r--net/netfilter/nft_limit.c23
-rw-r--r--net/netfilter/nft_nat.c5
-rw-r--r--net/netfilter/nft_rt.c5
-rw-r--r--net/netfilter/nft_set_hash.c8
-rw-r--r--net/netfilter/nft_set_pipapo.c128
-rw-r--r--net/netfilter/nft_set_pipapo.h18
-rw-r--r--net/netfilter/nft_set_pipapo_avx2.c19
-rw-r--r--net/netfilter/nft_set_rbtree.c17
-rw-r--r--net/netfilter/nft_socket.c5
-rw-r--r--net/netfilter/nft_synproxy.c7
-rw-r--r--net/netfilter/nft_tproxy.c5
-rw-r--r--net/netfilter/nft_tunnel.c1
-rw-r--r--net/netfilter/nft_xfrm.c5
-rw-r--r--net/netlink/af_netlink.c2
-rw-r--r--net/nfc/nci/core.c4
-rw-r--r--net/rds/af_rds.c2
-rw-r--r--net/rxrpc/ar-internal.h37
-rw-r--r--net/rxrpc/call_event.c12
-rw-r--r--net/rxrpc/call_object.c1
-rw-r--r--net/rxrpc/conn_event.c10
-rw-r--r--net/rxrpc/input.c115
-rw-r--r--net/rxrpc/output.c8
-rw-r--r--net/rxrpc/proc.c2
-rw-r--r--net/rxrpc/rxkad.c4
-rw-r--r--net/sched/cls_api.c9
-rw-r--r--net/sched/cls_flower.c23
-rw-r--r--net/smc/smc_core.c12
-rw-r--r--net/smc/smc_diag.c2
-rw-r--r--net/sunrpc/svc.c4
-rw-r--r--net/sunrpc/svcsock.c4
-rw-r--r--net/tipc/bearer.c6
-rw-r--r--net/unix/af_unix.c14
-rw-r--r--net/unix/diag.c2
-rw-r--r--net/unix/garbage.c11
-rw-r--r--net/wireless/Kconfig1
-rw-r--r--net/wireless/core.c3
-rw-r--r--net/wireless/nl80211.c1
-rw-r--r--net/wireless/scan.c59
-rw-r--r--net/xdp/xsk.c12
-rw-r--r--net/xdp/xsk_buff_pool.c1
95 files changed, 969 insertions, 385 deletions
diff --git a/net/8021q/vlan_netlink.c b/net/8021q/vlan_netlink.c
index 214532173536..a3b68243fd4b 100644
--- a/net/8021q/vlan_netlink.c
+++ b/net/8021q/vlan_netlink.c
@@ -118,12 +118,16 @@ static int vlan_changelink(struct net_device *dev, struct nlattr *tb[],
}
if (data[IFLA_VLAN_INGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_INGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
vlan_dev_set_ingress_priority(dev, m->to, m->from);
}
}
if (data[IFLA_VLAN_EGRESS_QOS]) {
nla_for_each_nested(attr, data[IFLA_VLAN_EGRESS_QOS], rem) {
+ if (nla_type(attr) != IFLA_VLAN_QOS_MAPPING)
+ continue;
m = nla_data(attr);
err = vlan_dev_set_egress_priority(dev, m->from, m->to);
if (err)
diff --git a/net/batman-adv/multicast.c b/net/batman-adv/multicast.c
index d982daea8329..14088c4ff2f6 100644
--- a/net/batman-adv/multicast.c
+++ b/net/batman-adv/multicast.c
@@ -2175,6 +2175,7 @@ void batadv_mcast_free(struct batadv_priv *bat_priv)
cancel_delayed_work_sync(&bat_priv->mcast.work);
batadv_tvlv_container_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
+ batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST_TRACKER, 1);
batadv_tvlv_handler_unregister(bat_priv, BATADV_TVLV_MCAST, 2);
/* safely calling outside of worker, as worker was canceled above */
@@ -2198,6 +2199,8 @@ void batadv_mcast_purge_orig(struct batadv_orig_node *orig)
BATADV_MCAST_WANT_NO_RTR4);
batadv_mcast_want_rtr6_update(bat_priv, orig,
BATADV_MCAST_WANT_NO_RTR6);
+ batadv_mcast_have_mc_ptype_update(bat_priv, orig,
+ BATADV_MCAST_HAVE_MC_PTYPE_CAPA);
spin_unlock_bh(&orig->mcast_handler_lock);
}
diff --git a/net/bridge/br_multicast.c b/net/bridge/br_multicast.c
index d7d021af1029..2d7b73242958 100644
--- a/net/bridge/br_multicast.c
+++ b/net/bridge/br_multicast.c
@@ -1762,6 +1762,10 @@ static void br_ip6_multicast_querier_expired(struct timer_list *t)
}
#endif
+static void br_multicast_query_delay_expired(struct timer_list *t)
+{
+}
+
static void br_multicast_select_own_querier(struct net_bridge_mcast *brmctx,
struct br_ip *ip,
struct sk_buff *skb)
@@ -3198,7 +3202,7 @@ br_multicast_update_query_timer(struct net_bridge_mcast *brmctx,
unsigned long max_delay)
{
if (!timer_pending(&query->timer))
- query->delay_time = jiffies + max_delay;
+ mod_timer(&query->delay_timer, jiffies + max_delay);
mod_timer(&query->timer, jiffies + brmctx->multicast_querier_interval);
}
@@ -4041,13 +4045,11 @@ void br_multicast_ctx_init(struct net_bridge *br,
brmctx->multicast_querier_interval = 255 * HZ;
brmctx->multicast_membership_interval = 260 * HZ;
- brmctx->ip4_other_query.delay_time = 0;
brmctx->ip4_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip4_querier.seq, &br->multicast_lock);
brmctx->multicast_igmp_version = 2;
#if IS_ENABLED(CONFIG_IPV6)
brmctx->multicast_mld_version = 1;
- brmctx->ip6_other_query.delay_time = 0;
brmctx->ip6_querier.port_ifidx = 0;
seqcount_spinlock_init(&brmctx->ip6_querier.seq, &br->multicast_lock);
#endif
@@ -4056,6 +4058,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip4_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip4_other_query.timer,
br_ip4_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip4_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip4_own_query.timer,
br_ip4_multicast_query_expired, 0);
#if IS_ENABLED(CONFIG_IPV6)
@@ -4063,6 +4067,8 @@ void br_multicast_ctx_init(struct net_bridge *br,
br_ip6_multicast_local_router_expired, 0);
timer_setup(&brmctx->ip6_other_query.timer,
br_ip6_multicast_querier_expired, 0);
+ timer_setup(&brmctx->ip6_other_query.delay_timer,
+ br_multicast_query_delay_expired, 0);
timer_setup(&brmctx->ip6_own_query.timer,
br_ip6_multicast_query_expired, 0);
#endif
@@ -4197,10 +4203,12 @@ static void __br_multicast_stop(struct net_bridge_mcast *brmctx)
{
del_timer_sync(&brmctx->ip4_mc_router_timer);
del_timer_sync(&brmctx->ip4_other_query.timer);
+ del_timer_sync(&brmctx->ip4_other_query.delay_timer);
del_timer_sync(&brmctx->ip4_own_query.timer);
#if IS_ENABLED(CONFIG_IPV6)
del_timer_sync(&brmctx->ip6_mc_router_timer);
del_timer_sync(&brmctx->ip6_other_query.timer);
+ del_timer_sync(&brmctx->ip6_other_query.delay_timer);
del_timer_sync(&brmctx->ip6_own_query.timer);
#endif
}
@@ -4643,13 +4651,15 @@ int br_multicast_set_querier(struct net_bridge_mcast *brmctx, unsigned long val)
max_delay = brmctx->multicast_query_response_interval;
if (!timer_pending(&brmctx->ip4_other_query.timer))
- brmctx->ip4_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip4_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip4_own_query);
#if IS_ENABLED(CONFIG_IPV6)
if (!timer_pending(&brmctx->ip6_other_query.timer))
- brmctx->ip6_other_query.delay_time = jiffies + max_delay;
+ mod_timer(&brmctx->ip6_other_query.delay_timer,
+ jiffies + max_delay);
br_multicast_start_querier(brmctx, &brmctx->ip6_own_query);
#endif
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index b0a92c344722..86ea5e6689b5 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -78,7 +78,7 @@ struct bridge_mcast_own_query {
/* other querier */
struct bridge_mcast_other_query {
struct timer_list timer;
- unsigned long delay_time;
+ struct timer_list delay_timer;
};
/* selected querier */
@@ -1159,7 +1159,7 @@ __br_multicast_querier_exists(struct net_bridge_mcast *brmctx,
own_querier_enabled = false;
}
- return time_is_before_jiffies(querier->delay_time) &&
+ return !timer_pending(&querier->delay_timer) &&
(own_querier_enabled || timer_pending(&querier->timer));
}
diff --git a/net/ceph/messenger_v1.c b/net/ceph/messenger_v1.c
index f9a50d7f0d20..0cb61c76b9b8 100644
--- a/net/ceph/messenger_v1.c
+++ b/net/ceph/messenger_v1.c
@@ -160,8 +160,9 @@ static size_t sizeof_footer(struct ceph_connection *con)
static void prepare_message_data(struct ceph_msg *msg, u32 data_len)
{
/* Initialize data cursor if it's not a sparse read */
- if (!msg->sparse_read)
- ceph_msg_data_cursor_init(&msg->cursor, msg, data_len);
+ u64 len = msg->sparse_read_total ? : data_len;
+
+ ceph_msg_data_cursor_init(&msg->cursor, msg, len);
}
/*
@@ -991,7 +992,7 @@ static inline int read_partial_message_section(struct ceph_connection *con,
return read_partial_message_chunk(con, section, sec_len, crc);
}
-static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
+static int read_partial_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_bounce = ceph_test_opt(from_msgr(con->msgr), RXBOUNCE);
@@ -1026,7 +1027,7 @@ static int read_sparse_msg_extent(struct ceph_connection *con, u32 *crc)
return 1;
}
-static int read_sparse_msg_data(struct ceph_connection *con)
+static int read_partial_sparse_msg_data(struct ceph_connection *con)
{
struct ceph_msg_data_cursor *cursor = &con->in_msg->cursor;
bool do_datacrc = !ceph_test_opt(from_msgr(con->msgr), NOCRC);
@@ -1036,31 +1037,31 @@ static int read_sparse_msg_data(struct ceph_connection *con)
if (do_datacrc)
crc = con->in_data_crc;
- do {
+ while (cursor->total_resid) {
if (con->v1.in_sr_kvec.iov_base)
ret = read_partial_message_chunk(con,
&con->v1.in_sr_kvec,
con->v1.in_sr_len,
&crc);
else if (cursor->sr_resid > 0)
- ret = read_sparse_msg_extent(con, &crc);
-
- if (ret <= 0) {
- if (do_datacrc)
- con->in_data_crc = crc;
- return ret;
- }
+ ret = read_partial_sparse_msg_extent(con, &crc);
+ if (ret <= 0)
+ break;
memset(&con->v1.in_sr_kvec, 0, sizeof(con->v1.in_sr_kvec));
ret = con->ops->sparse_read(con, cursor,
(char **)&con->v1.in_sr_kvec.iov_base);
+ if (ret <= 0) {
+ ret = ret ? ret : 1; /* must return > 0 to indicate success */
+ break;
+ }
con->v1.in_sr_len = ret;
- } while (ret > 0);
+ }
if (do_datacrc)
con->in_data_crc = crc;
- return ret < 0 ? ret : 1; /* must return > 0 to indicate success */
+ return ret;
}
static int read_partial_msg_data(struct ceph_connection *con)
@@ -1253,8 +1254,8 @@ static int read_partial_message(struct ceph_connection *con)
if (!m->num_data_items)
return -EIO;
- if (m->sparse_read)
- ret = read_sparse_msg_data(con);
+ if (m->sparse_read_total)
+ ret = read_partial_sparse_msg_data(con);
else if (ceph_test_opt(from_msgr(con->msgr), RXBOUNCE))
ret = read_partial_msg_data_bounce(con);
else
diff --git a/net/ceph/messenger_v2.c b/net/ceph/messenger_v2.c
index f8ec60e1aba3..a0ca5414b333 100644
--- a/net/ceph/messenger_v2.c
+++ b/net/ceph/messenger_v2.c
@@ -1128,7 +1128,7 @@ static int decrypt_tail(struct ceph_connection *con)
struct sg_table enc_sgt = {};
struct sg_table sgt = {};
struct page **pages = NULL;
- bool sparse = con->in_msg->sparse_read;
+ bool sparse = !!con->in_msg->sparse_read_total;
int dpos = 0;
int tail_len;
int ret;
@@ -2060,7 +2060,7 @@ static int prepare_read_tail_plain(struct ceph_connection *con)
}
if (data_len(msg)) {
- if (msg->sparse_read)
+ if (msg->sparse_read_total)
con->v2.in_state = IN_S_PREPARE_SPARSE_DATA;
else
con->v2.in_state = IN_S_PREPARE_READ_DATA;
diff --git a/net/ceph/osd_client.c b/net/ceph/osd_client.c
index 625622016f57..9d078b37fe0b 100644
--- a/net/ceph/osd_client.c
+++ b/net/ceph/osd_client.c
@@ -5510,7 +5510,7 @@ static struct ceph_msg *get_reply(struct ceph_connection *con,
}
m = ceph_msg_get(req->r_reply);
- m->sparse_read = (bool)srlen;
+ m->sparse_read_total = srlen;
dout("get_reply tid %lld %p\n", tid, m);
@@ -5777,11 +5777,8 @@ static int prep_next_sparse_read(struct ceph_connection *con,
}
if (o->o_sparse_op_idx < 0) {
- u64 srlen = sparse_data_requested(req);
-
- dout("%s: [%d] starting new sparse read req. srlen=0x%llx\n",
- __func__, o->o_osd, srlen);
- ceph_msg_data_cursor_init(cursor, con->in_msg, srlen);
+ dout("%s: [%d] starting new sparse read req\n",
+ __func__, o->o_osd);
} else {
u64 end;
@@ -5857,8 +5854,8 @@ static int osd_sparse_read(struct ceph_connection *con,
struct ceph_osd *o = con->private;
struct ceph_sparse_read *sr = &o->o_sparse_read;
u32 count = sr->sr_count;
- u64 eoff, elen;
- int ret;
+ u64 eoff, elen, len = 0;
+ int i, ret;
switch (sr->sr_state) {
case CEPH_SPARSE_READ_HDR:
@@ -5903,8 +5900,20 @@ next_op:
convert_extent_map(sr);
ret = sizeof(sr->sr_datalen);
*pbuf = (char *)&sr->sr_datalen;
- sr->sr_state = CEPH_SPARSE_READ_DATA;
+ sr->sr_state = CEPH_SPARSE_READ_DATA_PRE;
break;
+ case CEPH_SPARSE_READ_DATA_PRE:
+ /* Convert sr_datalen to host-endian */
+ sr->sr_datalen = le32_to_cpu((__force __le32)sr->sr_datalen);
+ for (i = 0; i < count; i++)
+ len += sr->sr_extent[i].len;
+ if (sr->sr_datalen != len) {
+ pr_warn_ratelimited("data len %u != extent len %llu\n",
+ sr->sr_datalen, len);
+ return -EREMOTEIO;
+ }
+ sr->sr_state = CEPH_SPARSE_READ_DATA;
+ fallthrough;
case CEPH_SPARSE_READ_DATA:
if (sr->sr_index >= count) {
sr->sr_state = CEPH_SPARSE_READ_HDR;
diff --git a/net/core/datagram.c b/net/core/datagram.c
index 103d46fa0eeb..a8b625abe242 100644
--- a/net/core/datagram.c
+++ b/net/core/datagram.c
@@ -751,7 +751,7 @@ size_t memcpy_to_iter_csum(void *iter_to, size_t progress,
size_t len, void *from, void *priv2)
{
__wsum *csum = priv2;
- __wsum next = csum_partial_copy_nocheck(from, iter_to, len);
+ __wsum next = csum_partial_copy_nocheck(from + progress, iter_to, len);
*csum = csum_block_add(*csum, next, progress);
return 0;
diff --git a/net/core/dev.c b/net/core/dev.c
index f01a9b858347..cb2dab0feee0 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -11551,6 +11551,7 @@ static struct pernet_operations __net_initdata netdev_net_ops = {
static void __net_exit default_device_exit_net(struct net *net)
{
+ struct netdev_name_node *name_node, *tmp;
struct net_device *dev, *aux;
/*
* Push all migratable network devices back to the
@@ -11573,6 +11574,14 @@ static void __net_exit default_device_exit_net(struct net *net)
snprintf(fb_name, IFNAMSIZ, "dev%d", dev->ifindex);
if (netdev_name_in_use(&init_net, fb_name))
snprintf(fb_name, IFNAMSIZ, "dev%%d");
+
+ netdev_for_each_altname_safe(dev, name_node, tmp)
+ if (netdev_name_in_use(&init_net, name_node->name)) {
+ netdev_name_node_del(name_node);
+ synchronize_rcu();
+ __netdev_name_node_alt_destroy(name_node);
+ }
+
err = dev_change_net_namespace(dev, &init_net, fb_name);
if (err) {
pr_emerg("%s: failed to move %s to init_net: %d\n",
diff --git a/net/core/dev.h b/net/core/dev.h
index cf93e188785b..7480b4c84298 100644
--- a/net/core/dev.h
+++ b/net/core/dev.h
@@ -63,6 +63,9 @@ int dev_change_name(struct net_device *dev, const char *newname);
#define netdev_for_each_altname(dev, namenode) \
list_for_each_entry((namenode), &(dev)->name_node->list, list)
+#define netdev_for_each_altname_safe(dev, namenode, next) \
+ list_for_each_entry_safe((namenode), (next), &(dev)->name_node->list, \
+ list)
int netdev_name_node_alt_create(struct net_device *dev, const char *name);
int netdev_name_node_alt_destroy(struct net_device *dev, const char *name);
diff --git a/net/core/filter.c b/net/core/filter.c
index 24061f29c9dd..ef3e78b6a39c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -83,6 +83,7 @@
#include <net/netfilter/nf_conntrack_bpf.h>
#include <net/netkit.h>
#include <linux/un.h>
+#include <net/xdp_sock_drv.h>
#include "dev.h"
@@ -4092,10 +4093,46 @@ static int bpf_xdp_frags_increase_tail(struct xdp_buff *xdp, int offset)
memset(skb_frag_address(frag) + skb_frag_size(frag), 0, offset);
skb_frag_size_add(frag, offset);
sinfo->xdp_frags_size += offset;
+ if (rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+ xsk_buff_get_tail(xdp)->data_end += offset;
return 0;
}
+static void bpf_xdp_shrink_data_zc(struct xdp_buff *xdp, int shrink,
+ struct xdp_mem_info *mem_info, bool release)
+{
+ struct xdp_buff *zc_frag = xsk_buff_get_tail(xdp);
+
+ if (release) {
+ xsk_buff_del_tail(zc_frag);
+ __xdp_return(NULL, mem_info, false, zc_frag);
+ } else {
+ zc_frag->data_end -= shrink;
+ }
+}
+
+static bool bpf_xdp_shrink_data(struct xdp_buff *xdp, skb_frag_t *frag,
+ int shrink)
+{
+ struct xdp_mem_info *mem_info = &xdp->rxq->mem;
+ bool release = skb_frag_size(frag) == shrink;
+
+ if (mem_info->type == MEM_TYPE_XSK_BUFF_POOL) {
+ bpf_xdp_shrink_data_zc(xdp, shrink, mem_info, release);
+ goto out;
+ }
+
+ if (release) {
+ struct page *page = skb_frag_page(frag);
+
+ __xdp_return(page_address(page), mem_info, false, NULL);
+ }
+
+out:
+ return release;
+}
+
static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
{
struct skb_shared_info *sinfo = xdp_get_shared_info_from_buff(xdp);
@@ -4110,12 +4147,7 @@ static int bpf_xdp_frags_shrink_tail(struct xdp_buff *xdp, int offset)
len_free += shrink;
offset -= shrink;
-
- if (skb_frag_size(frag) == shrink) {
- struct page *page = skb_frag_page(frag);
-
- __xdp_return(page_address(page), &xdp->rxq->mem,
- false, NULL);
+ if (bpf_xdp_shrink_data(xdp, frag, shrink)) {
n_frags_free++;
} else {
skb_frag_size_sub(frag, shrink);
diff --git a/net/core/request_sock.c b/net/core/request_sock.c
index f35c2e998406..63de5c635842 100644
--- a/net/core/request_sock.c
+++ b/net/core/request_sock.c
@@ -33,9 +33,6 @@
void reqsk_queue_alloc(struct request_sock_queue *queue)
{
- spin_lock_init(&queue->rskq_lock);
-
- spin_lock_init(&queue->fastopenq.lock);
queue->fastopenq.rskq_rst_head = NULL;
queue->fastopenq.rskq_rst_tail = NULL;
queue->fastopenq.qlen = 0;
diff --git a/net/core/sock.c b/net/core/sock.c
index 158dbdebce6a..0a7f46c37f0c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -107,6 +107,7 @@
#include <linux/interrupt.h>
#include <linux/poll.h>
#include <linux/tcp.h>
+#include <linux/udp.h>
#include <linux/init.h>
#include <linux/highmem.h>
#include <linux/user_namespace.h>
@@ -4144,8 +4145,14 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
{
struct sock *sk = p;
- return !skb_queue_empty_lockless(&sk->sk_receive_queue) ||
- sk_busy_loop_timeout(sk, start_time);
+ if (!skb_queue_empty_lockless(&sk->sk_receive_queue))
+ return true;
+
+ if (sk_is_udp(sk) &&
+ !skb_queue_empty_lockless(&udp_sk(sk)->reader_queue))
+ return true;
+
+ return sk_busy_loop_timeout(sk, start_time);
}
EXPORT_SYMBOL(sk_busy_loop_end);
#endif /* CONFIG_NET_RX_BUSY_POLL */
diff --git a/net/devlink/core.c b/net/devlink/core.c
index 4275a2bc6d8e..6a58342752b4 100644
--- a/net/devlink/core.c
+++ b/net/devlink/core.c
@@ -46,7 +46,7 @@ struct devlink_rel {
u32 obj_index;
devlink_rel_notify_cb_t *notify_cb;
devlink_rel_cleanup_cb_t *cleanup_cb;
- struct work_struct notify_work;
+ struct delayed_work notify_work;
} nested_in;
};
@@ -70,7 +70,7 @@ static void __devlink_rel_put(struct devlink_rel *rel)
static void devlink_rel_nested_in_notify_work(struct work_struct *work)
{
struct devlink_rel *rel = container_of(work, struct devlink_rel,
- nested_in.notify_work);
+ nested_in.notify_work.work);
struct devlink *devlink;
devlink = devlinks_xa_get(rel->nested_in.devlink_index);
@@ -96,13 +96,13 @@ rel_put:
return;
reschedule_work:
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 1);
}
static void devlink_rel_nested_in_notify_work_schedule(struct devlink_rel *rel)
{
__devlink_rel_get(rel);
- schedule_work(&rel->nested_in.notify_work);
+ schedule_delayed_work(&rel->nested_in.notify_work, 0);
}
static struct devlink_rel *devlink_rel_alloc(void)
@@ -123,8 +123,8 @@ static struct devlink_rel *devlink_rel_alloc(void)
}
refcount_set(&rel->refcount, 1);
- INIT_WORK(&rel->nested_in.notify_work,
- &devlink_rel_nested_in_notify_work);
+ INIT_DELAYED_WORK(&rel->nested_in.notify_work,
+ &devlink_rel_nested_in_notify_work);
return rel;
}
diff --git a/net/devlink/port.c b/net/devlink/port.c
index 62e54e152ecf..78592912f657 100644
--- a/net/devlink/port.c
+++ b/net/devlink/port.c
@@ -674,7 +674,7 @@ static int devlink_port_function_validate(struct devlink_port *devlink_port,
return -EOPNOTSUPP;
}
if (tb[DEVLINK_PORT_FN_ATTR_STATE] && !ops->port_fn_state_set) {
- NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FUNCTION_ATTR_HW_ADDR],
+ NL_SET_ERR_MSG_ATTR(extack, tb[DEVLINK_PORT_FN_ATTR_STATE],
"Function does not support state setting");
return -EOPNOTSUPP;
}
diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index 7ceb9ac6e730..9d71b66183da 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -308,7 +308,7 @@ static void send_hsr_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "HSR: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "HSR: Could not send supervision frame\n");
return;
}
@@ -355,7 +355,7 @@ static void send_prp_supervision_frame(struct hsr_port *master,
skb = hsr_init_skb(master);
if (!skb) {
- WARN_ONCE(1, "PRP: Could not send supervision frame\n");
+ netdev_warn_once(master->dev, "PRP: Could not send supervision frame\n");
return;
}
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 835f4f9d98d2..a5a820ee2026 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -330,6 +330,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
@@ -1625,10 +1628,12 @@ EXPORT_SYMBOL(inet_current_timestamp);
int inet_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len)
{
- if (sk->sk_family == AF_INET)
+ unsigned int family = READ_ONCE(sk->sk_family);
+
+ if (family == AF_INET)
return ip_recv_error(sk, msg, len, addr_len);
#if IS_ENABLED(CONFIG_IPV6)
- if (sk->sk_family == AF_INET6)
+ if (family == AF_INET6)
return pingv6_ops.ipv6_recv_error(sk, msg, len, addr_len);
#endif
return -EINVAL;
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 8e2eb1793685..459af1f89739 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -727,6 +727,10 @@ out:
}
if (req)
reqsk_put(req);
+
+ if (newsk)
+ inet_init_csk_locks(newsk);
+
return newsk;
out_err:
newsk = NULL;
diff --git a/net/ipv4/ip_output.c b/net/ipv4/ip_output.c
index b06f678b03a1..41537d18eecf 100644
--- a/net/ipv4/ip_output.c
+++ b/net/ipv4/ip_output.c
@@ -1287,6 +1287,12 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
if (unlikely(!rt))
return -EFAULT;
+ cork->fragsize = ip_sk_use_pmtu(sk) ?
+ dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
+
+ if (!inetdev_valid_mtu(cork->fragsize))
+ return -ENETUNREACH;
+
/*
* setup for corking.
*/
@@ -1303,12 +1309,6 @@ static int ip_setup_cork(struct sock *sk, struct inet_cork *cork,
cork->addr = ipc->addr;
}
- cork->fragsize = ip_sk_use_pmtu(sk) ?
- dst_mtu(&rt->dst) : READ_ONCE(rt->dst.dev->mtu);
-
- if (!inetdev_valid_mtu(cork->fragsize))
- return -ENETUNREACH;
-
cork->gso_size = ipc->gso_size;
cork->dst = &rt->dst;
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 7aa9dc0e6760..21d2ffa919e9 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1363,12 +1363,13 @@ e_inval:
* ipv4_pktinfo_prepare - transfer some info from rtable to skb
* @sk: socket
* @skb: buffer
+ * @drop_dst: if true, drops skb dst
*
* To support IP_CMSG_PKTINFO option, we store rt_iif and specific
* destination in skb->cb[] before dst drop.
* This way, receiver doesn't make cache line misses to read rtable.
*/
-void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
+void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb, bool drop_dst)
{
struct in_pktinfo *pktinfo = PKTINFO_SKB_CB(skb);
bool prepare = inet_test_bit(PKTINFO, sk) ||
@@ -1397,7 +1398,8 @@ void ipv4_pktinfo_prepare(const struct sock *sk, struct sk_buff *skb)
pktinfo->ipi_ifindex = 0;
pktinfo->ipi_spec_dst.s_addr = 0;
}
- skb_dst_drop(skb);
+ if (drop_dst)
+ skb_dst_drop(skb);
}
int ip_setsockopt(struct sock *sk, int level, int optname, sockptr_t optval,
diff --git a/net/ipv4/ip_tunnel_core.c b/net/ipv4/ip_tunnel_core.c
index 586b1b3e35b8..80ccd6661aa3 100644
--- a/net/ipv4/ip_tunnel_core.c
+++ b/net/ipv4/ip_tunnel_core.c
@@ -332,7 +332,7 @@ static int iptunnel_pmtud_build_icmpv6(struct sk_buff *skb, int mtu)
};
skb_reset_network_header(skb);
- csum = csum_partial(icmp6h, len, 0);
+ csum = skb_checksum(skb, skb_transport_offset(skb), len, 0);
icmp6h->icmp6_cksum = csum_ipv6_magic(&nip6h->saddr, &nip6h->daddr, len,
IPPROTO_ICMPV6, csum);
diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 9d6f59531b3a..362229836510 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -1073,7 +1073,7 @@ static int ipmr_cache_report(const struct mr_table *mrt,
msg = (struct igmpmsg *)skb_network_header(skb);
msg->im_vif = vifi;
msg->im_vif_hi = vifi >> 8;
- ipv4_pktinfo_prepare(mroute_sk, pkt);
+ ipv4_pktinfo_prepare(mroute_sk, pkt, false);
memcpy(skb->cb, pkt->cb, sizeof(skb->cb));
/* Add our header */
igmp = skb_put(skb, sizeof(struct igmphdr));
diff --git a/net/ipv4/raw.c b/net/ipv4/raw.c
index 27da9d7294c0..aea89326c697 100644
--- a/net/ipv4/raw.c
+++ b/net/ipv4/raw.c
@@ -292,7 +292,7 @@ static int raw_rcv_skb(struct sock *sk, struct sk_buff *skb)
/* Charge it to the socket. */
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
if (sock_queue_rcv_skb_reason(sk, skb, &reason) < 0) {
kfree_skb_reason(skb, reason);
return NET_RX_DROP;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 1baa484d2190..7e2481b9eae1 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -722,6 +722,7 @@ void tcp_push(struct sock *sk, int flags, int mss_now,
if (!test_bit(TSQ_THROTTLED, &sk->sk_tsq_flags)) {
NET_INC_STATS(sock_net(sk), LINUX_MIB_TCPAUTOCORKING);
set_bit(TSQ_THROTTLED, &sk->sk_tsq_flags);
+ smp_mb__after_atomic();
}
/* It is possible TX completion already happened
* before we set TSQ_THROTTLED.
@@ -1785,7 +1786,17 @@ static skb_frag_t *skb_advance_to_frag(struct sk_buff *skb, u32 offset_skb,
static bool can_map_frag(const skb_frag_t *frag)
{
- return skb_frag_size(frag) == PAGE_SIZE && !skb_frag_off(frag);
+ struct page *page;
+
+ if (skb_frag_size(frag) != PAGE_SIZE || skb_frag_off(frag))
+ return false;
+
+ page = skb_frag_page(frag);
+
+ if (PageCompound(page) || page->mapping)
+ return false;
+
+ return true;
}
static int find_next_mappable_frag(const skb_frag_t *frag,
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 148ffb007969..f631b0a21af4 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -2169,7 +2169,7 @@ static int udp_queue_rcv_one_skb(struct sock *sk, struct sk_buff *skb)
udp_csum_pull_header(skb);
- ipv4_pktinfo_prepare(sk, skb);
+ ipv4_pktinfo_prepare(sk, skb, true);
return __udp_queue_rcv_skb(sk, skb);
csum_error:
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index 507a8353a6bd..c008d21925d7 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -220,19 +220,26 @@ const struct ipv6_stub *ipv6_stub __read_mostly = &(struct ipv6_stub) {
EXPORT_SYMBOL_GPL(ipv6_stub);
/* IPv6 Wildcard Address and Loopback Address defined by RFC2553 */
-const struct in6_addr in6addr_loopback = IN6ADDR_LOOPBACK_INIT;
+const struct in6_addr in6addr_loopback __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LOOPBACK_INIT;
EXPORT_SYMBOL(in6addr_loopback);
-const struct in6_addr in6addr_any = IN6ADDR_ANY_INIT;
+const struct in6_addr in6addr_any __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_ANY_INIT;
EXPORT_SYMBOL(in6addr_any);
-const struct in6_addr in6addr_linklocal_allnodes = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_linklocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allnodes);
-const struct in6_addr in6addr_linklocal_allrouters = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_linklocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_LINKLOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_linklocal_allrouters);
-const struct in6_addr in6addr_interfacelocal_allnodes = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
+const struct in6_addr in6addr_interfacelocal_allnodes __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLNODES_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allnodes);
-const struct in6_addr in6addr_interfacelocal_allrouters = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_interfacelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_INTERFACELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_interfacelocal_allrouters);
-const struct in6_addr in6addr_sitelocal_allrouters = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
+const struct in6_addr in6addr_sitelocal_allrouters __aligned(BITS_PER_LONG/8)
+ = IN6ADDR_SITELOCAL_ALLROUTERS_INIT;
EXPORT_SYMBOL(in6addr_sitelocal_allrouters);
static void snmp6_free_dev(struct inet6_dev *idev)
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 13a1833a4df5..959bfd9f6344 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -199,6 +199,9 @@ lookup_protocol:
if (INET_PROTOSW_REUSE & answer_flags)
sk->sk_reuse = SK_CAN_REUSE;
+ if (INET_PROTOSW_ICSK & answer_flags)
+ inet_init_csk_locks(sk);
+
inet = inet_sk(sk);
inet_assign_bit(IS_ICSK, sk, INET_PROTOSW_ICSK & answer_flags);
diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 46c19bd48990..9bbabf750a21 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -796,8 +796,8 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
struct sk_buff *skb),
bool log_ecn_err)
{
- const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
- int err;
+ const struct ipv6hdr *ipv6h;
+ int nh, err;
if ((!(tpi->flags & TUNNEL_CSUM) &&
(tunnel->parms.i_flags & TUNNEL_CSUM)) ||
@@ -829,7 +829,6 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
goto drop;
}
- ipv6h = ipv6_hdr(skb);
skb->protocol = eth_type_trans(skb, tunnel->dev);
skb_postpull_rcsum(skb, eth_hdr(skb), ETH_HLEN);
} else {
@@ -837,7 +836,23 @@ static int __ip6_tnl_rcv(struct ip6_tnl *tunnel, struct sk_buff *skb,
skb_reset_mac_header(skb);
}
+ /* Save offset of outer header relative to skb->head,
+ * because we are going to reset the network header to the inner header
+ * and might change skb->head.
+ */
+ nh = skb_network_header(skb) - skb->head;
+
skb_reset_network_header(skb);
+
+ if (!pskb_inet_may_pull(skb)) {
+ DEV_STATS_INC(tunnel->dev, rx_length_errors);
+ DEV_STATS_INC(tunnel->dev, rx_errors);
+ goto drop;
+ }
+
+ /* Get the outer header. */
+ ipv6h = (struct ipv6hdr *)(skb->head + nh);
+
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
__skb_tunnel_rx(skb, tunnel->dev, tunnel->net);
diff --git a/net/llc/af_llc.c b/net/llc/af_llc.c
index 9b06c380866b..fde1140d899e 100644
--- a/net/llc/af_llc.c
+++ b/net/llc/af_llc.c
@@ -226,6 +226,8 @@ static int llc_ui_release(struct socket *sock)
}
netdev_put(llc->dev, &llc->dev_tracker);
sock_put(sk);
+ sock_orphan(sk);
+ sock->sk = NULL;
llc_sk_free(sk);
out:
return 0;
@@ -928,14 +930,15 @@ copy_uaddr:
*/
static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
{
+ DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
struct sock *sk = sock->sk;
struct llc_sock *llc = llc_sk(sk);
- DECLARE_SOCKADDR(struct sockaddr_llc *, addr, msg->msg_name);
int flags = msg->msg_flags;
int noblock = flags & MSG_DONTWAIT;
+ int rc = -EINVAL, copied = 0, hdrlen, hh_len;
struct sk_buff *skb = NULL;
+ struct net_device *dev;
size_t size = 0;
- int rc = -EINVAL, copied = 0, hdrlen;
dprintk("%s: sending from %02X to %02X\n", __func__,
llc->laddr.lsap, llc->daddr.lsap);
@@ -955,22 +958,29 @@ static int llc_ui_sendmsg(struct socket *sock, struct msghdr *msg, size_t len)
if (rc)
goto out;
}
- hdrlen = llc->dev->hard_header_len + llc_ui_header_len(sk, addr);
+ dev = llc->dev;
+ hh_len = LL_RESERVED_SPACE(dev);
+ hdrlen = llc_ui_header_len(sk, addr);
size = hdrlen + len;
- if (size > llc->dev->mtu)
- size = llc->dev->mtu;
+ size = min_t(size_t, size, READ_ONCE(dev->mtu));
copied = size - hdrlen;
rc = -EINVAL;
if (copied < 0)
goto out;
release_sock(sk);
- skb = sock_alloc_send_skb(sk, size, noblock, &rc);
+ skb = sock_alloc_send_skb(sk, hh_len + size, noblock, &rc);
lock_sock(sk);
if (!skb)
goto out;
- skb->dev = llc->dev;
+ if (sock_flag(sk, SOCK_ZAPPED) ||
+ llc->dev != dev ||
+ hdrlen != llc_ui_header_len(sk, addr) ||
+ hh_len != LL_RESERVED_SPACE(dev) ||
+ size > READ_ONCE(dev->mtu))
+ goto out;
+ skb->dev = dev;
skb->protocol = llc_proto_type(addr->sllc_arphrd);
- skb_reserve(skb, hdrlen);
+ skb_reserve(skb, hh_len + hdrlen);
rc = memcpy_from_msg(skb_put(skb, copied), msg, copied);
if (rc)
goto out;
diff --git a/net/llc/llc_core.c b/net/llc/llc_core.c
index 6e387aadffce..4f16d9c88350 100644
--- a/net/llc/llc_core.c
+++ b/net/llc/llc_core.c
@@ -135,22 +135,15 @@ static struct packet_type llc_packet_type __read_mostly = {
.func = llc_rcv,
};
-static struct packet_type llc_tr_packet_type __read_mostly = {
- .type = cpu_to_be16(ETH_P_TR_802_2),
- .func = llc_rcv,
-};
-
static int __init llc_init(void)
{
dev_add_pack(&llc_packet_type);
- dev_add_pack(&llc_tr_packet_type);
return 0;
}
static void __exit llc_exit(void)
{
dev_remove_pack(&llc_packet_type);
- dev_remove_pack(&llc_tr_packet_type);
}
module_init(llc_init);
diff --git a/net/mac80211/Kconfig b/net/mac80211/Kconfig
index cb0291decf2e..13438cc0a6b1 100644
--- a/net/mac80211/Kconfig
+++ b/net/mac80211/Kconfig
@@ -62,7 +62,6 @@ config MAC80211_KUNIT_TEST
depends on KUNIT
depends on MAC80211
default KUNIT_ALL_TESTS
- depends on !KERNEL_6_2
help
Enable this option to test mac80211 internals with kunit.
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 489dd97f5172..327682995c92 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <[email protected]>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright (C) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2022 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/ieee80211.h>
@@ -987,7 +987,8 @@ static int
ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
struct cfg80211_unsol_bcast_probe_resp *params,
struct ieee80211_link_data *link,
- struct ieee80211_bss_conf *link_conf)
+ struct ieee80211_bss_conf *link_conf,
+ u64 *changed)
{
struct unsol_bcast_probe_resp_data *new, *old = NULL;
@@ -1011,7 +1012,8 @@ ieee80211_set_unsol_bcast_probe_resp(struct ieee80211_sub_if_data *sdata,
RCU_INIT_POINTER(link->u.ap.unsol_bcast_probe_resp, NULL);
}
- return BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ *changed |= BSS_CHANGED_UNSOL_BCAST_PROBE_RESP;
+ return 0;
}
static int ieee80211_set_ftm_responder_params(
@@ -1450,10 +1452,9 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
goto error;
- changed |= err;
err = drv_start_ap(sdata->local, sdata, link_conf);
if (err) {
@@ -1525,10 +1526,9 @@ static int ieee80211_change_beacon(struct wiphy *wiphy, struct net_device *dev,
err = ieee80211_set_unsol_bcast_probe_resp(sdata,
&params->unsol_bcast_probe_resp,
- link, link_conf);
+ link, link_conf, &changed);
if (err < 0)
return err;
- changed |= err;
if (beacon->he_bss_color_valid &&
beacon->he_bss_color.enabled != link_conf->he_bss_color.enabled) {
@@ -1869,6 +1869,8 @@ static int sta_link_apply_parameters(struct ieee80211_local *local,
sband->band);
}
+ ieee80211_sta_set_rx_nss(link_sta);
+
return ret;
}
diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index dce5606ed66d..68596ef78b15 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -997,8 +997,8 @@ static void add_link_files(struct ieee80211_link_data *link,
}
}
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif)
+static void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
+ bool mld_vif)
{
char buf[10+IFNAMSIZ];
diff --git a/net/mac80211/debugfs_netdev.h b/net/mac80211/debugfs_netdev.h
index b226b1aae88a..a02ec0a413f6 100644
--- a/net/mac80211/debugfs_netdev.h
+++ b/net/mac80211/debugfs_netdev.h
@@ -11,8 +11,6 @@
#include "ieee80211_i.h"
#ifdef CONFIG_MAC80211_DEBUGFS
-void ieee80211_debugfs_add_netdev(struct ieee80211_sub_if_data *sdata,
- bool mld_vif);
void ieee80211_debugfs_remove_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_rename_netdev(struct ieee80211_sub_if_data *sdata);
void ieee80211_debugfs_recreate_netdev(struct ieee80211_sub_if_data *sdata,
@@ -24,9 +22,6 @@ void ieee80211_link_debugfs_remove(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_add(struct ieee80211_link_data *link);
void ieee80211_link_debugfs_drv_remove(struct ieee80211_link_data *link);
#else
-static inline void ieee80211_debugfs_add_netdev(
- struct ieee80211_sub_if_data *sdata, bool mld_vif)
-{}
static inline void ieee80211_debugfs_remove_netdev(
struct ieee80211_sub_if_data *sdata)
{}
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index e4e7c0b38cb6..11c4caa4748e 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -1783,7 +1783,7 @@ static void ieee80211_setup_sdata(struct ieee80211_sub_if_data *sdata,
/* need to do this after the switch so vif.type is correct */
ieee80211_link_setup(&sdata->deflink);
- ieee80211_debugfs_add_netdev(sdata, false);
+ ieee80211_debugfs_recreate_netdev(sdata, false);
}
static int ieee80211_runtime_change_iftype(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 073105deb424..2022a26eb881 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -8,7 +8,7 @@
* Copyright 2007, Michael Wu <[email protected]>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2023 Intel Corporation
+ * Copyright (C) 2018 - 2024 Intel Corporation
*/
#include <linux/delay.h>
@@ -2918,6 +2918,7 @@ static void ieee80211_set_disassoc(struct ieee80211_sub_if_data *sdata,
/* other links will be destroyed */
sdata->deflink.u.mgd.bss = NULL;
+ sdata->deflink.smps_mode = IEEE80211_SMPS_OFF;
netif_carrier_off(sdata->dev);
@@ -5045,9 +5046,6 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
if (!link)
return 0;
- /* will change later if needed */
- link->smps_mode = IEEE80211_SMPS_OFF;
-
/*
* If this fails (possibly due to channel context sharing
* on incompatible channels, e.g. 80+80 and 160 sharing the
@@ -7096,6 +7094,7 @@ void ieee80211_mgd_setup_link(struct ieee80211_link_data *link)
link->u.mgd.p2p_noa_index = -1;
link->u.mgd.conn_flags = 0;
link->conf->bssid = link->u.mgd.bssid;
+ link->smps_mode = IEEE80211_SMPS_OFF;
wiphy_work_init(&link->u.mgd.request_smps_work,
ieee80211_request_smps_mgd_work);
@@ -7309,6 +7308,75 @@ out_err:
return err;
}
+static bool ieee80211_mgd_csa_present(struct ieee80211_sub_if_data *sdata,
+ const struct cfg80211_bss_ies *ies,
+ u8 cur_channel, bool ignore_ecsa)
+{
+ const struct element *csa_elem, *ecsa_elem;
+ struct ieee80211_channel_sw_ie *csa = NULL;
+ struct ieee80211_ext_chansw_ie *ecsa = NULL;
+
+ if (!ies)
+ return false;
+
+ csa_elem = cfg80211_find_elem(WLAN_EID_CHANNEL_SWITCH,
+ ies->data, ies->len);
+ if (csa_elem && csa_elem->datalen == sizeof(*csa))
+ csa = (void *)csa_elem->data;
+
+ ecsa_elem = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ ies->data, ies->len);
+ if (ecsa_elem && ecsa_elem->datalen == sizeof(*ecsa))
+ ecsa = (void *)ecsa_elem->data;
+
+ if (csa && csa->count == 0)
+ csa = NULL;
+ if (csa && !csa->mode && csa->new_ch_num == cur_channel)
+ csa = NULL;
+
+ if (ecsa && ecsa->count == 0)
+ ecsa = NULL;
+ if (ecsa && !ecsa->mode && ecsa->new_ch_num == cur_channel)
+ ecsa = NULL;
+
+ if (ignore_ecsa && ecsa) {
+ sdata_info(sdata,
+ "Ignoring ECSA in probe response - was considered stuck!\n");
+ return csa;
+ }
+
+ return csa || ecsa;
+}
+
+static bool ieee80211_mgd_csa_in_process(struct ieee80211_sub_if_data *sdata,
+ struct cfg80211_bss *bss)
+{
+ u8 cur_channel;
+ bool ret;
+
+ cur_channel = ieee80211_frequency_to_channel(bss->channel->center_freq);
+
+ rcu_read_lock();
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->beacon_ies),
+ cur_channel, false)) {
+ ret = true;
+ goto out;
+ }
+
+ if (ieee80211_mgd_csa_present(sdata,
+ rcu_dereference(bss->proberesp_ies),
+ cur_channel, bss->proberesp_ecsa_stuck)) {
+ ret = true;
+ goto out;
+ }
+
+ ret = false;
+out:
+ rcu_read_unlock();
+ return ret;
+}
+
/* config hooks */
int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct cfg80211_auth_request *req)
@@ -7317,7 +7385,6 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_auth_data *auth_data;
struct ieee80211_link_data *link;
- const struct element *csa_elem, *ecsa_elem;
u16 auth_alg;
int err;
bool cont_auth;
@@ -7360,21 +7427,10 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
if (ifmgd->assoc_data)
return -EBUSY;
- rcu_read_lock();
- csa_elem = ieee80211_bss_get_elem(req->bss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(req->bss,
- WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, req->bss)) {
sdata_info(sdata, "AP is in CSA process, reject auth\n");
return -EINVAL;
}
- rcu_read_unlock();
auth_data = kzalloc(sizeof(*auth_data) + req->auth_data_len +
req->ie_len, GFP_KERNEL);
@@ -7684,7 +7740,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
struct ieee80211_local *local = sdata->local;
struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
struct ieee80211_mgd_assoc_data *assoc_data;
- const struct element *ssid_elem, *csa_elem, *ecsa_elem;
+ const struct element *ssid_elem;
struct ieee80211_vif_cfg *vif_cfg = &sdata->vif.cfg;
ieee80211_conn_flags_t conn_flags = 0;
struct ieee80211_link_data *link;
@@ -7707,23 +7763,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
cbss = req->link_id < 0 ? req->bss : req->links[req->link_id].bss;
- rcu_read_lock();
- ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
- if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
- rcu_read_unlock();
+ if (ieee80211_mgd_csa_in_process(sdata, cbss)) {
+ sdata_info(sdata, "AP is in CSA process, reject assoc\n");
kfree(assoc_data);
return -EINVAL;
}
- csa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_CHANNEL_SWITCH);
- ecsa_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_EXT_CHANSWITCH_ANN);
- if ((csa_elem &&
- csa_elem->datalen == sizeof(struct ieee80211_channel_sw_ie) &&
- ((struct ieee80211_channel_sw_ie *)csa_elem->data)->count != 0) ||
- (ecsa_elem &&
- ecsa_elem->datalen == sizeof(struct ieee80211_ext_chansw_ie) &&
- ((struct ieee80211_ext_chansw_ie *)ecsa_elem->data)->count != 0)) {
- sdata_info(sdata, "AP is in CSA process, reject assoc\n");
+ rcu_read_lock();
+ ssid_elem = ieee80211_bss_get_elem(cbss, WLAN_EID_SSID);
+ if (!ssid_elem || ssid_elem->datalen > sizeof(assoc_data->ssid)) {
rcu_read_unlock();
kfree(assoc_data);
return -EINVAL;
@@ -7998,8 +8046,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
rcu_read_lock();
beacon_ies = rcu_dereference(req->bss->beacon_ies);
-
- if (beacon_ies) {
+ if (!beacon_ies) {
/*
* Wait up to one beacon interval ...
* should this be more if we miss one?
@@ -8080,6 +8127,7 @@ int ieee80211_mgd_deauth(struct ieee80211_sub_if_data *sdata,
ieee80211_report_disconnect(sdata, frame_buf,
sizeof(frame_buf), true,
req->reason_code, false);
+ drv_mgd_complete_tx(sdata->local, sdata, &info);
return 0;
}
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 645355e5f1bc..f9d5842601fa 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -9,7 +9,7 @@
* Copyright 2007, Michael Wu <[email protected]>
* Copyright 2013-2015 Intel Mobile Communications GmbH
* Copyright 2016-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#include <linux/if_arp.h>
@@ -237,14 +237,18 @@ ieee80211_bss_info_update(struct ieee80211_local *local,
}
static bool ieee80211_scan_accept_presp(struct ieee80211_sub_if_data *sdata,
+ struct ieee80211_channel *channel,
u32 scan_flags, const u8 *da)
{
if (!sdata)
return false;
- /* accept broadcast for OCE */
- if (scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP &&
- is_broadcast_ether_addr(da))
+
+ /* accept broadcast on 6 GHz and for OCE */
+ if (is_broadcast_ether_addr(da) &&
+ (channel->band == NL80211_BAND_6GHZ ||
+ scan_flags & NL80211_SCAN_FLAG_ACCEPT_BCAST_PROBE_RESP))
return true;
+
if (scan_flags & NL80211_SCAN_FLAG_RANDOM_ADDR)
return true;
return ether_addr_equal(da, sdata->vif.addr);
@@ -293,6 +297,12 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
wiphy_delayed_work_queue(local->hw.wiphy, &local->scan_work, 0);
}
+ channel = ieee80211_get_channel_khz(local->hw.wiphy,
+ ieee80211_rx_status_to_khz(rx_status));
+
+ if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
+ return;
+
if (ieee80211_is_probe_resp(mgmt->frame_control)) {
struct cfg80211_scan_request *scan_req;
struct cfg80211_sched_scan_request *sched_scan_req;
@@ -310,19 +320,15 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
/* ignore ProbeResp to foreign address or non-bcast (OCE)
* unless scanning with randomised address
*/
- if (!ieee80211_scan_accept_presp(sdata1, scan_req_flags,
+ if (!ieee80211_scan_accept_presp(sdata1, channel,
+ scan_req_flags,
mgmt->da) &&
- !ieee80211_scan_accept_presp(sdata2, sched_scan_req_flags,
+ !ieee80211_scan_accept_presp(sdata2, channel,
+ sched_scan_req_flags,
mgmt->da))
return;
}
- channel = ieee80211_get_channel_khz(local->hw.wiphy,
- ieee80211_rx_status_to_khz(rx_status));
-
- if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
- return;
-
bss = ieee80211_bss_info_update(local, rx_status,
mgmt, skb->len,
channel);
diff --git a/net/mac80211/sta_info.c b/net/mac80211/sta_info.c
index bf1adcd96b41..4391d8dd634b 100644
--- a/net/mac80211/sta_info.c
+++ b/net/mac80211/sta_info.c
@@ -404,7 +404,10 @@ void sta_info_free(struct ieee80211_local *local, struct sta_info *sta)
int i;
for (i = 0; i < ARRAY_SIZE(sta->link); i++) {
- if (!(sta->sta.valid_links & BIT(i)))
+ struct link_sta_info *link_sta;
+
+ link_sta = rcu_access_pointer(sta->link[i]);
+ if (!link_sta)
continue;
sta_remove_link(sta, i, false);
@@ -910,6 +913,8 @@ static int sta_info_insert_finish(struct sta_info *sta) __acquires(RCU)
if (ieee80211_vif_is_mesh(&sdata->vif))
mesh_accept_plinks_update(sdata);
+ ieee80211_check_fast_xmit(sta);
+
return 0;
out_remove:
if (sta->sta.valid_links)
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 314998fdb1a5..e448ab338448 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -3048,7 +3048,7 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
sdata->vif.type == NL80211_IFTYPE_STATION)
goto out;
- if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED))
+ if (!test_sta_flag(sta, WLAN_STA_AUTHORIZED) || !sta->uploaded)
goto out;
if (test_sta_flag(sta, WLAN_STA_PS_STA) ||
@@ -3100,10 +3100,11 @@ void ieee80211_check_fast_xmit(struct sta_info *sta)
/* DA SA BSSID */
build.da_offs = offsetof(struct ieee80211_hdr, addr1);
build.sa_offs = offsetof(struct ieee80211_hdr, addr2);
+ rcu_read_lock();
link = rcu_dereference(sdata->link[tdls_link_id]);
- if (WARN_ON_ONCE(!link))
- break;
- memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ if (!WARN_ON_ONCE(!link))
+ memcpy(hdr->addr3, link->u.mgd.bssid, ETH_ALEN);
+ rcu_read_unlock();
build.hdr_len = 24;
break;
}
diff --git a/net/mac80211/wbrf.c b/net/mac80211/wbrf.c
index a05c5b971789..3a8612309137 100644
--- a/net/mac80211/wbrf.c
+++ b/net/mac80211/wbrf.c
@@ -23,8 +23,6 @@ void ieee80211_check_wbrf_support(struct ieee80211_local *local)
return;
local->wbrf_supported = acpi_amd_wbrf_supported_producer(dev);
- dev_dbg(dev, "WBRF is %s supported\n",
- local->wbrf_supported ? "" : "not");
}
static void get_chan_freq_boundary(u32 center_freq, u32 bandwidth, u64 *start, u64 *end)
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 3ed4709a7509..028e8b473626 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2314,9 +2314,6 @@ bool __mptcp_retransmit_pending_data(struct sock *sk)
if (__mptcp_check_fallback(msk))
return false;
- if (tcp_rtx_and_write_queues_empty(sk))
- return false;
-
/* the closing socket has some data untransmitted and/or unacked:
* some data in the mptcp rtx queue has not really xmitted yet.
* keep it simple and re-inject the whole mptcp level rtx queue
diff --git a/net/netfilter/ipset/ip_set_bitmap_gen.h b/net/netfilter/ipset/ip_set_bitmap_gen.h
index 21f7860e8fa1..cb48a2b9cb9f 100644
--- a/net/netfilter/ipset/ip_set_bitmap_gen.h
+++ b/net/netfilter/ipset/ip_set_bitmap_gen.h
@@ -30,6 +30,7 @@
#define mtype_del IPSET_TOKEN(MTYPE, _del)
#define mtype_list IPSET_TOKEN(MTYPE, _list)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype MTYPE
#define get_ext(set, map, id) ((map)->extensions + ((set)->dsize * (id)))
@@ -59,9 +60,6 @@ mtype_destroy(struct ip_set *set)
{
struct mtype *map = set->data;
- if (SET_WITH_TIMEOUT(set))
- del_timer_sync(&map->gc);
-
if (set->dsize && set->extensions & IPSET_EXT_DESTROY)
mtype_ext_cleanup(set);
ip_set_free(map->members);
@@ -290,6 +288,15 @@ mtype_gc(struct timer_list *t)
add_timer(&map->gc);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct mtype *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ del_timer_sync(&map->gc);
+}
+
static const struct ip_set_type_variant mtype = {
.kadt = mtype_kadt,
.uadt = mtype_uadt,
@@ -303,6 +310,7 @@ static const struct ip_set_type_variant mtype = {
.head = mtype_head,
.list = mtype_list,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
};
#endif /* __IP_SET_BITMAP_IP_GEN_H */
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 4c133e06be1d..3184cc6be4c9 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -1154,6 +1154,7 @@ static int ip_set_create(struct sk_buff *skb, const struct nfnl_info *info,
return ret;
cleanup:
+ set->variant->cancel_gc(set);
set->variant->destroy(set);
put_out:
module_put(set->type->me);
@@ -1182,6 +1183,14 @@ ip_set_destroy_set(struct ip_set *set)
kfree(set);
}
+static void
+ip_set_destroy_set_rcu(struct rcu_head *head)
+{
+ struct ip_set *set = container_of(head, struct ip_set, rcu);
+
+ ip_set_destroy_set(set);
+}
+
static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
const struct nlattr * const attr[])
{
@@ -1193,8 +1202,6 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
if (unlikely(protocol_min_failed(attr)))
return -IPSET_ERR_PROTOCOL;
- /* Must wait for flush to be really finished in list:set */
- rcu_barrier();
/* Commands are serialized and references are
* protected by the ip_set_ref_lock.
@@ -1206,8 +1213,10 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
* counter, so if it's already zero, we can proceed
* without holding the lock.
*/
- read_lock_bh(&ip_set_ref_lock);
if (!attr[IPSET_ATTR_SETNAME]) {
+ /* Must wait for flush to be really finished in list:set */
+ rcu_barrier();
+ read_lock_bh(&ip_set_ref_lock);
for (i = 0; i < inst->ip_set_max; i++) {
s = ip_set(inst, i);
if (s && (s->ref || s->ref_netlink)) {
@@ -1221,6 +1230,8 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
s = ip_set(inst, i);
if (s) {
ip_set(inst, i) = NULL;
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
ip_set_destroy_set(s);
}
}
@@ -1228,6 +1239,9 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
inst->is_destroyed = false;
} else {
u32 flags = flag_exist(info->nlh);
+ u16 features = 0;
+
+ read_lock_bh(&ip_set_ref_lock);
s = find_set_and_id(inst, nla_data(attr[IPSET_ATTR_SETNAME]),
&i);
if (!s) {
@@ -1238,10 +1252,16 @@ static int ip_set_destroy(struct sk_buff *skb, const struct nfnl_info *info,
ret = -IPSET_ERR_BUSY;
goto out;
}
+ features = s->type->features;
ip_set(inst, i) = NULL;
read_unlock_bh(&ip_set_ref_lock);
-
- ip_set_destroy_set(s);
+ if (features & IPSET_TYPE_NAME) {
+ /* Must wait for flush to be really finished */
+ rcu_barrier();
+ }
+ /* Must cancel garbage collectors */
+ s->variant->cancel_gc(s);
+ call_rcu(&s->rcu, ip_set_destroy_set_rcu);
}
return 0;
out:
@@ -1394,9 +1414,6 @@ static int ip_set_swap(struct sk_buff *skb, const struct nfnl_info *info,
ip_set(inst, to_id) = from;
write_unlock_bh(&ip_set_ref_lock);
- /* Make sure all readers of the old set pointers are completed. */
- synchronize_rcu();
-
return 0;
}
@@ -2362,6 +2379,7 @@ ip_set_net_exit(struct net *net)
set = ip_set(inst, i);
if (set) {
ip_set(inst, i) = NULL;
+ set->variant->cancel_gc(set);
ip_set_destroy_set(set);
}
}
@@ -2409,8 +2427,11 @@ ip_set_fini(void)
{
nf_unregister_sockopt(&so_set);
nfnetlink_subsys_unregister(&ip_set_netlink_subsys);
-
unregister_pernet_subsys(&ip_set_net_ops);
+
+ /* Wait for call_rcu() in destroy */
+ rcu_barrier();
+
pr_debug("these are the famous last words\n");
}
diff --git a/net/netfilter/ipset/ip_set_hash_gen.h b/net/netfilter/ipset/ip_set_hash_gen.h
index cbf80da9a01c..cf3ce72c3de6 100644
--- a/net/netfilter/ipset/ip_set_hash_gen.h
+++ b/net/netfilter/ipset/ip_set_hash_gen.h
@@ -222,6 +222,7 @@ static const union nf_inet_addr zeromask = {};
#undef mtype_gc_do
#undef mtype_gc
#undef mtype_gc_init
+#undef mtype_cancel_gc
#undef mtype_variant
#undef mtype_data_match
@@ -266,6 +267,7 @@ static const union nf_inet_addr zeromask = {};
#define mtype_gc_do IPSET_TOKEN(MTYPE, _gc_do)
#define mtype_gc IPSET_TOKEN(MTYPE, _gc)
#define mtype_gc_init IPSET_TOKEN(MTYPE, _gc_init)
+#define mtype_cancel_gc IPSET_TOKEN(MTYPE, _cancel_gc)
#define mtype_variant IPSET_TOKEN(MTYPE, _variant)
#define mtype_data_match IPSET_TOKEN(MTYPE, _data_match)
@@ -430,7 +432,7 @@ mtype_ahash_destroy(struct ip_set *set, struct htable *t, bool ext_destroy)
u32 i;
for (i = 0; i < jhash_size(t->htable_bits); i++) {
- n = __ipset_dereference(hbucket(t, i));
+ n = (__force struct hbucket *)hbucket(t, i);
if (!n)
continue;
if (set->extensions & IPSET_EXT_DESTROY && ext_destroy)
@@ -450,10 +452,7 @@ mtype_destroy(struct ip_set *set)
struct htype *h = set->data;
struct list_head *l, *lt;
- if (SET_WITH_TIMEOUT(set))
- cancel_delayed_work_sync(&h->gc.dwork);
-
- mtype_ahash_destroy(set, ipset_dereference_nfnl(h->table), true);
+ mtype_ahash_destroy(set, (__force struct htable *)h->table, true);
list_for_each_safe(l, lt, &h->ad) {
list_del(l);
kfree(l);
@@ -599,6 +598,15 @@ mtype_gc_init(struct htable_gc *gc)
queue_delayed_work(system_power_efficient_wq, &gc->dwork, HZ);
}
+static void
+mtype_cancel_gc(struct ip_set *set)
+{
+ struct htype *h = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ cancel_delayed_work_sync(&h->gc.dwork);
+}
+
static int
mtype_add(struct ip_set *set, void *value, const struct ip_set_ext *ext,
struct ip_set_ext *mext, u32 flags);
@@ -1441,6 +1449,7 @@ static const struct ip_set_type_variant mtype_variant = {
.uref = mtype_uref,
.resize = mtype_resize,
.same_set = mtype_same_set,
+ .cancel_gc = mtype_cancel_gc,
.region_lock = true,
};
diff --git a/net/netfilter/ipset/ip_set_list_set.c b/net/netfilter/ipset/ip_set_list_set.c
index e162636525cf..6c3f28bc59b3 100644
--- a/net/netfilter/ipset/ip_set_list_set.c
+++ b/net/netfilter/ipset/ip_set_list_set.c
@@ -426,9 +426,6 @@ list_set_destroy(struct ip_set *set)
struct list_set *map = set->data;
struct set_elem *e, *n;
- if (SET_WITH_TIMEOUT(set))
- timer_shutdown_sync(&map->gc);
-
list_for_each_entry_safe(e, n, &map->members, list) {
list_del(&e->list);
ip_set_put_byindex(map->net, e->id);
@@ -545,6 +542,15 @@ list_set_same_set(const struct ip_set *a, const struct ip_set *b)
a->extensions == b->extensions;
}
+static void
+list_set_cancel_gc(struct ip_set *set)
+{
+ struct list_set *map = set->data;
+
+ if (SET_WITH_TIMEOUT(set))
+ timer_shutdown_sync(&map->gc);
+}
+
static const struct ip_set_type_variant set_variant = {
.kadt = list_set_kadt,
.uadt = list_set_uadt,
@@ -558,6 +564,7 @@ static const struct ip_set_type_variant set_variant = {
.head = list_set_head,
.list = list_set_list,
.same_set = list_set_same_set,
+ .cancel_gc = list_set_cancel_gc,
};
static void
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 0c22a02c2035..3b846cbdc050 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -876,6 +876,7 @@ struct ctnetlink_filter_u32 {
struct ctnetlink_filter {
u8 family;
+ bool zone_filter;
u_int32_t orig_flags;
u_int32_t reply_flags;
@@ -992,9 +993,12 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
if (err)
goto err_filter;
- err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
- if (err < 0)
- goto err_filter;
+ if (cda[CTA_ZONE]) {
+ err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+ if (err < 0)
+ goto err_filter;
+ filter->zone_filter = true;
+ }
if (!cda[CTA_FILTER])
return filter;
@@ -1148,7 +1152,7 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
if (filter->family && nf_ct_l3num(ct) != filter->family)
goto ignore_entry;
- if (filter->zone.id != NF_CT_DEFAULT_ZONE_ID &&
+ if (filter->zone_filter &&
!nf_ct_zone_equal_any(ct, &filter->zone))
goto ignore_entry;
diff --git a/net/netfilter/nf_conntrack_proto_sctp.c b/net/netfilter/nf_conntrack_proto_sctp.c
index c6bd533983c1..4cc97f971264 100644
--- a/net/netfilter/nf_conntrack_proto_sctp.c
+++ b/net/netfilter/nf_conntrack_proto_sctp.c
@@ -283,7 +283,7 @@ sctp_new(struct nf_conn *ct, const struct sk_buff *skb,
pr_debug("Setting vtag %x for secondary conntrack\n",
sh->vtag);
ct->proto.sctp.vtag[IP_CT_DIR_ORIGINAL] = sh->vtag;
- } else {
+ } else if (sch->type == SCTP_CID_SHUTDOWN_ACK) {
/* If it is a shutdown ack OOTB packet, we expect a return
shutdown complete, otherwise an ABORT Sec 8.4 (5) and (8) */
pr_debug("Setting vtag %x for new conn OOTB\n",
diff --git a/net/netfilter/nf_conntrack_proto_tcp.c b/net/netfilter/nf_conntrack_proto_tcp.c
index e573be5afde7..ae493599a3ef 100644
--- a/net/netfilter/nf_conntrack_proto_tcp.c
+++ b/net/netfilter/nf_conntrack_proto_tcp.c
@@ -457,7 +457,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
const struct sk_buff *skb,
unsigned int dataoff,
const struct tcphdr *tcph,
- u32 end, u32 win)
+ u32 end, u32 win,
+ enum ip_conntrack_dir dir)
{
/* SYN-ACK in reply to a SYN
* or SYN from reply direction in simultaneous open.
@@ -471,7 +472,8 @@ static void tcp_init_sender(struct ip_ct_tcp_state *sender,
* Both sides must send the Window Scale option
* to enable window scaling in either direction.
*/
- if (!(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
+ if (dir == IP_CT_DIR_REPLY &&
+ !(sender->flags & IP_CT_TCP_FLAG_WINDOW_SCALE &&
receiver->flags & IP_CT_TCP_FLAG_WINDOW_SCALE)) {
sender->td_scale = 0;
receiver->td_scale = 0;
@@ -542,7 +544,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
if (tcph->syn) {
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (!tcph->ack)
/* Simultaneous open */
return NFCT_TCP_ACCEPT;
@@ -585,7 +587,7 @@ tcp_in_window(struct nf_conn *ct, enum ip_conntrack_dir dir,
*/
tcp_init_sender(sender, receiver,
skb, dataoff, tcph,
- end, win);
+ end, win, dir);
if (dir == IP_CT_DIR_REPLY && !tcph->ack)
return NFCT_TCP_ACCEPT;
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index 8cc52d2bd31b..e16f158388bb 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -193,11 +193,12 @@ void nf_logger_put(int pf, enum nf_log_type type)
return;
}
- BUG_ON(loggers[pf][type] == NULL);
-
rcu_read_lock();
logger = rcu_dereference(loggers[pf][type]);
- module_put(logger->me);
+ if (!logger)
+ WARN_ON_ONCE(1);
+ else
+ module_put(logger->me);
rcu_read_unlock();
}
EXPORT_SYMBOL_GPL(nf_logger_put);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4b55533ce5ca..f8e3f70c35bd 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -24,6 +24,7 @@
#include <net/sock.h>
#define NFT_MODULE_AUTOLOAD_LIMIT (MODULE_NAME_LEN - sizeof("nft-expr-255-"))
+#define NFT_SET_MAX_ANONLEN 16
unsigned int nf_tables_net_id __read_mostly;
@@ -4413,6 +4414,9 @@ static int nf_tables_set_alloc_name(struct nft_ctx *ctx, struct nft_set *set,
if (p[1] != 'd' || strchr(p + 2, '%'))
return -EINVAL;
+ if (strnlen(name, NFT_SET_MAX_ANONLEN) >= NFT_SET_MAX_ANONLEN)
+ return -EINVAL;
+
inuse = (unsigned long *)get_zeroed_page(GFP_KERNEL);
if (inuse == NULL)
return -ENOMEM;
@@ -7547,11 +7551,15 @@ nla_put_failure:
return -1;
}
-static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
+static const struct nft_object_type *__nft_obj_type_get(u32 objtype, u8 family)
{
const struct nft_object_type *type;
list_for_each_entry(type, &nf_tables_objects, list) {
+ if (type->family != NFPROTO_UNSPEC &&
+ type->family != family)
+ continue;
+
if (objtype == type->type)
return type;
}
@@ -7559,11 +7567,11 @@ static const struct nft_object_type *__nft_obj_type_get(u32 objtype)
}
static const struct nft_object_type *
-nft_obj_type_get(struct net *net, u32 objtype)
+nft_obj_type_get(struct net *net, u32 objtype, u8 family)
{
const struct nft_object_type *type;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (type != NULL && try_module_get(type->owner))
return type;
@@ -7656,7 +7664,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
- type = __nft_obj_type_get(objtype);
+ type = __nft_obj_type_get(objtype, family);
if (WARN_ON_ONCE(!type))
return -ENOENT;
@@ -7670,7 +7678,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
if (!nft_use_inc(&table->use))
return -EMFILE;
- type = nft_obj_type_get(net, objtype);
+ type = nft_obj_type_get(net, objtype, family);
if (IS_ERR(type)) {
err = PTR_ERR(type);
goto err_type;
@@ -9819,6 +9827,7 @@ dead_elem:
struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
{
struct nft_set_elem_catchall *catchall, *next;
+ u64 tstamp = nft_net_tstamp(gc->net);
const struct nft_set *set = gc->set;
struct nft_elem_priv *elem_priv;
struct nft_set_ext *ext;
@@ -9828,7 +9837,7 @@ struct nft_trans_gc *nft_trans_gc_catchall_sync(struct nft_trans_gc *gc)
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
ext = nft_set_elem_ext(set, catchall->elem);
- if (!nft_set_elem_expired(ext))
+ if (!__nft_set_elem_expired(ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -10614,6 +10623,7 @@ static bool nf_tables_valid_genid(struct net *net, u32 genid)
bool genid_ok;
mutex_lock(&nft_net->commit_mutex);
+ nft_net->tstamp = get_jiffies_64();
genid_ok = genid == 0 || nft_net->base_seq == genid;
if (!genid_ok)
@@ -10988,16 +10998,10 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.code = ntohl(nla_get_be32(tb[NFTA_VERDICT_CODE]));
switch (data->verdict.code) {
- default:
- switch (data->verdict.code & NF_VERDICT_MASK) {
- case NF_ACCEPT:
- case NF_DROP:
- case NF_QUEUE:
- break;
- default:
- return -EINVAL;
- }
- fallthrough;
+ case NF_ACCEPT:
+ case NF_DROP:
+ case NF_QUEUE:
+ break;
case NFT_CONTINUE:
case NFT_BREAK:
case NFT_RETURN:
@@ -11032,6 +11036,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
data->verdict.chain = chain;
break;
+ default:
+ return -EINVAL;
}
desc->len = sizeof(data->verdict);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index 171d1f52d3dd..5cf38fc0a366 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -232,18 +232,25 @@ static void nfqnl_reinject(struct nf_queue_entry *entry, unsigned int verdict)
if (verdict == NF_ACCEPT ||
verdict == NF_REPEAT ||
verdict == NF_STOP) {
+ unsigned int ct_verdict = verdict;
+
rcu_read_lock();
ct_hook = rcu_dereference(nf_ct_hook);
if (ct_hook)
- verdict = ct_hook->update(entry->state.net, entry->skb);
+ ct_verdict = ct_hook->update(entry->state.net, entry->skb);
rcu_read_unlock();
- switch (verdict & NF_VERDICT_MASK) {
+ switch (ct_verdict & NF_VERDICT_MASK) {
+ case NF_ACCEPT:
+ /* follow userspace verdict, could be REPEAT */
+ break;
case NF_STOLEN:
nf_queue_entry_free(entry);
return;
+ default:
+ verdict = ct_verdict & NF_VERDICT_MASK;
+ break;
}
-
}
nf_reinject(entry, verdict);
}
diff --git a/net/netfilter/nft_chain_filter.c b/net/netfilter/nft_chain_filter.c
index 680fe557686e..274b6f7e6bb5 100644
--- a/net/netfilter/nft_chain_filter.c
+++ b/net/netfilter/nft_chain_filter.c
@@ -357,9 +357,10 @@ static int nf_tables_netdev_event(struct notifier_block *this,
unsigned long event, void *ptr)
{
struct net_device *dev = netdev_notifier_info_to_dev(ptr);
+ struct nft_base_chain *basechain;
struct nftables_pernet *nft_net;
- struct nft_table *table;
struct nft_chain *chain, *nr;
+ struct nft_table *table;
struct nft_ctx ctx = {
.net = dev_net(dev),
};
@@ -371,7 +372,8 @@ static int nf_tables_netdev_event(struct notifier_block *this,
nft_net = nft_pernet(ctx.net);
mutex_lock(&nft_net->commit_mutex);
list_for_each_entry(table, &nft_net->tables, list) {
- if (table->family != NFPROTO_NETDEV)
+ if (table->family != NFPROTO_NETDEV &&
+ table->family != NFPROTO_INET)
continue;
ctx.family = table->family;
@@ -380,6 +382,11 @@ static int nf_tables_netdev_event(struct notifier_block *this,
if (!nft_is_base_chain(chain))
continue;
+ basechain = nft_base_chain(chain);
+ if (table->family == NFPROTO_INET &&
+ basechain->ops.hooknum != NF_INET_INGRESS)
+ continue;
+
ctx.chain = chain;
nft_netdev_event(event, dev, &ctx);
}
diff --git a/net/netfilter/nft_compat.c b/net/netfilter/nft_compat.c
index 5284cd2ad532..1f9474fefe84 100644
--- a/net/netfilter/nft_compat.c
+++ b/net/netfilter/nft_compat.c
@@ -135,7 +135,7 @@ static void nft_target_eval_bridge(const struct nft_expr *expr,
static const struct nla_policy nft_target_policy[NFTA_TARGET_MAX + 1] = {
[NFTA_TARGET_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_TARGET_REV] = { .type = NLA_U32 },
+ [NFTA_TARGET_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TARGET_INFO] = { .type = NLA_BINARY },
};
@@ -200,6 +200,7 @@ static const struct nla_policy nft_rule_compat_policy[NFTA_RULE_COMPAT_MAX + 1]
static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
{
struct nlattr *tb[NFTA_RULE_COMPAT_MAX+1];
+ u32 l4proto;
u32 flags;
int err;
@@ -212,12 +213,18 @@ static int nft_parse_compat(const struct nlattr *attr, u16 *proto, bool *inv)
return -EINVAL;
flags = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_FLAGS]));
- if (flags & ~NFT_RULE_COMPAT_F_MASK)
+ if (flags & NFT_RULE_COMPAT_F_UNUSED ||
+ flags & ~NFT_RULE_COMPAT_F_MASK)
return -EINVAL;
if (flags & NFT_RULE_COMPAT_F_INV)
*inv = true;
- *proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ l4proto = ntohl(nla_get_be32(tb[NFTA_RULE_COMPAT_PROTO]));
+ if (l4proto > U16_MAX)
+ return -EINVAL;
+
+ *proto = l4proto;
+
return 0;
}
@@ -350,6 +357,12 @@ static int nft_target_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -413,7 +426,7 @@ static void nft_match_eval(const struct nft_expr *expr,
static const struct nla_policy nft_match_policy[NFTA_MATCH_MAX + 1] = {
[NFTA_MATCH_NAME] = { .type = NLA_NUL_STRING },
- [NFTA_MATCH_REV] = { .type = NLA_U32 },
+ [NFTA_MATCH_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_MATCH_INFO] = { .type = NLA_BINARY },
};
@@ -595,6 +608,12 @@ static int nft_match_validate(const struct nft_ctx *ctx,
unsigned int hook_mask = 0;
int ret;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_BRIDGE &&
+ ctx->family != NFPROTO_ARP)
+ return -EOPNOTSUPP;
+
if (nft_is_base_chain(ctx->chain)) {
const struct nft_base_chain *basechain =
nft_base_chain(ctx->chain);
@@ -712,7 +731,7 @@ out_put:
static const struct nla_policy nfnl_compat_policy_get[NFTA_COMPAT_MAX+1] = {
[NFTA_COMPAT_NAME] = { .type = NLA_NUL_STRING,
.len = NFT_COMPAT_NAME_MAX-1 },
- [NFTA_COMPAT_REV] = { .type = NLA_U32 },
+ [NFTA_COMPAT_REV] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_COMPAT_TYPE] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 86bb9d7797d9..bfd3e5a14dab 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -476,6 +476,9 @@ static int nft_ct_get_init(const struct nft_ctx *ctx,
break;
#endif
case NFT_CT_ID:
+ if (tb[NFTA_CT_DIRECTION])
+ return -EINVAL;
+
len = sizeof(u32);
break;
default:
@@ -1250,7 +1253,31 @@ static int nft_ct_expect_obj_init(const struct nft_ctx *ctx,
if (tb[NFTA_CT_EXPECT_L3PROTO])
priv->l3num = ntohs(nla_get_be16(tb[NFTA_CT_EXPECT_L3PROTO]));
+ switch (priv->l3num) {
+ case NFPROTO_IPV4:
+ case NFPROTO_IPV6:
+ if (priv->l3num != ctx->family)
+ return -EINVAL;
+
+ fallthrough;
+ case NFPROTO_INET:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->l4proto = nla_get_u8(tb[NFTA_CT_EXPECT_L4PROTO]);
+ switch (priv->l4proto) {
+ case IPPROTO_TCP:
+ case IPPROTO_UDP:
+ case IPPROTO_UDPLITE:
+ case IPPROTO_DCCP:
+ case IPPROTO_SCTP:
+ break;
+ default:
+ return -EOPNOTSUPP;
+ }
+
priv->dport = nla_get_be16(tb[NFTA_CT_EXPECT_DPORT]);
priv->timeout = nla_get_u32(tb[NFTA_CT_EXPECT_TIMEOUT]);
priv->size = nla_get_u8(tb[NFTA_CT_EXPECT_SIZE]);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index ab3362c483b4..397351fa4d5f 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -384,6 +384,11 @@ static int nft_flow_offload_validate(const struct nft_ctx *ctx,
{
unsigned int hook_mask = (1 << NF_INET_FORWARD);
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, hook_mask);
}
diff --git a/net/netfilter/nft_limit.c b/net/netfilter/nft_limit.c
index 79039afde34e..cefa25e0dbb0 100644
--- a/net/netfilter/nft_limit.c
+++ b/net/netfilter/nft_limit.c
@@ -58,17 +58,19 @@ static inline bool nft_limit_eval(struct nft_limit_priv *priv, u64 cost)
static int nft_limit_init(struct nft_limit_priv *priv,
const struct nlattr * const tb[], bool pkts)
{
+ u64 unit, tokens, rate_with_burst;
bool invert = false;
- u64 unit, tokens;
if (tb[NFTA_LIMIT_RATE] == NULL ||
tb[NFTA_LIMIT_UNIT] == NULL)
return -EINVAL;
priv->rate = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_RATE]));
+ if (priv->rate == 0)
+ return -EINVAL;
+
unit = be64_to_cpu(nla_get_be64(tb[NFTA_LIMIT_UNIT]));
- priv->nsecs = unit * NSEC_PER_SEC;
- if (priv->rate == 0 || priv->nsecs < unit)
+ if (check_mul_overflow(unit, NSEC_PER_SEC, &priv->nsecs))
return -EOVERFLOW;
if (tb[NFTA_LIMIT_BURST])
@@ -77,18 +79,25 @@ static int nft_limit_init(struct nft_limit_priv *priv,
if (pkts && priv->burst == 0)
priv->burst = NFT_LIMIT_PKT_BURST_DEFAULT;
- if (priv->rate + priv->burst < priv->rate)
+ if (check_add_overflow(priv->rate, priv->burst, &rate_with_burst))
return -EOVERFLOW;
if (pkts) {
- tokens = div64_u64(priv->nsecs, priv->rate) * priv->burst;
+ u64 tmp = div64_u64(priv->nsecs, priv->rate);
+
+ if (check_mul_overflow(tmp, priv->burst, &tokens))
+ return -EOVERFLOW;
} else {
+ u64 tmp;
+
/* The token bucket size limits the number of tokens can be
* accumulated. tokens_max specifies the bucket size.
* tokens_max = unit * (rate + burst) / rate.
*/
- tokens = div64_u64(priv->nsecs * (priv->rate + priv->burst),
- priv->rate);
+ if (check_mul_overflow(priv->nsecs, rate_with_burst, &tmp))
+ return -EOVERFLOW;
+
+ tokens = div64_u64(tmp, priv->rate);
}
if (tb[NFTA_LIMIT_FLAGS]) {
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 583885ce7232..808f5802c270 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -143,6 +143,11 @@ static int nft_nat_validate(const struct nft_ctx *ctx,
struct nft_nat *priv = nft_expr_priv(expr);
int err;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
err = nft_chain_validate_dependency(ctx->chain, NFT_CHAIN_T_NAT);
if (err < 0)
return err;
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 35a2c28caa60..24d977138572 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -166,6 +166,11 @@ static int nft_rt_validate(const struct nft_ctx *ctx, const struct nft_expr *exp
const struct nft_rt *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->key) {
case NFT_RT_NEXTHOP4:
case NFT_RT_NEXTHOP6:
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 6c2061bfdae6..6968a3b34236 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -36,6 +36,7 @@ struct nft_rhash_cmp_arg {
const struct nft_set *set;
const u32 *key;
u8 genmask;
+ u64 tstamp;
};
static inline u32 nft_rhash_key(const void *data, u32 len, u32 seed)
@@ -62,7 +63,7 @@ static inline int nft_rhash_cmp(struct rhashtable_compare_arg *arg,
return 1;
if (nft_set_elem_is_dead(&he->ext))
return 1;
- if (nft_set_elem_expired(&he->ext))
+ if (__nft_set_elem_expired(&he->ext, x->tstamp))
return 1;
if (!nft_set_elem_active(&he->ext, x->genmask))
return 1;
@@ -87,6 +88,7 @@ bool nft_rhash_lookup(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -106,6 +108,7 @@ nft_rhash_get(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_cur(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -131,6 +134,7 @@ static bool nft_rhash_update(struct nft_set *set, const u32 *key,
.genmask = NFT_GENMASK_ANY,
.set = set,
.key = key,
+ .tstamp = get_jiffies_64(),
};
he = rhashtable_lookup(&priv->ht, &arg, nft_rhash_params);
@@ -175,6 +179,7 @@ static int nft_rhash_insert(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
struct nft_rhash_elem *prev;
@@ -216,6 +221,7 @@ nft_rhash_deactivate(const struct net *net, const struct nft_set *set,
.genmask = nft_genmask_next(net),
.set = set,
.key = elem->key.val.data,
+ .tstamp = nft_net_tstamp(net),
};
rcu_read_lock();
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index efd523496be4..aa1d9e93a9a0 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -342,9 +342,6 @@
#include "nft_set_pipapo_avx2.h"
#include "nft_set_pipapo.h"
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_scratch_index);
-
/**
* pipapo_refill() - For each set bit, set bits from selected mapping table item
* @map: Bitmap to be scanned for set bits
@@ -412,6 +409,7 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_scratch *scratch;
unsigned long *res_map, *fill_map;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
@@ -422,15 +420,17 @@ bool nft_pipapo_lookup(const struct net *net, const struct nft_set *set,
local_bh_disable();
- map_index = raw_cpu_read(nft_pipapo_scratch_index);
-
m = rcu_dereference(priv->match);
if (unlikely(!m || !*raw_cpu_ptr(m->scratch)))
goto out;
- res_map = *raw_cpu_ptr(m->scratch) + (map_index ? m->bsize_max : 0);
- fill_map = *raw_cpu_ptr(m->scratch) + (map_index ? 0 : m->bsize_max);
+ scratch = *raw_cpu_ptr(m->scratch);
+
+ map_index = scratch->map_index;
+
+ res_map = scratch->map + (map_index ? m->bsize_max : 0);
+ fill_map = scratch->map + (map_index ? 0 : m->bsize_max);
memset(res_map, 0xff, m->bsize_max * sizeof(*res_map));
@@ -460,7 +460,7 @@ next_match:
b = pipapo_refill(res_map, f->bsize, f->rules, fill_map, f->mt,
last);
if (b < 0) {
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return false;
@@ -477,7 +477,7 @@ next_match:
* current inactive bitmap is clean and can be reused as
* *next* bitmap (not initial) for the next packet.
*/
- raw_cpu_write(nft_pipapo_scratch_index, map_index);
+ scratch->map_index = map_index;
local_bh_enable();
return true;
@@ -504,6 +504,7 @@ out:
* @set: nftables API set representation
* @data: Key data to be matched against existing elements
* @genmask: If set, check that element is active in given genmask
+ * @tstamp: timestamp to check for expired elements
*
* This is essentially the same as the lookup function, except that it matches
* key data against the uncommitted copy and doesn't use preallocated maps for
@@ -513,7 +514,8 @@ out:
*/
static struct nft_pipapo_elem *pipapo_get(const struct net *net,
const struct nft_set *set,
- const u8 *data, u8 genmask)
+ const u8 *data, u8 genmask,
+ u64 tstamp)
{
struct nft_pipapo_elem *ret = ERR_PTR(-ENOENT);
struct nft_pipapo *priv = nft_set_priv(set);
@@ -566,7 +568,7 @@ next_match:
goto out;
if (last) {
- if (nft_set_elem_expired(&f->mt[b].e->ext))
+ if (__nft_set_elem_expired(&f->mt[b].e->ext, tstamp))
goto next_match;
if ((genmask &&
!nft_set_elem_active(&f->mt[b].e->ext, genmask)))
@@ -603,10 +605,10 @@ static struct nft_elem_priv *
nft_pipapo_get(const struct net *net, const struct nft_set *set,
const struct nft_set_elem *elem, unsigned int flags)
{
- static struct nft_pipapo_elem *e;
+ struct nft_pipapo_elem *e;
e = pipapo_get(net, set, (const u8 *)elem->key.val.data,
- nft_genmask_cur(net));
+ nft_genmask_cur(net), get_jiffies_64());
if (IS_ERR(e))
return ERR_CAST(e);
@@ -1109,6 +1111,25 @@ static void pipapo_map(struct nft_pipapo_match *m,
}
/**
+ * pipapo_free_scratch() - Free per-CPU map at original (not aligned) address
+ * @m: Matching data
+ * @cpu: CPU number
+ */
+static void pipapo_free_scratch(const struct nft_pipapo_match *m, unsigned int cpu)
+{
+ struct nft_pipapo_scratch *s;
+ void *mem;
+
+ s = *per_cpu_ptr(m->scratch, cpu);
+ if (!s)
+ return;
+
+ mem = s;
+ mem -= s->align_off;
+ kfree(mem);
+}
+
+/**
* pipapo_realloc_scratch() - Reallocate scratch maps for partial match results
* @clone: Copy of matching data with pending insertions and deletions
* @bsize_max: Maximum bucket size, scratch maps cover two buckets
@@ -1121,12 +1142,13 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
int i;
for_each_possible_cpu(i) {
- unsigned long *scratch;
+ struct nft_pipapo_scratch *scratch;
#ifdef NFT_PIPAPO_ALIGN
- unsigned long *scratch_aligned;
+ void *scratch_aligned;
+ u32 align_off;
#endif
-
- scratch = kzalloc_node(bsize_max * sizeof(*scratch) * 2 +
+ scratch = kzalloc_node(struct_size(scratch, map,
+ bsize_max * 2) +
NFT_PIPAPO_ALIGN_HEADROOM,
GFP_KERNEL, cpu_to_node(i));
if (!scratch) {
@@ -1140,14 +1162,25 @@ static int pipapo_realloc_scratch(struct nft_pipapo_match *clone,
return -ENOMEM;
}
- kfree(*per_cpu_ptr(clone->scratch, i));
-
- *per_cpu_ptr(clone->scratch, i) = scratch;
+ pipapo_free_scratch(clone, i);
#ifdef NFT_PIPAPO_ALIGN
- scratch_aligned = NFT_PIPAPO_LT_ALIGN(scratch);
- *per_cpu_ptr(clone->scratch_aligned, i) = scratch_aligned;
+ /* Align &scratch->map (not the struct itself): the extra
+ * %NFT_PIPAPO_ALIGN_HEADROOM bytes passed to kzalloc_node()
+ * above guarantee we can waste up to those bytes in order
+ * to align the map field regardless of its offset within
+ * the struct.
+ */
+ BUILD_BUG_ON(offsetof(struct nft_pipapo_scratch, map) > NFT_PIPAPO_ALIGN_HEADROOM);
+
+ scratch_aligned = NFT_PIPAPO_LT_ALIGN(&scratch->map);
+ scratch_aligned -= offsetof(struct nft_pipapo_scratch, map);
+ align_off = scratch_aligned - (void *)scratch;
+
+ scratch = scratch_aligned;
+ scratch->align_off = align_off;
#endif
+ *per_cpu_ptr(clone->scratch, i) = scratch;
}
return 0;
@@ -1173,6 +1206,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
struct nft_pipapo_match *m = priv->clone;
u8 genmask = nft_genmask_next(net);
struct nft_pipapo_elem *e, *dup;
+ u64 tstamp = nft_net_tstamp(net);
struct nft_pipapo_field *f;
const u8 *start_p, *end_p;
int i, bsize_max, err = 0;
@@ -1182,7 +1216,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
else
end = start;
- dup = pipapo_get(net, set, start, genmask);
+ dup = pipapo_get(net, set, start, genmask, tstamp);
if (!IS_ERR(dup)) {
/* Check if we already have the same exact entry */
const struct nft_data *dup_key, *dup_end;
@@ -1204,7 +1238,7 @@ static int nft_pipapo_insert(const struct net *net, const struct nft_set *set,
if (PTR_ERR(dup) == -ENOENT) {
/* Look for partially overlapping entries */
- dup = pipapo_get(net, set, end, nft_genmask_next(net));
+ dup = pipapo_get(net, set, end, nft_genmask_next(net), tstamp);
}
if (PTR_ERR(dup) != -ENOENT) {
@@ -1301,11 +1335,6 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
if (!new->scratch)
goto out_scratch;
-#ifdef NFT_PIPAPO_ALIGN
- new->scratch_aligned = alloc_percpu(*new->scratch_aligned);
- if (!new->scratch_aligned)
- goto out_scratch;
-#endif
for_each_possible_cpu(i)
*per_cpu_ptr(new->scratch, i) = NULL;
@@ -1357,10 +1386,7 @@ out_lt:
}
out_scratch_realloc:
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(new->scratch, i));
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(new->scratch_aligned);
-#endif
+ pipapo_free_scratch(new, i);
out_scratch:
free_percpu(new->scratch);
kfree(new);
@@ -1560,6 +1586,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
int rules_f0, first_rule = 0;
struct nft_pipapo_elem *e;
struct nft_trans_gc *gc;
@@ -1594,7 +1621,7 @@ static void pipapo_gc(struct nft_set *set, struct nft_pipapo_match *m)
/* synchronous gc never fails, there is no need to set on
* NFT_SET_ELEM_DEAD_BIT.
*/
- if (nft_set_elem_expired(&e->ext)) {
+ if (__nft_set_elem_expired(&e->ext, tstamp)) {
priv->dirty = true;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
@@ -1640,13 +1667,9 @@ static void pipapo_free_match(struct nft_pipapo_match *m)
int i;
for_each_possible_cpu(i)
- kfree(*per_cpu_ptr(m->scratch, i));
+ pipapo_free_scratch(m, i);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
-
pipapo_free_fields(m);
kfree(m);
@@ -1769,7 +1792,7 @@ static void *pipapo_deactivate(const struct net *net, const struct nft_set *set,
{
struct nft_pipapo_elem *e;
- e = pipapo_get(net, set, data, nft_genmask_next(net));
+ e = pipapo_get(net, set, data, nft_genmask_next(net), nft_net_tstamp(net));
if (IS_ERR(e))
return NULL;
@@ -2132,7 +2155,7 @@ static int nft_pipapo_init(const struct nft_set *set,
m->field_count = field_count;
m->bsize_max = 0;
- m->scratch = alloc_percpu(unsigned long *);
+ m->scratch = alloc_percpu(struct nft_pipapo_scratch *);
if (!m->scratch) {
err = -ENOMEM;
goto out_scratch;
@@ -2140,16 +2163,6 @@ static int nft_pipapo_init(const struct nft_set *set,
for_each_possible_cpu(i)
*per_cpu_ptr(m->scratch, i) = NULL;
-#ifdef NFT_PIPAPO_ALIGN
- m->scratch_aligned = alloc_percpu(unsigned long *);
- if (!m->scratch_aligned) {
- err = -ENOMEM;
- goto out_free;
- }
- for_each_possible_cpu(i)
- *per_cpu_ptr(m->scratch_aligned, i) = NULL;
-#endif
-
rcu_head_init(&m->rcu);
nft_pipapo_for_each_field(f, i, m) {
@@ -2180,9 +2193,6 @@ static int nft_pipapo_init(const struct nft_set *set,
return 0;
out_free:
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
free_percpu(m->scratch);
out_scratch:
kfree(m);
@@ -2236,11 +2246,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(m->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(m->scratch, cpu));
+ pipapo_free_scratch(m, cpu);
free_percpu(m->scratch);
pipapo_free_fields(m);
kfree(m);
@@ -2253,11 +2260,8 @@ static void nft_pipapo_destroy(const struct nft_ctx *ctx,
if (priv->dirty)
nft_set_pipapo_match_destroy(ctx, set, m);
-#ifdef NFT_PIPAPO_ALIGN
- free_percpu(priv->clone->scratch_aligned);
-#endif
for_each_possible_cpu(cpu)
- kfree(*per_cpu_ptr(priv->clone->scratch, cpu));
+ pipapo_free_scratch(priv->clone, cpu);
free_percpu(priv->clone->scratch);
pipapo_free_fields(priv->clone);
diff --git a/net/netfilter/nft_set_pipapo.h b/net/netfilter/nft_set_pipapo.h
index 1040223da5fa..f59a0cd81105 100644
--- a/net/netfilter/nft_set_pipapo.h
+++ b/net/netfilter/nft_set_pipapo.h
@@ -131,20 +131,28 @@ struct nft_pipapo_field {
};
/**
+ * struct nft_pipapo_scratch - percpu data used for lookup and matching
+ * @map_index: Current working bitmap index, toggled between field matches
+ * @align_off: Offset to get the originally allocated address
+ * @map: store partial matching results during lookup
+ */
+struct nft_pipapo_scratch {
+ u8 map_index;
+ u32 align_off;
+ unsigned long map[];
+};
+
+/**
* struct nft_pipapo_match - Data used for lookup and matching
* @field_count Amount of fields in set
* @scratch: Preallocated per-CPU maps for partial matching results
- * @scratch_aligned: Version of @scratch aligned to NFT_PIPAPO_ALIGN bytes
* @bsize_max: Maximum lookup table bucket size of all fields, in longs
* @rcu Matching data is swapped on commits
* @f: Fields, with lookup and mapping tables
*/
struct nft_pipapo_match {
int field_count;
-#ifdef NFT_PIPAPO_ALIGN
- unsigned long * __percpu *scratch_aligned;
-#endif
- unsigned long * __percpu *scratch;
+ struct nft_pipapo_scratch * __percpu *scratch;
size_t bsize_max;
struct rcu_head rcu;
struct nft_pipapo_field f[] __counted_by(field_count);
diff --git a/net/netfilter/nft_set_pipapo_avx2.c b/net/netfilter/nft_set_pipapo_avx2.c
index 52e0d026d30a..a3a8ddca9918 100644
--- a/net/netfilter/nft_set_pipapo_avx2.c
+++ b/net/netfilter/nft_set_pipapo_avx2.c
@@ -57,7 +57,7 @@
/* Jump to label if @reg is zero */
#define NFT_PIPAPO_AVX2_NOMATCH_GOTO(reg, label) \
- asm_volatile_goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
+ asm goto("vptest %%ymm" #reg ", %%ymm" #reg ";" \
"je %l[" #label "]" : : : : label)
/* Store 256 bits from YMM register into memory. Contrary to bucket load
@@ -71,9 +71,6 @@
#define NFT_PIPAPO_AVX2_ZERO(reg) \
asm volatile("vpxor %ymm" #reg ", %ymm" #reg ", %ymm" #reg)
-/* Current working bitmap index, toggled between field matches */
-static DEFINE_PER_CPU(bool, nft_pipapo_avx2_scratch_index);
-
/**
* nft_pipapo_avx2_prepare() - Prepare before main algorithm body
*
@@ -1120,11 +1117,12 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
const u32 *key, const struct nft_set_ext **ext)
{
struct nft_pipapo *priv = nft_set_priv(set);
- unsigned long *res, *fill, *scratch;
+ struct nft_pipapo_scratch *scratch;
u8 genmask = nft_genmask_cur(net);
const u8 *rp = (const u8 *)key;
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
+ unsigned long *res, *fill;
bool map_index;
int i, ret = 0;
@@ -1141,15 +1139,16 @@ bool nft_pipapo_avx2_lookup(const struct net *net, const struct nft_set *set,
*/
kernel_fpu_begin_mask(0);
- scratch = *raw_cpu_ptr(m->scratch_aligned);
+ scratch = *raw_cpu_ptr(m->scratch);
if (unlikely(!scratch)) {
kernel_fpu_end();
return false;
}
- map_index = raw_cpu_read(nft_pipapo_avx2_scratch_index);
- res = scratch + (map_index ? m->bsize_max : 0);
- fill = scratch + (map_index ? 0 : m->bsize_max);
+ map_index = scratch->map_index;
+
+ res = scratch->map + (map_index ? m->bsize_max : 0);
+ fill = scratch->map + (map_index ? 0 : m->bsize_max);
/* Starting map doesn't need to be set for this implementation */
@@ -1221,7 +1220,7 @@ next_match:
out:
if (i % 2)
- raw_cpu_write(nft_pipapo_avx2_scratch_index, !map_index);
+ scratch->map_index = !map_index;
kernel_fpu_end();
return ret >= 0;
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index baa3fea4fe65..9944fe479e53 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -234,7 +234,7 @@ static void nft_rbtree_gc_elem_remove(struct net *net, struct nft_set *set,
static const struct nft_rbtree_elem *
nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
- struct nft_rbtree_elem *rbe, u8 genmask)
+ struct nft_rbtree_elem *rbe)
{
struct nft_set *set = (struct nft_set *)__set;
struct rb_node *prev = rb_prev(&rbe->node);
@@ -253,7 +253,7 @@ nft_rbtree_gc_elem(const struct nft_set *__set, struct nft_rbtree *priv,
while (prev) {
rbe_prev = rb_entry(prev, struct nft_rbtree_elem, node);
if (nft_rbtree_interval_end(rbe_prev) &&
- nft_set_elem_active(&rbe_prev->ext, genmask))
+ nft_set_elem_active(&rbe_prev->ext, NFT_GENMASK_ANY))
break;
prev = rb_prev(prev);
@@ -313,6 +313,7 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
struct nft_rbtree *priv = nft_set_priv(set);
u8 cur_genmask = nft_genmask_cur(net);
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
/* Descend the tree to search for an existing element greater than the
@@ -360,11 +361,11 @@ static int __nft_rbtree_insert(const struct net *net, const struct nft_set *set,
/* perform garbage collection to avoid bogus overlap reports
* but skip new elements in this transaction.
*/
- if (nft_set_elem_expired(&rbe->ext) &&
+ if (__nft_set_elem_expired(&rbe->ext, tstamp) &&
nft_set_elem_active(&rbe->ext, cur_genmask)) {
const struct nft_rbtree_elem *removed_end;
- removed_end = nft_rbtree_gc_elem(set, priv, rbe, genmask);
+ removed_end = nft_rbtree_gc_elem(set, priv, rbe);
if (IS_ERR(removed_end))
return PTR_ERR(removed_end);
@@ -551,6 +552,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
const struct nft_rbtree *priv = nft_set_priv(set);
const struct rb_node *parent = priv->root.rb_node;
u8 genmask = nft_genmask_next(net);
+ u64 tstamp = nft_net_tstamp(net);
int d;
while (parent != NULL) {
@@ -571,7 +573,7 @@ nft_rbtree_deactivate(const struct net *net, const struct nft_set *set,
nft_rbtree_interval_end(this)) {
parent = parent->rb_right;
continue;
- } else if (nft_set_elem_expired(&rbe->ext)) {
+ } else if (__nft_set_elem_expired(&rbe->ext, tstamp)) {
break;
} else if (!nft_set_elem_active(&rbe->ext, genmask)) {
parent = parent->rb_left;
@@ -624,9 +626,10 @@ static void nft_rbtree_gc(struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe, *rbe_end = NULL;
+ struct net *net = read_pnet(&set->net);
+ u64 tstamp = nft_net_tstamp(net);
struct rb_node *node, *next;
struct nft_trans_gc *gc;
- struct net *net;
set = nft_set_container_of(priv);
net = read_pnet(&set->net);
@@ -648,7 +651,7 @@ static void nft_rbtree_gc(struct nft_set *set)
rbe_end = rbe;
continue;
}
- if (!nft_set_elem_expired(&rbe->ext))
+ if (!__nft_set_elem_expired(&rbe->ext, tstamp))
continue;
gc = nft_trans_gc_queue_sync(gc, GFP_KERNEL);
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 9ed85be79452..f30163e2ca62 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -242,6 +242,11 @@ static int nft_socket_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain,
(1 << NF_INET_PRE_ROUTING) |
(1 << NF_INET_LOCAL_IN) |
diff --git a/net/netfilter/nft_synproxy.c b/net/netfilter/nft_synproxy.c
index 13da882669a4..1d737f89dfc1 100644
--- a/net/netfilter/nft_synproxy.c
+++ b/net/netfilter/nft_synproxy.c
@@ -186,7 +186,6 @@ static int nft_synproxy_do_init(const struct nft_ctx *ctx,
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
err = nf_synproxy_ipv4_init(snet, ctx->net);
if (err)
goto nf_ct_failure;
@@ -219,7 +218,6 @@ static void nft_synproxy_do_destroy(const struct nft_ctx *ctx)
break;
#endif
case NFPROTO_INET:
- case NFPROTO_BRIDGE:
nf_synproxy_ipv4_fini(snet, ctx->net);
nf_synproxy_ipv6_fini(snet, ctx->net);
break;
@@ -253,6 +251,11 @@ static int nft_synproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, (1 << NF_INET_LOCAL_IN) |
(1 << NF_INET_FORWARD));
}
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index ae15cd693f0e..71412adb73d4 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -316,6 +316,11 @@ static int nft_tproxy_validate(const struct nft_ctx *ctx,
const struct nft_expr *expr,
const struct nft_data **data)
{
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
return nft_chain_validate_hooks(ctx->chain, 1 << NF_INET_PRE_ROUTING);
}
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index 9f21953c7433..f735d79d8be5 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -713,6 +713,7 @@ static const struct nft_object_ops nft_tunnel_obj_ops = {
static struct nft_object_type nft_tunnel_obj_type __read_mostly = {
.type = NFT_OBJECT_TUNNEL,
+ .family = NFPROTO_NETDEV,
.ops = &nft_tunnel_obj_ops,
.maxattr = NFTA_TUNNEL_KEY_MAX,
.policy = nft_tunnel_key_policy,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index 452f8587adda..1c866757db55 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -235,6 +235,11 @@ static int nft_xfrm_validate(const struct nft_ctx *ctx, const struct nft_expr *e
const struct nft_xfrm *priv = nft_expr_priv(expr);
unsigned int hooks;
+ if (ctx->family != NFPROTO_IPV4 &&
+ ctx->family != NFPROTO_IPV6 &&
+ ctx->family != NFPROTO_INET)
+ return -EOPNOTSUPP;
+
switch (priv->dir) {
case XFRM_POLICY_IN:
hooks = (1 << NF_INET_FORWARD) |
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 4ed8ffd58ff3..9c962347cf85 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -374,7 +374,7 @@ static void netlink_skb_destructor(struct sk_buff *skb)
if (is_vmalloc_addr(skb->head)) {
if (!skb->cloned ||
!atomic_dec_return(&(skb_shinfo(skb)->dataref)))
- vfree(skb->head);
+ vfree_atomic(skb->head);
skb->head = NULL;
}
diff --git a/net/nfc/nci/core.c b/net/nfc/nci/core.c
index 97348cedb16b..cdad47b140fa 100644
--- a/net/nfc/nci/core.c
+++ b/net/nfc/nci/core.c
@@ -1208,6 +1208,10 @@ void nci_free_device(struct nci_dev *ndev)
{
nfc_free_device(ndev->nfc_dev);
nci_hci_deallocate(ndev);
+
+ /* drop partial rx data packet if present */
+ if (ndev->rx_data_reassembly)
+ kfree_skb(ndev->rx_data_reassembly);
kfree(ndev);
}
EXPORT_SYMBOL(nci_free_device);
diff --git a/net/rds/af_rds.c b/net/rds/af_rds.c
index 01c4cdfef45d..8435a20968ef 100644
--- a/net/rds/af_rds.c
+++ b/net/rds/af_rds.c
@@ -419,7 +419,7 @@ static int rds_recv_track_latency(struct rds_sock *rs, sockptr_t optval,
rs->rs_rx_traces = trace.rx_traces;
for (i = 0; i < rs->rs_rx_traces; i++) {
- if (trace.rx_trace_pos[i] > RDS_MSG_RX_DGRAM_TRACE_MAX) {
+ if (trace.rx_trace_pos[i] >= RDS_MSG_RX_DGRAM_TRACE_MAX) {
rs->rs_rx_traces = 0;
return -EFAULT;
}
diff --git a/net/rxrpc/ar-internal.h b/net/rxrpc/ar-internal.h
index dbeb75c29857..7818aae1be8e 100644
--- a/net/rxrpc/ar-internal.h
+++ b/net/rxrpc/ar-internal.h
@@ -199,11 +199,19 @@ struct rxrpc_host_header {
*/
struct rxrpc_skb_priv {
struct rxrpc_connection *conn; /* Connection referred to (poke packet) */
- u16 offset; /* Offset of data */
- u16 len; /* Length of data */
- u8 flags;
+ union {
+ struct {
+ u16 offset; /* Offset of data */
+ u16 len; /* Length of data */
+ u8 flags;
#define RXRPC_RX_VERIFIED 0x01
-
+ };
+ struct {
+ rxrpc_seq_t first_ack; /* First packet in acks table */
+ u8 nr_acks; /* Number of acks+nacks */
+ u8 nr_nacks; /* Number of nacks */
+ };
+ };
struct rxrpc_host_header hdr; /* RxRPC packet header from this packet */
};
@@ -510,7 +518,7 @@ struct rxrpc_connection {
enum rxrpc_call_completion completion; /* Completion condition */
s32 abort_code; /* Abort code of connection abort */
int debug_id; /* debug ID for printks */
- atomic_t serial; /* packet serial number counter */
+ rxrpc_serial_t tx_serial; /* Outgoing packet serial number counter */
unsigned int hi_serial; /* highest serial number received */
u32 service_id; /* Service ID, possibly upgraded */
u32 security_level; /* Security level selected */
@@ -692,11 +700,11 @@ struct rxrpc_call {
u8 cong_dup_acks; /* Count of ACKs showing missing packets */
u8 cong_cumul_acks; /* Cumulative ACK count */
ktime_t cong_tstamp; /* Last time cwnd was changed */
+ struct sk_buff *cong_last_nack; /* Last ACK with nacks received */
/* Receive-phase ACK management (ACKs we send). */
u8 ackr_reason; /* reason to ACK */
u16 ackr_sack_base; /* Starting slot in SACK table ring */
- rxrpc_serial_t ackr_serial; /* serial of packet being ACK'd */
rxrpc_seq_t ackr_window; /* Base of SACK window */
rxrpc_seq_t ackr_wtop; /* Base of SACK window */
unsigned int ackr_nr_unacked; /* Number of unacked packets */
@@ -730,7 +738,8 @@ struct rxrpc_call {
struct rxrpc_ack_summary {
u16 nr_acks; /* Number of ACKs in packet */
u16 nr_new_acks; /* Number of new ACKs in packet */
- u16 nr_rot_new_acks; /* Number of rotated new ACKs */
+ u16 nr_new_nacks; /* Number of new nacks in packet */
+ u16 nr_retained_nacks; /* Number of nacks retained between ACKs */
u8 ack_reason;
bool saw_nacks; /* Saw NACKs in packet */
bool new_low_nack; /* T if new low NACK found */
@@ -823,6 +832,20 @@ static inline bool rxrpc_sending_to_client(const struct rxrpc_txbuf *txb)
#include <trace/events/rxrpc.h>
/*
+ * Allocate the next serial number on a connection. 0 must be skipped.
+ */
+static inline rxrpc_serial_t rxrpc_get_next_serial(struct rxrpc_connection *conn)
+{
+ rxrpc_serial_t serial;
+
+ serial = conn->tx_serial;
+ if (serial == 0)
+ serial = 1;
+ conn->tx_serial = serial + 1;
+ return serial;
+}
+
+/*
* af_rxrpc.c
*/
extern atomic_t rxrpc_n_rx_skbs;
diff --git a/net/rxrpc/call_event.c b/net/rxrpc/call_event.c
index e363f21a2014..0f78544d043b 100644
--- a/net/rxrpc/call_event.c
+++ b/net/rxrpc/call_event.c
@@ -43,8 +43,6 @@ void rxrpc_propose_delay_ACK(struct rxrpc_call *call, rxrpc_serial_t serial,
unsigned long expiry = rxrpc_soft_ack_delay;
unsigned long now = jiffies, ack_at;
- call->ackr_serial = serial;
-
if (rxrpc_soft_ack_delay < expiry)
expiry = rxrpc_soft_ack_delay;
if (call->peer->srtt_us != 0)
@@ -114,6 +112,7 @@ static void rxrpc_congestion_timeout(struct rxrpc_call *call)
void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
{
struct rxrpc_ackpacket *ack = NULL;
+ struct rxrpc_skb_priv *sp;
struct rxrpc_txbuf *txb;
unsigned long resend_at;
rxrpc_seq_t transmitted = READ_ONCE(call->tx_transmitted);
@@ -141,14 +140,15 @@ void rxrpc_resend(struct rxrpc_call *call, struct sk_buff *ack_skb)
* explicitly NAK'd packets.
*/
if (ack_skb) {
+ sp = rxrpc_skb(ack_skb);
ack = (void *)ack_skb->data + sizeof(struct rxrpc_wire_header);
- for (i = 0; i < ack->nAcks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
rxrpc_seq_t seq;
if (ack->acks[i] & 1)
continue;
- seq = ntohl(ack->firstPacket) + i;
+ seq = sp->first_ack + i;
if (after(txb->seq, transmitted))
break;
if (after(txb->seq, seq))
@@ -373,7 +373,6 @@ static void rxrpc_send_initial_ping(struct rxrpc_call *call)
bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
{
unsigned long now, next, t;
- rxrpc_serial_t ackr_serial;
bool resend = false, expired = false;
s32 abort_code;
@@ -423,8 +422,7 @@ bool rxrpc_input_call_event(struct rxrpc_call *call, struct sk_buff *skb)
if (time_after_eq(now, t)) {
trace_rxrpc_timer(call, rxrpc_timer_exp_ack, now);
cmpxchg(&call->delay_ack_at, t, now + MAX_JIFFY_OFFSET);
- ackr_serial = xchg(&call->ackr_serial, 0);
- rxrpc_send_ACK(call, RXRPC_ACK_DELAY, ackr_serial,
+ rxrpc_send_ACK(call, RXRPC_ACK_DELAY, 0,
rxrpc_propose_ack_ping_for_lost_ack);
}
diff --git a/net/rxrpc/call_object.c b/net/rxrpc/call_object.c
index 0943e54370ba..9fc9a6c3f685 100644
--- a/net/rxrpc/call_object.c
+++ b/net/rxrpc/call_object.c
@@ -686,6 +686,7 @@ static void rxrpc_destroy_call(struct work_struct *work)
del_timer_sync(&call->timer);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
rxrpc_cleanup_ring(call);
while ((txb = list_first_entry_or_null(&call->tx_sendmsg,
struct rxrpc_txbuf, call_link))) {
diff --git a/net/rxrpc/conn_event.c b/net/rxrpc/conn_event.c
index 95f4bc206b3d..1f251d758cb9 100644
--- a/net/rxrpc/conn_event.c
+++ b/net/rxrpc/conn_event.c
@@ -95,6 +95,14 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
_enter("%d", conn->debug_id);
+ if (sp && sp->hdr.type == RXRPC_PACKET_TYPE_ACK) {
+ if (skb_copy_bits(skb, sizeof(struct rxrpc_wire_header),
+ &pkt.ack, sizeof(pkt.ack)) < 0)
+ return;
+ if (pkt.ack.reason == RXRPC_ACK_PING_RESPONSE)
+ return;
+ }
+
chan = &conn->channels[channel];
/* If the last call got moved on whilst we were waiting to run, just
@@ -117,7 +125,7 @@ void rxrpc_conn_retransmit_call(struct rxrpc_connection *conn,
iov[2].iov_base = &ack_info;
iov[2].iov_len = sizeof(ack_info);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.epoch = htonl(conn->proto.epoch);
pkt.whdr.cid = htonl(conn->proto.cid | channel);
diff --git a/net/rxrpc/input.c b/net/rxrpc/input.c
index 92495e73b869..9691de00ade7 100644
--- a/net/rxrpc/input.c
+++ b/net/rxrpc/input.c
@@ -45,11 +45,9 @@ static void rxrpc_congestion_management(struct rxrpc_call *call,
}
cumulative_acks += summary->nr_new_acks;
- cumulative_acks += summary->nr_rot_new_acks;
if (cumulative_acks > 255)
cumulative_acks = 255;
- summary->mode = call->cong_mode;
summary->cwnd = call->cong_cwnd;
summary->ssthresh = call->cong_ssthresh;
summary->cumulative_acks = cumulative_acks;
@@ -151,6 +149,7 @@ out_no_clear_ca:
cwnd = RXRPC_TX_MAX_WINDOW;
call->cong_cwnd = cwnd;
call->cong_cumul_acks = cumulative_acks;
+ summary->mode = call->cong_mode;
trace_rxrpc_congest(call, summary, acked_serial, change);
if (resend)
rxrpc_resend(call, skb);
@@ -213,7 +212,6 @@ static bool rxrpc_rotate_tx_window(struct rxrpc_call *call, rxrpc_seq_t to,
list_for_each_entry_rcu(txb, &call->tx_buffer, call_link, false) {
if (before_eq(txb->seq, call->acks_hard_ack))
continue;
- summary->nr_rot_new_acks++;
if (test_bit(RXRPC_TXBUF_LAST, &txb->flags)) {
set_bit(RXRPC_CALL_TX_LAST, &call->flags);
rot_last = true;
@@ -254,6 +252,11 @@ static void rxrpc_end_tx_phase(struct rxrpc_call *call, bool reply_begun,
{
ASSERT(test_bit(RXRPC_CALL_TX_LAST, &call->flags));
+ if (unlikely(call->cong_last_nack)) {
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ }
+
switch (__rxrpc_call_state(call)) {
case RXRPC_CALL_CLIENT_SEND_REQUEST:
case RXRPC_CALL_CLIENT_AWAIT_REPLY:
@@ -703,6 +706,43 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
}
/*
+ * Determine how many nacks from the previous ACK have now been satisfied.
+ */
+static rxrpc_seq_t rxrpc_input_check_prev_ack(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ rxrpc_seq_t seq)
+{
+ struct sk_buff *skb = call->cong_last_nack;
+ struct rxrpc_ackpacket ack;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, new_acks = 0, retained_nacks = 0;
+ rxrpc_seq_t old_seq = sp->first_ack;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(ack);
+
+ if (after_eq(seq, old_seq + sp->nr_acks)) {
+ summary->nr_new_acks += sp->nr_nacks;
+ summary->nr_new_acks += seq - (old_seq + sp->nr_acks);
+ summary->nr_retained_nacks = 0;
+ } else if (seq == old_seq) {
+ summary->nr_retained_nacks = sp->nr_nacks;
+ } else {
+ for (i = 0; i < sp->nr_acks; i++) {
+ if (acks[i] == RXRPC_ACK_TYPE_NACK) {
+ if (before(old_seq + i, seq))
+ new_acks++;
+ else
+ retained_nacks++;
+ }
+ }
+
+ summary->nr_new_acks += new_acks;
+ summary->nr_retained_nacks = retained_nacks;
+ }
+
+ return old_seq + sp->nr_acks;
+}
+
+/*
* Process individual soft ACKs.
*
* Each ACK in the array corresponds to one packet and can be either an ACK or
@@ -711,25 +751,51 @@ static void rxrpc_input_ackinfo(struct rxrpc_call *call, struct sk_buff *skb,
* the timer on the basis that the peer might just not have processed them at
* the time the ACK was sent.
*/
-static void rxrpc_input_soft_acks(struct rxrpc_call *call, u8 *acks,
- rxrpc_seq_t seq, int nr_acks,
- struct rxrpc_ack_summary *summary)
+static void rxrpc_input_soft_acks(struct rxrpc_call *call,
+ struct rxrpc_ack_summary *summary,
+ struct sk_buff *skb,
+ rxrpc_seq_t seq,
+ rxrpc_seq_t since)
{
- unsigned int i;
+ struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
+ unsigned int i, old_nacks = 0;
+ rxrpc_seq_t lowest_nak = seq + sp->nr_acks;
+ u8 *acks = skb->data + sizeof(struct rxrpc_wire_header) + sizeof(struct rxrpc_ackpacket);
- for (i = 0; i < nr_acks; i++) {
+ for (i = 0; i < sp->nr_acks; i++) {
if (acks[i] == RXRPC_ACK_TYPE_ACK) {
summary->nr_acks++;
- summary->nr_new_acks++;
+ if (after_eq(seq, since))
+ summary->nr_new_acks++;
} else {
- if (!summary->saw_nacks &&
- call->acks_lowest_nak != seq + i) {
- call->acks_lowest_nak = seq + i;
- summary->new_low_nack = true;
- }
summary->saw_nacks = true;
+ if (before(seq, since)) {
+ /* Overlap with previous ACK */
+ old_nacks++;
+ } else {
+ summary->nr_new_nacks++;
+ sp->nr_nacks++;
+ }
+
+ if (before(seq, lowest_nak))
+ lowest_nak = seq;
}
+ seq++;
+ }
+
+ if (lowest_nak != call->acks_lowest_nak) {
+ call->acks_lowest_nak = lowest_nak;
+ summary->new_low_nack = true;
}
+
+ /* We *can* have more nacks than we did - the peer is permitted to drop
+ * packets it has soft-acked and re-request them. Further, it is
+ * possible for the nack distribution to change whilst the number of
+ * nacks stays the same or goes down.
+ */
+ if (old_nacks < summary->nr_retained_nacks)
+ summary->nr_new_acks += summary->nr_retained_nacks - old_nacks;
+ summary->nr_retained_nacks = old_nacks;
}
/*
@@ -773,7 +839,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
struct rxrpc_skb_priv *sp = rxrpc_skb(skb);
struct rxrpc_ackinfo info;
rxrpc_serial_t ack_serial, acked_serial;
- rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt;
+ rxrpc_seq_t first_soft_ack, hard_ack, prev_pkt, since;
int nr_acks, offset, ioffset;
_enter("");
@@ -789,6 +855,8 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
prev_pkt = ntohl(ack.previousPacket);
hard_ack = first_soft_ack - 1;
nr_acks = ack.nAcks;
+ sp->first_ack = first_soft_ack;
+ sp->nr_acks = nr_acks;
summary.ack_reason = (ack.reason < RXRPC_ACK__INVALID ?
ack.reason : RXRPC_ACK__INVALID);
@@ -858,6 +926,16 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0)
skb_condense(skb);
+ if (call->cong_last_nack) {
+ since = rxrpc_input_check_prev_ack(call, &summary, first_soft_ack);
+ rxrpc_free_skb(call->cong_last_nack, rxrpc_skb_put_last_nack);
+ call->cong_last_nack = NULL;
+ } else {
+ summary.nr_new_acks = first_soft_ack - call->acks_first_seq;
+ call->acks_lowest_nak = first_soft_ack + nr_acks;
+ since = first_soft_ack;
+ }
+
call->acks_latest_ts = skb->tstamp;
call->acks_first_seq = first_soft_ack;
call->acks_prev_seq = prev_pkt;
@@ -866,7 +944,7 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
case RXRPC_ACK_PING:
break;
default:
- if (after(acked_serial, call->acks_highest_serial))
+ if (acked_serial && after(acked_serial, call->acks_highest_serial))
call->acks_highest_serial = acked_serial;
break;
}
@@ -905,8 +983,9 @@ static void rxrpc_input_ack(struct rxrpc_call *call, struct sk_buff *skb)
if (nr_acks > 0) {
if (offset > (int)skb->len - nr_acks)
return rxrpc_proto_abort(call, 0, rxrpc_eproto_ackr_short_sack);
- rxrpc_input_soft_acks(call, skb->data + offset, first_soft_ack,
- nr_acks, &summary);
+ rxrpc_input_soft_acks(call, &summary, skb, first_soft_ack, since);
+ rxrpc_get_skb(skb, rxrpc_skb_get_last_nack);
+ call->cong_last_nack = skb;
}
if (test_bit(RXRPC_CALL_TX_LAST, &call->flags) &&
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index a0906145e829..4a292f860ae3 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -216,7 +216,7 @@ int rxrpc_send_ack_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
iov[0].iov_len = sizeof(txb->wire) + sizeof(txb->ack) + n;
len = iov[0].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
trace_rxrpc_tx_ack(call->debug_id, serial,
ntohl(txb->ack.firstPacket),
@@ -302,7 +302,7 @@ int rxrpc_send_abort_packet(struct rxrpc_call *call)
iov[0].iov_base = &pkt;
iov[0].iov_len = sizeof(pkt);
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
pkt.whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 1, sizeof(pkt));
@@ -334,7 +334,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct rxrpc_txbuf *txb)
_enter("%x,{%d}", txb->seq, txb->len);
/* Each transmission of a Tx packet needs a new serial number */
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
txb->wire.serial = htonl(serial);
if (test_bit(RXRPC_CONN_PROBING_FOR_UPGRADE, &conn->flags) &&
@@ -558,7 +558,7 @@ void rxrpc_send_conn_abort(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
iov_iter_kvec(&msg.msg_iter, WRITE, iov, 2, len);
diff --git a/net/rxrpc/proc.c b/net/rxrpc/proc.c
index 6c86cbb98d1d..26dc2f26d92d 100644
--- a/net/rxrpc/proc.c
+++ b/net/rxrpc/proc.c
@@ -181,7 +181,7 @@ print:
atomic_read(&conn->active),
state,
key_serial(conn->key),
- atomic_read(&conn->serial),
+ conn->tx_serial,
conn->hi_serial,
conn->channels[0].call_id,
conn->channels[1].call_id,
diff --git a/net/rxrpc/rxkad.c b/net/rxrpc/rxkad.c
index b52dedcebce0..6b32d61d4cdc 100644
--- a/net/rxrpc/rxkad.c
+++ b/net/rxrpc/rxkad.c
@@ -664,7 +664,7 @@ static int rxkad_issue_challenge(struct rxrpc_connection *conn)
len = iov[0].iov_len + iov[1].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
ret = kernel_sendmsg(conn->local->socket, &msg, iov, 2, len);
@@ -721,7 +721,7 @@ static int rxkad_send_response(struct rxrpc_connection *conn,
len = iov[0].iov_len + iov[1].iov_len + iov[2].iov_len;
- serial = atomic_inc_return(&conn->serial);
+ serial = rxrpc_get_next_serial(conn);
whdr.serial = htonl(serial);
rxrpc_local_dont_fragment(conn->local, false);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 92a12e3d0fe6..ff3d396a65aa 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1560,6 +1560,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
chain_prev = chain,
chain = __tcf_get_next_chain(block, chain),
tcf_chain_put(chain_prev)) {
+ if (chain->tmplt_ops && add)
+ chain->tmplt_ops->tmplt_reoffload(chain, true, cb,
+ cb_priv);
for (tp = __tcf_get_next_proto(chain, NULL); tp;
tp_prev = tp,
tp = __tcf_get_next_proto(chain, tp),
@@ -1575,6 +1578,9 @@ tcf_block_playback_offloads(struct tcf_block *block, flow_setup_cb_t *cb,
goto err_playback_remove;
}
}
+ if (chain->tmplt_ops && !add)
+ chain->tmplt_ops->tmplt_reoffload(chain, false, cb,
+ cb_priv);
}
return 0;
@@ -3000,7 +3006,8 @@ static int tc_chain_tmplt_add(struct tcf_chain *chain, struct net *net,
ops = tcf_proto_lookup_ops(name, true, extack);
if (IS_ERR(ops))
return PTR_ERR(ops);
- if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump) {
+ if (!ops->tmplt_create || !ops->tmplt_destroy || !ops->tmplt_dump ||
+ !ops->tmplt_reoffload) {
NL_SET_ERR_MSG(extack, "Chain templates are not supported with specified classifier");
module_put(ops->owner);
return -EOPNOTSUPP;
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index e5314a31f75a..efb9d2811b73 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2721,6 +2721,28 @@ static void fl_tmplt_destroy(void *tmplt_priv)
kfree(tmplt);
}
+static void fl_tmplt_reoffload(struct tcf_chain *chain, bool add,
+ flow_setup_cb_t *cb, void *cb_priv)
+{
+ struct fl_flow_tmplt *tmplt = chain->tmplt_priv;
+ struct flow_cls_offload cls_flower = {};
+
+ cls_flower.rule = flow_rule_alloc(0);
+ if (!cls_flower.rule)
+ return;
+
+ cls_flower.common.chain_index = chain->index;
+ cls_flower.command = add ? FLOW_CLS_TMPLT_CREATE :
+ FLOW_CLS_TMPLT_DESTROY;
+ cls_flower.cookie = (unsigned long) tmplt;
+ cls_flower.rule->match.dissector = &tmplt->dissector;
+ cls_flower.rule->match.mask = &tmplt->mask;
+ cls_flower.rule->match.key = &tmplt->dummy_key;
+
+ cb(TC_SETUP_CLSFLOWER, &cls_flower, cb_priv);
+ kfree(cls_flower.rule);
+}
+
static int fl_dump_key_val(struct sk_buff *skb,
void *val, int val_type,
void *mask, int mask_type, int len)
@@ -3628,6 +3650,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
.bind_class = fl_bind_class,
.tmplt_create = fl_tmplt_create,
.tmplt_destroy = fl_tmplt_destroy,
+ .tmplt_reoffload = fl_tmplt_reoffload,
.tmplt_dump = fl_tmplt_dump,
.get_exts = fl_get_exts,
.owner = THIS_MODULE,
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 95cc95458e2d..e4c858411207 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1877,9 +1877,15 @@ static bool smcd_lgr_match(struct smc_link_group *lgr,
struct smcd_dev *smcismdev,
struct smcd_gid *peer_gid)
{
- return lgr->peer_gid.gid == peer_gid->gid && lgr->smcd == smcismdev &&
- smc_ism_is_virtual(smcismdev) ?
- (lgr->peer_gid.gid_ext == peer_gid->gid_ext) : 1;
+ if (lgr->peer_gid.gid != peer_gid->gid ||
+ lgr->smcd != smcismdev)
+ return false;
+
+ if (smc_ism_is_virtual(smcismdev) &&
+ lgr->peer_gid.gid_ext != peer_gid->gid_ext)
+ return false;
+
+ return true;
}
/* create a new SMC connection (and a new link group if necessary) */
diff --git a/net/smc/smc_diag.c b/net/smc/smc_diag.c
index 52f7c4f1e767..5a33908015f3 100644
--- a/net/smc/smc_diag.c
+++ b/net/smc/smc_diag.c
@@ -164,7 +164,7 @@ static int __smc_diag_dump(struct sock *sk, struct sk_buff *skb,
}
if (smc_conn_lgr_valid(&smc->conn) && smc->conn.lgr->is_smcd &&
(req->diag_ext & (1 << (SMC_DIAG_DMBINFO - 1))) &&
- !list_empty(&smc->conn.lgr->list)) {
+ !list_empty(&smc->conn.lgr->list) && smc->conn.rmb_desc) {
struct smc_connection *conn = &smc->conn;
struct smcd_diag_dmbinfo dinfo;
struct smcd_dev *smcd = conn->lgr->smcd;
diff --git a/net/sunrpc/svc.c b/net/sunrpc/svc.c
index f60c93e5a25d..b969e505c7b7 100644
--- a/net/sunrpc/svc.c
+++ b/net/sunrpc/svc.c
@@ -1598,10 +1598,10 @@ void svc_process_bc(struct rpc_rqst *req, struct svc_rqst *rqstp)
/* Finally, send the reply synchronously */
if (rqstp->bc_to_initval > 0) {
timeout.to_initval = rqstp->bc_to_initval;
- timeout.to_retries = rqstp->bc_to_initval;
+ timeout.to_retries = rqstp->bc_to_retries;
} else {
timeout.to_initval = req->rq_xprt->timeout->to_initval;
- timeout.to_initval = req->rq_xprt->timeout->to_retries;
+ timeout.to_retries = req->rq_xprt->timeout->to_retries;
}
memcpy(&req->rq_snd_buf, &rqstp->rq_res, sizeof(req->rq_snd_buf));
task = rpc_run_bc_task(req, &timeout);
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index bfb2f78523a8..545017a3daa4 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -717,12 +717,12 @@ static int svc_udp_sendto(struct svc_rqst *rqstp)
ARRAY_SIZE(rqstp->rq_bvec), xdr);
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
if (err == -ECONNREFUSED) {
/* ICMP error on earlier request. */
iov_iter_bvec(&msg.msg_iter, ITER_SOURCE, rqstp->rq_bvec,
- count, 0);
+ count, rqstp->rq_res.len);
err = sock_sendmsg(svsk->sk_sock, &msg);
}
diff --git a/net/tipc/bearer.c b/net/tipc/bearer.c
index 2cde375477e3..878415c43527 100644
--- a/net/tipc/bearer.c
+++ b/net/tipc/bearer.c
@@ -1086,6 +1086,12 @@ int tipc_nl_bearer_add(struct sk_buff *skb, struct genl_info *info)
#ifdef CONFIG_TIPC_MEDIA_UDP
if (attrs[TIPC_NLA_BEARER_UDP_OPTS]) {
+ if (b->media->type_id != TIPC_MEDIA_TYPE_UDP) {
+ rtnl_unlock();
+ NL_SET_ERR_MSG(info->extack, "UDP option is unsupported");
+ return -EINVAL;
+ }
+
err = tipc_udp_nl_bearer_add(b,
attrs[TIPC_NLA_BEARER_UDP_OPTS]);
if (err) {
diff --git a/net/unix/af_unix.c b/net/unix/af_unix.c
index ac1f2bc18fc9..30b178ebba60 100644
--- a/net/unix/af_unix.c
+++ b/net/unix/af_unix.c
@@ -1344,13 +1344,11 @@ static void unix_state_double_lock(struct sock *sk1, struct sock *sk2)
unix_state_lock(sk1);
return;
}
- if (sk1 < sk2) {
- unix_state_lock(sk1);
- unix_state_lock_nested(sk2);
- } else {
- unix_state_lock(sk2);
- unix_state_lock_nested(sk1);
- }
+ if (sk1 > sk2)
+ swap(sk1, sk2);
+
+ unix_state_lock(sk1);
+ unix_state_lock_nested(sk2, U_LOCK_SECOND);
}
static void unix_state_double_unlock(struct sock *sk1, struct sock *sk2)
@@ -1591,7 +1589,7 @@ restart:
goto out_unlock;
}
- unix_state_lock_nested(sk);
+ unix_state_lock_nested(sk, U_LOCK_SECOND);
if (sk->sk_state != st) {
unix_state_unlock(sk);
diff --git a/net/unix/diag.c b/net/unix/diag.c
index bec09a3a1d44..be19827eca36 100644
--- a/net/unix/diag.c
+++ b/net/unix/diag.c
@@ -84,7 +84,7 @@ static int sk_diag_dump_icons(struct sock *sk, struct sk_buff *nlskb)
* queue lock. With the other's queue locked it's
* OK to lock the state.
*/
- unix_state_lock_nested(req);
+ unix_state_lock_nested(req, U_LOCK_DIAG);
peer = unix_sk(req)->peer;
buf[i++] = (peer ? sock_i_ino(peer) : 0);
unix_state_unlock(req);
diff --git a/net/unix/garbage.c b/net/unix/garbage.c
index 2405f0f9af31..8f63f0b4bf01 100644
--- a/net/unix/garbage.c
+++ b/net/unix/garbage.c
@@ -314,6 +314,17 @@ void unix_gc(void)
/* Here we are. Hitlist is filled. Die. */
__skb_queue_purge(&hitlist);
+#if IS_ENABLED(CONFIG_AF_UNIX_OOB)
+ list_for_each_entry_safe(u, next, &gc_candidates, link) {
+ struct sk_buff *skb = u->oob_skb;
+
+ if (skb) {
+ u->oob_skb = NULL;
+ kfree_skb(skb);
+ }
+ }
+#endif
+
spin_lock(&unix_gc_lock);
/* There could be io_uring registered files, just push them back to
diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index a9ac85e09af3..10345388ad13 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -206,7 +206,6 @@ config CFG80211_KUNIT_TEST
depends on KUNIT
depends on CFG80211
default KUNIT_ALL_TESTS
- depends on !KERNEL_6_2
help
Enable this option to test cfg80211 functions with kunit.
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 409d74c57ca0..3fb1b637352a 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
* Copyright 2006-2010 Johannes Berg <[email protected]>
* Copyright 2013-2014 Intel Mobile Communications GmbH
* Copyright 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2023 Intel Corporation
+ * Copyright (C) 2018-2024 Intel Corporation
*/
#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -1661,6 +1661,7 @@ void wiphy_delayed_work_queue(struct wiphy *wiphy,
unsigned long delay)
{
if (!delay) {
+ del_timer(&dwork->timer);
wiphy_work_queue(wiphy, &dwork->work);
return;
}
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 60877b532993..b09700400d09 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4020,6 +4020,7 @@ static int nl80211_dump_interface(struct sk_buff *skb, struct netlink_callback *
}
wiphy_unlock(&rdev->wiphy);
+ if_start = 0;
wp_idx++;
}
out:
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 2249b1a89d1c..389a52c29bfc 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1731,6 +1731,61 @@ static void cfg80211_update_hidden_bsses(struct cfg80211_internal_bss *known,
}
}
+static void cfg80211_check_stuck_ecsa(struct cfg80211_registered_device *rdev,
+ struct cfg80211_internal_bss *known,
+ const struct cfg80211_bss_ies *old)
+{
+ const struct ieee80211_ext_chansw_ie *ecsa;
+ const struct element *elem_new, *elem_old;
+ const struct cfg80211_bss_ies *new, *bcn;
+
+ if (known->pub.proberesp_ecsa_stuck)
+ return;
+
+ new = rcu_dereference_protected(known->pub.proberesp_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (WARN_ON(!new))
+ return;
+
+ if (new->tsf - old->tsf < USEC_PER_SEC)
+ return;
+
+ elem_old = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ old->data, old->len);
+ if (!elem_old)
+ return;
+
+ elem_new = cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ new->data, new->len);
+ if (!elem_new)
+ return;
+
+ bcn = rcu_dereference_protected(known->pub.beacon_ies,
+ lockdep_is_held(&rdev->bss_lock));
+ if (bcn &&
+ cfg80211_find_elem(WLAN_EID_EXT_CHANSWITCH_ANN,
+ bcn->data, bcn->len))
+ return;
+
+ if (elem_new->datalen != elem_old->datalen)
+ return;
+ if (elem_new->datalen < sizeof(struct ieee80211_ext_chansw_ie))
+ return;
+ if (memcmp(elem_new->data, elem_old->data, elem_new->datalen))
+ return;
+
+ ecsa = (void *)elem_new->data;
+
+ if (!ecsa->mode)
+ return;
+
+ if (ecsa->new_ch_num !=
+ ieee80211_frequency_to_channel(known->pub.channel->center_freq))
+ return;
+
+ known->pub.proberesp_ecsa_stuck = 1;
+}
+
static bool
cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
struct cfg80211_internal_bss *known,
@@ -1750,8 +1805,10 @@ cfg80211_update_known_bss(struct cfg80211_registered_device *rdev,
/* Override possible earlier Beacon frame IEs */
rcu_assign_pointer(known->pub.ies,
new->pub.proberesp_ies);
- if (old)
+ if (old) {
+ cfg80211_check_stuck_ecsa(rdev, known, old);
kfree_rcu((struct cfg80211_bss_ies *)old, rcu_head);
+ }
}
if (rcu_access_pointer(new->pub.beacon_ies)) {
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 9f13aa3353e3..1eadfac03cc4 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -167,8 +167,10 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
contd = XDP_PKT_CONTD;
err = __xsk_rcv_zc(xs, xskb, len, contd);
- if (err || likely(!frags))
- goto out;
+ if (err)
+ goto err;
+ if (likely(!frags))
+ return 0;
xskb_list = &xskb->pool->xskb_list;
list_for_each_entry_safe(pos, tmp, xskb_list, xskb_list_node) {
@@ -177,11 +179,13 @@ static int xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
len = pos->xdp.data_end - pos->xdp.data;
err = __xsk_rcv_zc(xs, pos, len, contd);
if (err)
- return err;
+ goto err;
list_del(&pos->xskb_list_node);
}
-out:
+ return 0;
+err:
+ xsk_buff_free(xdp);
return err;
}
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 28711cc44ced..ce60ecd48a4d 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -555,6 +555,7 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
xskb->xdp.data_meta = xskb->xdp.data;
+ xskb->xdp.flags = 0;
if (pool->dma_need_sync) {
dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,