aboutsummaryrefslogtreecommitdiff
path: root/net/netfilter
diff options
context:
space:
mode:
Diffstat (limited to 'net/netfilter')
-rw-r--r--net/netfilter/ipset/ip_set_core.c10
-rw-r--r--net/netfilter/ipset/ip_set_hash_netiface.c10
-rw-r--r--net/netfilter/ipvs/Kconfig27
-rw-r--r--net/netfilter/ipvs/ip_vs_conn.c26
-rw-r--r--net/netfilter/ipvs/ip_vs_xmit.c6
-rw-r--r--net/netfilter/nf_conntrack_core.c23
-rw-r--r--net/netfilter/nf_conntrack_helper.c4
-rw-r--r--net/netfilter/nf_conntrack_proto_dccp.c52
-rw-r--r--net/netfilter/nf_conntrack_proto_gre.c11
-rw-r--r--net/netfilter/nf_conntrack_sip.c2
-rw-r--r--net/netfilter/nf_flow_table_core.c37
-rw-r--r--net/netfilter/nf_flow_table_ip.c236
-rw-r--r--net/netfilter/nf_nat_core.c92
-rw-r--r--net/netfilter/nf_tables_api.c672
-rw-r--r--net/netfilter/nfnetlink.c3
-rw-r--r--net/netfilter/nfnetlink_osf.c1
-rw-r--r--net/netfilter/nfnetlink_queue.c1
-rw-r--r--net/netfilter/nft_bitwise.c4
-rw-r--r--net/netfilter/nft_byteorder.c20
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/netfilter/nft_dynset.c2
-rw-r--r--net/netfilter/nft_exthdr.c110
-rw-r--r--net/netfilter/nft_flow_offload.c18
-rw-r--r--net/netfilter/nft_fwd_netdev.c2
-rw-r--r--net/netfilter/nft_hash.c2
-rw-r--r--net/netfilter/nft_immediate.c90
-rw-r--r--net/netfilter/nft_lookup.c23
-rw-r--r--net/netfilter/nft_meta.c2
-rw-r--r--net/netfilter/nft_objref.c8
-rw-r--r--net/netfilter/nft_payload.c3
-rw-r--r--net/netfilter/nft_range.c2
-rw-r--r--net/netfilter/nft_reject.c2
-rw-r--r--net/netfilter/nft_rt.c2
-rw-r--r--net/netfilter/nft_set_bitmap.c5
-rw-r--r--net/netfilter/nft_set_hash.c23
-rw-r--r--net/netfilter/nft_set_pipapo.c81
-rw-r--r--net/netfilter/nft_set_rbtree.c5
-rw-r--r--net/netfilter/nft_socket.c4
-rw-r--r--net/netfilter/nft_tproxy.c2
-rw-r--r--net/netfilter/nft_tunnel.c4
-rw-r--r--net/netfilter/nft_xfrm.c4
-rw-r--r--net/netfilter/xt_LED.c3
-rw-r--r--net/netfilter/xt_osf.c1
43 files changed, 1227 insertions, 410 deletions
diff --git a/net/netfilter/ipset/ip_set_core.c b/net/netfilter/ipset/ip_set_core.c
index 46ebee9400da..0b68e2e2824e 100644
--- a/net/netfilter/ipset/ip_set_core.c
+++ b/net/netfilter/ipset/ip_set_core.c
@@ -739,9 +739,7 @@ ip_set_test(ip_set_id_t index, const struct sk_buff *skb,
!(opt->family == set->family || set->family == NFPROTO_UNSPEC))
return 0;
- rcu_read_lock_bh();
ret = set->variant->kadt(set, skb, par, IPSET_TEST, opt);
- rcu_read_unlock_bh();
if (ret == -EAGAIN) {
/* Type requests element to be completed */
@@ -1694,6 +1692,14 @@ call_ad(struct net *net, struct sock *ctnl, struct sk_buff *skb,
bool eexist = flags & IPSET_FLAG_EXIST, retried = false;
do {
+ if (retried) {
+ __ip_set_get(set);
+ nfnl_unlock(NFNL_SUBSYS_IPSET);
+ cond_resched();
+ nfnl_lock(NFNL_SUBSYS_IPSET);
+ __ip_set_put(set);
+ }
+
ip_set_lock(set);
ret = set->variant->uadt(set, tb, adt, &lineno, flags, retried);
ip_set_unlock(set);
diff --git a/net/netfilter/ipset/ip_set_hash_netiface.c b/net/netfilter/ipset/ip_set_hash_netiface.c
index 031073286236..95aeb31c60e0 100644
--- a/net/netfilter/ipset/ip_set_hash_netiface.c
+++ b/net/netfilter/ipset/ip_set_hash_netiface.c
@@ -40,7 +40,7 @@ MODULE_ALIAS("ip_set_hash:net,iface");
#define IP_SET_HASH_WITH_MULTI
#define IP_SET_HASH_WITH_NET0
-#define STRLCPY(a, b) strlcpy(a, b, IFNAMSIZ)
+#define STRSCPY(a, b) strscpy(a, b, IFNAMSIZ)
/* IPv4 variant */
@@ -182,11 +182,11 @@ hash_netiface4_kadt(struct ip_set *set, const struct sk_buff *skb,
if (!eiface)
return -EINVAL;
- STRLCPY(e.iface, eiface);
+ STRSCPY(e.iface, eiface);
e.physdev = 1;
#endif
} else {
- STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
}
if (strlen(e.iface) == 0)
@@ -400,11 +400,11 @@ hash_netiface6_kadt(struct ip_set *set, const struct sk_buff *skb,
if (!eiface)
return -EINVAL;
- STRLCPY(e.iface, eiface);
+ STRSCPY(e.iface, eiface);
e.physdev = 1;
#endif
} else {
- STRLCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
+ STRSCPY(e.iface, SRCDIR ? IFACE(in) : IFACE(out));
}
if (strlen(e.iface) == 0)
diff --git a/net/netfilter/ipvs/Kconfig b/net/netfilter/ipvs/Kconfig
index 271da8447b29..2a3017b9c001 100644
--- a/net/netfilter/ipvs/Kconfig
+++ b/net/netfilter/ipvs/Kconfig
@@ -44,7 +44,8 @@ config IP_VS_DEBUG
config IP_VS_TAB_BITS
int "IPVS connection table size (the Nth power of 2)"
- range 8 20
+ range 8 20 if !64BIT
+ range 8 27 if 64BIT
default 12
help
The IPVS connection hash table uses the chaining scheme to handle
@@ -54,24 +55,24 @@ config IP_VS_TAB_BITS
Note the table size must be power of 2. The table size will be the
value of 2 to the your input number power. The number to choose is
- from 8 to 20, the default number is 12, which means the table size
- is 4096. Don't input the number too small, otherwise you will lose
- performance on it. You can adapt the table size yourself, according
- to your virtual server application. It is good to set the table size
- not far less than the number of connections per second multiplying
- average lasting time of connection in the table. For example, your
- virtual server gets 200 connections per second, the connection lasts
- for 200 seconds in average in the connection table, the table size
- should be not far less than 200x200, it is good to set the table
- size 32768 (2**15).
+ from 8 to 27 for 64BIT(20 otherwise), the default number is 12,
+ which means the table size is 4096. Don't input the number too
+ small, otherwise you will lose performance on it. You can adapt the
+ table size yourself, according to your virtual server application.
+ It is good to set the table size not far less than the number of
+ connections per second multiplying average lasting time of
+ connection in the table. For example, your virtual server gets 200
+ connections per second, the connection lasts for 200 seconds in
+ average in the connection table, the table size should be not far
+ less than 200x200, it is good to set the table size 32768 (2**15).
Another note that each connection occupies 128 bytes effectively and
each hash entry uses 8 bytes, so you can estimate how much memory is
needed for your box.
You can overwrite this number setting conn_tab_bits module parameter
- or by appending ip_vs.conn_tab_bits=? to the kernel command line
- if IP VS was compiled built-in.
+ or by appending ip_vs.conn_tab_bits=? to the kernel command line if
+ IP VS was compiled built-in.
comment "IPVS transport protocol load balancing support"
diff --git a/net/netfilter/ipvs/ip_vs_conn.c b/net/netfilter/ipvs/ip_vs_conn.c
index 928e64653837..9065da3cdd12 100644
--- a/net/netfilter/ipvs/ip_vs_conn.c
+++ b/net/netfilter/ipvs/ip_vs_conn.c
@@ -26,7 +26,6 @@
#include <linux/net.h>
#include <linux/kernel.h>
#include <linux/module.h>
-#include <linux/vmalloc.h>
#include <linux/proc_fs.h> /* for proc_net_* */
#include <linux/slab.h>
#include <linux/seq_file.h>
@@ -1482,13 +1481,21 @@ void __net_exit ip_vs_conn_net_cleanup(struct netns_ipvs *ipvs)
int __init ip_vs_conn_init(void)
{
size_t tab_array_size;
+ int max_avail;
+#if BITS_PER_LONG > 32
+ int max = 27;
+#else
+ int max = 20;
+#endif
+ int min = 8;
int idx;
- /* Compute size and mask */
- if (ip_vs_conn_tab_bits < 8 || ip_vs_conn_tab_bits > 20) {
- pr_info("conn_tab_bits not in [8, 20]. Using default value\n");
- ip_vs_conn_tab_bits = CONFIG_IP_VS_TAB_BITS;
- }
+ max_avail = order_base_2(totalram_pages()) + PAGE_SHIFT;
+ max_avail -= 2; /* ~4 in hash row */
+ max_avail -= 1; /* IPVS up to 1/2 of mem */
+ max_avail -= order_base_2(sizeof(struct ip_vs_conn));
+ max = clamp(max, min, max_avail);
+ ip_vs_conn_tab_bits = clamp_val(ip_vs_conn_tab_bits, min, max);
ip_vs_conn_tab_size = 1 << ip_vs_conn_tab_bits;
ip_vs_conn_tab_mask = ip_vs_conn_tab_size - 1;
@@ -1497,7 +1504,8 @@ int __init ip_vs_conn_init(void)
*/
tab_array_size = array_size(ip_vs_conn_tab_size,
sizeof(*ip_vs_conn_tab));
- ip_vs_conn_tab = vmalloc(tab_array_size);
+ ip_vs_conn_tab = kvmalloc_array(ip_vs_conn_tab_size,
+ sizeof(*ip_vs_conn_tab), GFP_KERNEL);
if (!ip_vs_conn_tab)
return -ENOMEM;
@@ -1506,7 +1514,7 @@ int __init ip_vs_conn_init(void)
sizeof(struct ip_vs_conn), 0,
SLAB_HWCACHE_ALIGN, NULL);
if (!ip_vs_conn_cachep) {
- vfree(ip_vs_conn_tab);
+ kvfree(ip_vs_conn_tab);
return -ENOMEM;
}
@@ -1534,5 +1542,5 @@ void ip_vs_conn_cleanup(void)
rcu_barrier();
/* Release the empty cache */
kmem_cache_destroy(ip_vs_conn_cachep);
- vfree(ip_vs_conn_tab);
+ kvfree(ip_vs_conn_tab);
}
diff --git a/net/netfilter/ipvs/ip_vs_xmit.c b/net/netfilter/ipvs/ip_vs_xmit.c
index feb1d7fcb09f..9193e109e6b3 100644
--- a/net/netfilter/ipvs/ip_vs_xmit.c
+++ b/net/netfilter/ipvs/ip_vs_xmit.c
@@ -139,7 +139,7 @@ retry:
if (PTR_ERR(rt) == -EINVAL && *saddr &&
rt_mode & IP_VS_RT_MODE_CONNECT && !loop) {
*saddr = 0;
- flowi4_update_output(&fl4, 0, 0, daddr, 0);
+ flowi4_update_output(&fl4, 0, daddr, 0);
goto retry;
}
IP_VS_DBG_RL("ip_route_output error, dest: %pI4\n", &daddr);
@@ -147,7 +147,7 @@ retry:
} else if (!*saddr && rt_mode & IP_VS_RT_MODE_CONNECT && fl4.saddr) {
ip_rt_put(rt);
*saddr = fl4.saddr;
- flowi4_update_output(&fl4, 0, 0, daddr, fl4.saddr);
+ flowi4_update_output(&fl4, 0, daddr, fl4.saddr);
loop = true;
goto retry;
}
@@ -1207,6 +1207,7 @@ ip_vs_tunnel_xmit(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
@@ -1349,6 +1350,7 @@ ip_vs_tunnel_xmit_v6(struct sk_buff *skb, struct ip_vs_conn *cp,
skb->transport_header = skb->network_header;
skb_set_inner_ipproto(skb, next_protocol);
+ skb_set_inner_mac_header(skb, skb_inner_network_offset(skb));
if (tun_type == IP_VS_CONN_F_TUNNEL_TYPE_GUE) {
bool check = false;
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index c4ccfec6cb98..992393102d5f 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -211,24 +211,18 @@ static u32 hash_conntrack_raw(const struct nf_conntrack_tuple *tuple,
unsigned int zoneid,
const struct net *net)
{
- u64 a, b, c, d;
+ siphash_key_t key;
get_random_once(&nf_conntrack_hash_rnd, sizeof(nf_conntrack_hash_rnd));
- /* The direction must be ignored, handle usable tuplehash members manually */
- a = (u64)tuple->src.u3.all[0] << 32 | tuple->src.u3.all[3];
- b = (u64)tuple->dst.u3.all[0] << 32 | tuple->dst.u3.all[3];
+ key = nf_conntrack_hash_rnd;
- c = (__force u64)tuple->src.u.all << 32 | (__force u64)tuple->dst.u.all << 16;
- c |= tuple->dst.protonum;
+ key.key[0] ^= zoneid;
+ key.key[1] ^= net_hash_mix(net);
- d = (u64)zoneid << 32 | net_hash_mix(net);
-
- /* IPv4: u3.all[1,2,3] == 0 */
- c ^= (u64)tuple->src.u3.all[1] << 32 | tuple->src.u3.all[2];
- d += (u64)tuple->dst.u3.all[1] << 32 | tuple->dst.u3.all[2];
-
- return (u32)siphash_4u64(a, b, c, d, &nf_conntrack_hash_rnd);
+ return siphash((void *)tuple,
+ offsetofend(struct nf_conntrack_tuple, dst.__nfct_hash_offsetend),
+ &key);
}
static u32 scale_hash(u32 hash)
@@ -2260,6 +2254,9 @@ static int nf_confirm_cthelper(struct sk_buff *skb, struct nf_conn *ct,
return 0;
helper = rcu_dereference(help->helper);
+ if (!helper)
+ return 0;
+
if (!(helper->flags & NF_CT_HELPER_F_USERSPACE))
return 0;
diff --git a/net/netfilter/nf_conntrack_helper.c b/net/netfilter/nf_conntrack_helper.c
index 0c4db2f2ac43..f22691f83853 100644
--- a/net/netfilter/nf_conntrack_helper.c
+++ b/net/netfilter/nf_conntrack_helper.c
@@ -360,6 +360,9 @@ int nf_conntrack_helper_register(struct nf_conntrack_helper *me)
BUG_ON(me->expect_class_max >= NF_CT_MAX_EXPECT_CLASSES);
BUG_ON(strlen(me->name) > NF_CT_HELPER_NAME_LEN - 1);
+ if (!nf_ct_helper_hash)
+ return -ENOENT;
+
if (me->expect_policy->max_expected > NF_CT_EXPECT_MAX_CNT)
return -EINVAL;
@@ -515,4 +518,5 @@ int nf_conntrack_helper_init(void)
void nf_conntrack_helper_fini(void)
{
kvfree(nf_ct_helper_hash);
+ nf_ct_helper_hash = NULL;
}
diff --git a/net/netfilter/nf_conntrack_proto_dccp.c b/net/netfilter/nf_conntrack_proto_dccp.c
index c1557d47ccd1..d4fd626d2b8c 100644
--- a/net/netfilter/nf_conntrack_proto_dccp.c
+++ b/net/netfilter/nf_conntrack_proto_dccp.c
@@ -432,9 +432,19 @@ static bool dccp_error(const struct dccp_hdr *dh,
struct sk_buff *skb, unsigned int dataoff,
const struct nf_hook_state *state)
{
+ static const unsigned long require_seq48 = 1 << DCCP_PKT_REQUEST |
+ 1 << DCCP_PKT_RESPONSE |
+ 1 << DCCP_PKT_CLOSEREQ |
+ 1 << DCCP_PKT_CLOSE |
+ 1 << DCCP_PKT_RESET |
+ 1 << DCCP_PKT_SYNC |
+ 1 << DCCP_PKT_SYNCACK;
unsigned int dccp_len = skb->len - dataoff;
unsigned int cscov;
const char *msg;
+ u8 type;
+
+ BUILD_BUG_ON(DCCP_PKT_INVALID >= BITS_PER_LONG);
if (dh->dccph_doff * 4 < sizeof(struct dccp_hdr) ||
dh->dccph_doff * 4 > dccp_len) {
@@ -459,34 +469,70 @@ static bool dccp_error(const struct dccp_hdr *dh,
goto out_invalid;
}
- if (dh->dccph_type >= DCCP_PKT_INVALID) {
+ type = dh->dccph_type;
+ if (type >= DCCP_PKT_INVALID) {
msg = "nf_ct_dccp: reserved packet type ";
goto out_invalid;
}
+
+ if (test_bit(type, &require_seq48) && !dh->dccph_x) {
+ msg = "nf_ct_dccp: type lacks 48bit sequence numbers";
+ goto out_invalid;
+ }
+
return false;
out_invalid:
nf_l4proto_log_invalid(skb, state, IPPROTO_DCCP, "%s", msg);
return true;
}
+struct nf_conntrack_dccp_buf {
+ struct dccp_hdr dh; /* generic header part */
+ struct dccp_hdr_ext ext; /* optional depending dh->dccph_x */
+ union { /* depends on header type */
+ struct dccp_hdr_ack_bits ack;
+ struct dccp_hdr_request req;
+ struct dccp_hdr_response response;
+ struct dccp_hdr_reset rst;
+ } u;
+};
+
+static struct dccp_hdr *
+dccp_header_pointer(const struct sk_buff *skb, int offset, const struct dccp_hdr *dh,
+ struct nf_conntrack_dccp_buf *buf)
+{
+ unsigned int hdrlen = __dccp_hdr_len(dh);
+
+ if (hdrlen > sizeof(*buf))
+ return NULL;
+
+ return skb_header_pointer(skb, offset, hdrlen, buf);
+}
+
int nf_conntrack_dccp_packet(struct nf_conn *ct, struct sk_buff *skb,
unsigned int dataoff,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
enum ip_conntrack_dir dir = CTINFO2DIR(ctinfo);
- struct dccp_hdr _dh, *dh;
+ struct nf_conntrack_dccp_buf _dh;
u_int8_t type, old_state, new_state;
enum ct_dccp_roles role;
unsigned int *timeouts;
+ struct dccp_hdr *dh;
- dh = skb_header_pointer(skb, dataoff, sizeof(_dh), &_dh);
+ dh = skb_header_pointer(skb, dataoff, sizeof(*dh), &_dh.dh);
if (!dh)
return NF_DROP;
if (dccp_error(dh, skb, dataoff, state))
return -NF_ACCEPT;
+ /* pull again, including possible 48 bit sequences and subtype header */
+ dh = dccp_header_pointer(skb, dataoff, dh, &_dh);
+ if (!dh)
+ return NF_DROP;
+
type = dh->dccph_type;
if (!nf_ct_is_confirmed(ct) && !dccp_new(ct, skb, dh, state))
return -NF_ACCEPT;
diff --git a/net/netfilter/nf_conntrack_proto_gre.c b/net/netfilter/nf_conntrack_proto_gre.c
index 728eeb0aea87..af369e686fc5 100644
--- a/net/netfilter/nf_conntrack_proto_gre.c
+++ b/net/netfilter/nf_conntrack_proto_gre.c
@@ -205,6 +205,8 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
enum ip_conntrack_info ctinfo,
const struct nf_hook_state *state)
{
+ unsigned long status;
+
if (!nf_ct_is_confirmed(ct)) {
unsigned int *timeouts = nf_ct_timeout_lookup(ct);
@@ -217,11 +219,17 @@ int nf_conntrack_gre_packet(struct nf_conn *ct,
ct->proto.gre.timeout = timeouts[GRE_CT_UNREPLIED];
}
+ status = READ_ONCE(ct->status);
/* If we've seen traffic both ways, this is a GRE connection.
* Extend timeout. */
- if (ct->status & IPS_SEEN_REPLY) {
+ if (status & IPS_SEEN_REPLY) {
nf_ct_refresh_acct(ct, ctinfo, skb,
ct->proto.gre.stream_timeout);
+
+ /* never set ASSURED for IPS_NAT_CLASH, they time out soon */
+ if (unlikely((status & IPS_NAT_CLASH)))
+ return NF_ACCEPT;
+
/* Also, more likely to be important, and not a probe. */
if (!test_and_set_bit(IPS_ASSURED_BIT, &ct->status))
nf_conntrack_event_cache(IPCT_ASSURED, ct);
@@ -296,6 +304,7 @@ void nf_conntrack_gre_init_net(struct net *net)
/* protocol helper struct */
const struct nf_conntrack_l4proto nf_conntrack_l4proto_gre = {
.l4proto = IPPROTO_GRE,
+ .allow_clash = true,
#ifdef CONFIG_NF_CONNTRACK_PROCFS
.print_conntrack = gre_print_conntrack,
#endif
diff --git a/net/netfilter/nf_conntrack_sip.c b/net/netfilter/nf_conntrack_sip.c
index 77f5e82d8e3f..d0eac27f6ba0 100644
--- a/net/netfilter/nf_conntrack_sip.c
+++ b/net/netfilter/nf_conntrack_sip.c
@@ -611,7 +611,7 @@ int ct_sip_parse_numerical_param(const struct nf_conn *ct, const char *dptr,
start += strlen(name);
*val = simple_strtoul(start, &end, 0);
if (start == end)
- return 0;
+ return -1;
if (matchoff && matchlen) {
*matchoff = start - dptr;
*matchlen = end - start;
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 04bd0ed4d2ae..1d34d700bd09 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -125,9 +125,6 @@ static int flow_offload_fill_route(struct flow_offload *flow,
break;
case FLOW_OFFLOAD_XMIT_XFRM:
case FLOW_OFFLOAD_XMIT_NEIGH:
- if (!dst_hold_safe(route->tuple[dir].dst))
- return -1;
-
flow_tuple->dst_cache = dst;
flow_tuple->dst_cookie = flow_offload_dst_cookie(flow_tuple);
break;
@@ -148,27 +145,12 @@ static void nft_flow_dst_release(struct flow_offload *flow,
dst_release(flow->tuplehash[dir].tuple.dst_cache);
}
-int flow_offload_route_init(struct flow_offload *flow,
+void flow_offload_route_init(struct flow_offload *flow,
const struct nf_flow_route *route)
{
- int err;
-
- err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
- if (err < 0)
- return err;
-
- err = flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
- if (err < 0)
- goto err_route_reply;
-
+ flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_ORIGINAL);
+ flow_offload_fill_route(flow, route, FLOW_OFFLOAD_DIR_REPLY);
flow->type = NF_FLOW_OFFLOAD_ROUTE;
-
- return 0;
-
-err_route_reply:
- nft_flow_dst_release(flow, FLOW_OFFLOAD_DIR_ORIGINAL);
-
- return err;
}
EXPORT_SYMBOL_GPL(flow_offload_route_init);
@@ -317,12 +299,12 @@ int flow_offload_add(struct nf_flowtable *flow_table, struct flow_offload *flow)
EXPORT_SYMBOL_GPL(flow_offload_add);
void flow_offload_refresh(struct nf_flowtable *flow_table,
- struct flow_offload *flow)
+ struct flow_offload *flow, bool force)
{
u32 timeout;
timeout = nf_flowtable_time_stamp + flow_offload_get_timeout(flow);
- if (timeout - READ_ONCE(flow->timeout) > HZ)
+ if (force || timeout - READ_ONCE(flow->timeout) > HZ)
WRITE_ONCE(flow->timeout, timeout);
else
return;
@@ -334,6 +316,12 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
}
EXPORT_SYMBOL_GPL(flow_offload_refresh);
+static bool nf_flow_is_outdated(const struct flow_offload *flow)
+{
+ return test_bit(IPS_SEEN_REPLY_BIT, &flow->ct->status) &&
+ !test_bit(NF_FLOW_HW_ESTABLISHED, &flow->flags);
+}
+
static inline bool nf_flow_has_expired(const struct flow_offload *flow)
{
return nf_flow_timeout_delta(flow->timeout) <= 0;
@@ -423,7 +411,8 @@ static void nf_flow_offload_gc_step(struct nf_flowtable *flow_table,
struct flow_offload *flow, void *data)
{
if (nf_flow_has_expired(flow) ||
- nf_ct_is_dying(flow->ct))
+ nf_ct_is_dying(flow->ct) ||
+ nf_flow_is_outdated(flow))
flow_offload_teardown(flow);
if (test_bit(NF_FLOW_TEARDOWN, &flow->flags)) {
diff --git a/net/netfilter/nf_flow_table_ip.c b/net/netfilter/nf_flow_table_ip.c
index 19efba1e51ef..e45fade76409 100644
--- a/net/netfilter/nf_flow_table_ip.c
+++ b/net/netfilter/nf_flow_table_ip.c
@@ -8,6 +8,7 @@
#include <linux/ipv6.h>
#include <linux/netdevice.h>
#include <linux/if_ether.h>
+#include <net/gso.h>
#include <net/ip.h>
#include <net/ipv6.h>
#include <net/ip6_route.h>
@@ -163,38 +164,43 @@ static void nf_flow_tuple_encap(struct sk_buff *skb,
}
}
-static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize,
- u32 offset)
+struct nf_flowtable_ctx {
+ const struct net_device *in;
+ u32 offset;
+ u32 hdrsize;
+};
+
+static int nf_flow_tuple_ip(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
unsigned int thoff;
struct iphdr *iph;
u8 ipproto;
- if (!pskb_may_pull(skb, sizeof(*iph) + offset))
+ if (!pskb_may_pull(skb, sizeof(*iph) + ctx->offset))
return -1;
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
thoff = (iph->ihl * 4);
if (ip_is_fragment(iph) ||
unlikely(ip_has_options(thoff)))
return -1;
- thoff += offset;
+ thoff += ctx->offset;
ipproto = iph->protocol;
switch (ipproto) {
case IPPROTO_TCP:
- *hdrsize = sizeof(struct tcphdr);
+ ctx->hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- *hdrsize = sizeof(struct udphdr);
+ ctx->hdrsize = sizeof(struct udphdr);
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
- *hdrsize = sizeof(struct gre_base_hdr);
+ ctx->hdrsize = sizeof(struct gre_base_hdr);
break;
#endif
default:
@@ -204,7 +210,7 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
if (iph->ttl <= 1)
return -1;
- if (!pskb_may_pull(skb, thoff + *hdrsize))
+ if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
return -1;
switch (ipproto) {
@@ -224,13 +230,13 @@ static int nf_flow_tuple_ip(struct sk_buff *skb, const struct net_device *dev,
}
}
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
tuple->src_v4.s_addr = iph->saddr;
tuple->dst_v4.s_addr = iph->daddr;
tuple->l3proto = AF_INET;
tuple->l4proto = ipproto;
- tuple->iifidx = dev->ifindex;
+ tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
return 0;
@@ -336,58 +342,56 @@ static unsigned int nf_flow_queue_xmit(struct net *net, struct sk_buff *skb,
return NF_STOLEN;
}
-unsigned int
-nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_lookup(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table, struct sk_buff *skb)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
struct flow_offload_tuple tuple = {};
- enum flow_offload_tuple_dir dir;
- struct flow_offload *flow;
- struct net_device *outdev;
- u32 hdrsize, offset = 0;
- unsigned int thoff, mtu;
- struct rtable *rt;
- struct iphdr *iph;
- __be32 nexthop;
- int ret;
if (skb->protocol != htons(ETH_P_IP) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &offset))
- return NF_ACCEPT;
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IP), &ctx->offset))
+ return NULL;
- if (nf_flow_tuple_ip(skb, state->in, &tuple, &hdrsize, offset) < 0)
- return NF_ACCEPT;
+ if (nf_flow_tuple_ip(ctx, skb, &tuple) < 0)
+ return NULL;
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
+ return flow_offload_lookup(flow_table, &tuple);
+}
+
+static int nf_flow_offload_forward(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct flow_offload_tuple_rhash *tuplehash,
+ struct sk_buff *skb)
+{
+ enum flow_offload_tuple_dir dir;
+ struct flow_offload *flow;
+ unsigned int thoff, mtu;
+ struct iphdr *iph;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
- return NF_ACCEPT;
+ return 0;
- iph = (struct iphdr *)(skb_network_header(skb) + offset);
- thoff = (iph->ihl * 4) + offset;
+ iph = (struct iphdr *)(skb_network_header(skb) + ctx->offset);
+ thoff = (iph->ihl * 4) + ctx->offset;
if (nf_flow_state_check(flow, iph->protocol, skb, thoff))
- return NF_ACCEPT;
+ return 0;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
- return NF_ACCEPT;
+ return 0;
}
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
+ if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ return -1;
- flow_offload_refresh(flow_table, flow);
+ flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
- thoff -= offset;
+ thoff -= ctx->offset;
iph = ip_hdr(skb);
nf_flow_nat_ip(flow, skb, thoff, dir, iph);
@@ -398,6 +402,35 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+ return 1;
+}
+
+unsigned int
+nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ enum flow_offload_tuple_dir dir;
+ struct nf_flowtable_ctx ctx = {
+ .in = state->in,
+ };
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct rtable *rt;
+ __be32 nexthop;
+ int ret;
+
+ tuplehash = nf_flow_offload_lookup(&ctx, flow_table, skb);
+ if (!tuplehash)
+ return NF_ACCEPT;
+
+ ret = nf_flow_offload_forward(&ctx, flow_table, tuplehash, skb);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
rt = (struct rtable *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet_skb_parm));
@@ -406,6 +439,9 @@ nf_flow_offload_ip_hook(void *priv, struct sk_buff *skb,
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = (struct rtable *)tuplehash->tuple.dst_cache;
@@ -535,32 +571,31 @@ static void nf_flow_nat_ipv6(const struct flow_offload *flow,
}
}
-static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
- struct flow_offload_tuple *tuple, u32 *hdrsize,
- u32 offset)
+static int nf_flow_tuple_ipv6(struct nf_flowtable_ctx *ctx, struct sk_buff *skb,
+ struct flow_offload_tuple *tuple)
{
struct flow_ports *ports;
struct ipv6hdr *ip6h;
unsigned int thoff;
u8 nexthdr;
- thoff = sizeof(*ip6h) + offset;
+ thoff = sizeof(*ip6h) + ctx->offset;
if (!pskb_may_pull(skb, thoff))
return -1;
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
nexthdr = ip6h->nexthdr;
switch (nexthdr) {
case IPPROTO_TCP:
- *hdrsize = sizeof(struct tcphdr);
+ ctx->hdrsize = sizeof(struct tcphdr);
break;
case IPPROTO_UDP:
- *hdrsize = sizeof(struct udphdr);
+ ctx->hdrsize = sizeof(struct udphdr);
break;
#ifdef CONFIG_NF_CT_PROTO_GRE
case IPPROTO_GRE:
- *hdrsize = sizeof(struct gre_base_hdr);
+ ctx->hdrsize = sizeof(struct gre_base_hdr);
break;
#endif
default:
@@ -570,7 +605,7 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
if (ip6h->hop_limit <= 1)
return -1;
- if (!pskb_may_pull(skb, thoff + *hdrsize))
+ if (!pskb_may_pull(skb, thoff + ctx->hdrsize))
return -1;
switch (nexthdr) {
@@ -590,67 +625,49 @@ static int nf_flow_tuple_ipv6(struct sk_buff *skb, const struct net_device *dev,
}
}
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
tuple->src_v6 = ip6h->saddr;
tuple->dst_v6 = ip6h->daddr;
tuple->l3proto = AF_INET6;
tuple->l4proto = nexthdr;
- tuple->iifidx = dev->ifindex;
+ tuple->iifidx = ctx->in->ifindex;
nf_flow_tuple_encap(skb, tuple);
return 0;
}
-unsigned int
-nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
- const struct nf_hook_state *state)
+static int nf_flow_offload_ipv6_forward(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct flow_offload_tuple_rhash *tuplehash,
+ struct sk_buff *skb)
{
- struct flow_offload_tuple_rhash *tuplehash;
- struct nf_flowtable *flow_table = priv;
- struct flow_offload_tuple tuple = {};
enum flow_offload_tuple_dir dir;
- const struct in6_addr *nexthop;
struct flow_offload *flow;
- struct net_device *outdev;
unsigned int thoff, mtu;
- u32 hdrsize, offset = 0;
struct ipv6hdr *ip6h;
- struct rt6_info *rt;
- int ret;
-
- if (skb->protocol != htons(ETH_P_IPV6) &&
- !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &offset))
- return NF_ACCEPT;
-
- if (nf_flow_tuple_ipv6(skb, state->in, &tuple, &hdrsize, offset) < 0)
- return NF_ACCEPT;
-
- tuplehash = flow_offload_lookup(flow_table, &tuple);
- if (tuplehash == NULL)
- return NF_ACCEPT;
dir = tuplehash->tuple.dir;
flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
- mtu = flow->tuplehash[dir].tuple.mtu + offset;
+ mtu = flow->tuplehash[dir].tuple.mtu + ctx->offset;
if (unlikely(nf_flow_exceeds_mtu(skb, mtu)))
- return NF_ACCEPT;
+ return 0;
- ip6h = (struct ipv6hdr *)(skb_network_header(skb) + offset);
- thoff = sizeof(*ip6h) + offset;
+ ip6h = (struct ipv6hdr *)(skb_network_header(skb) + ctx->offset);
+ thoff = sizeof(*ip6h) + ctx->offset;
if (nf_flow_state_check(flow, ip6h->nexthdr, skb, thoff))
- return NF_ACCEPT;
+ return 0;
if (!nf_flow_dst_check(&tuplehash->tuple)) {
flow_offload_teardown(flow);
- return NF_ACCEPT;
+ return 0;
}
- if (skb_try_make_writable(skb, thoff + hdrsize))
- return NF_DROP;
+ if (skb_try_make_writable(skb, thoff + ctx->hdrsize))
+ return -1;
- flow_offload_refresh(flow_table, flow);
+ flow_offload_refresh(flow_table, flow, false);
nf_flow_encap_pop(skb, tuplehash);
@@ -663,6 +680,52 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
if (flow_table->flags & NF_FLOWTABLE_COUNTER)
nf_ct_acct_update(flow->ct, tuplehash->tuple.dir, skb->len);
+ return 1;
+}
+
+static struct flow_offload_tuple_rhash *
+nf_flow_offload_ipv6_lookup(struct nf_flowtable_ctx *ctx,
+ struct nf_flowtable *flow_table,
+ struct sk_buff *skb)
+{
+ struct flow_offload_tuple tuple = {};
+
+ if (skb->protocol != htons(ETH_P_IPV6) &&
+ !nf_flow_skb_encap_protocol(skb, htons(ETH_P_IPV6), &ctx->offset))
+ return NULL;
+
+ if (nf_flow_tuple_ipv6(ctx, skb, &tuple) < 0)
+ return NULL;
+
+ return flow_offload_lookup(flow_table, &tuple);
+}
+
+unsigned int
+nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
+ const struct nf_hook_state *state)
+{
+ struct flow_offload_tuple_rhash *tuplehash;
+ struct nf_flowtable *flow_table = priv;
+ enum flow_offload_tuple_dir dir;
+ struct nf_flowtable_ctx ctx = {
+ .in = state->in,
+ };
+ const struct in6_addr *nexthop;
+ struct flow_offload *flow;
+ struct net_device *outdev;
+ struct rt6_info *rt;
+ int ret;
+
+ tuplehash = nf_flow_offload_ipv6_lookup(&ctx, flow_table, skb);
+ if (tuplehash == NULL)
+ return NF_ACCEPT;
+
+ ret = nf_flow_offload_ipv6_forward(&ctx, flow_table, tuplehash, skb);
+ if (ret < 0)
+ return NF_DROP;
+ else if (ret == 0)
+ return NF_ACCEPT;
+
if (unlikely(tuplehash->tuple.xmit_type == FLOW_OFFLOAD_XMIT_XFRM)) {
rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
memset(skb->cb, 0, sizeof(struct inet6_skb_parm));
@@ -671,6 +734,9 @@ nf_flow_offload_ipv6_hook(void *priv, struct sk_buff *skb,
return nf_flow_xmit_xfrm(skb, state, &rt->dst);
}
+ dir = tuplehash->tuple.dir;
+ flow = container_of(tuplehash, struct flow_offload, tuplehash[dir]);
+
switch (tuplehash->tuple.xmit_type) {
case FLOW_OFFLOAD_XMIT_NEIGH:
rt = (struct rt6_info *)tuplehash->tuple.dst_cache;
diff --git a/net/netfilter/nf_nat_core.c b/net/netfilter/nf_nat_core.c
index ce829d434f13..fadbd4ed3dc0 100644
--- a/net/netfilter/nf_nat_core.c
+++ b/net/netfilter/nf_nat_core.c
@@ -27,6 +27,9 @@
#include "nf_internals.h"
+#define NF_NAT_MAX_ATTEMPTS 128
+#define NF_NAT_HARDER_THRESH (NF_NAT_MAX_ATTEMPTS / 4)
+
static spinlock_t nf_nat_locks[CONNTRACK_LOCKS];
static DEFINE_MUTEX(nf_nat_proto_mutex);
@@ -197,6 +200,88 @@ nf_nat_used_tuple(const struct nf_conntrack_tuple *tuple,
return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
}
+static bool nf_nat_may_kill(struct nf_conn *ct, unsigned long flags)
+{
+ static const unsigned long flags_refuse = IPS_FIXED_TIMEOUT |
+ IPS_DYING;
+ static const unsigned long flags_needed = IPS_SRC_NAT;
+ enum tcp_conntrack old_state;
+
+ old_state = READ_ONCE(ct->proto.tcp.state);
+ if (old_state < TCP_CONNTRACK_TIME_WAIT)
+ return false;
+
+ if (flags & flags_refuse)
+ return false;
+
+ return (flags & flags_needed) == flags_needed;
+}
+
+/* reverse direction will send packets to new source, so
+ * make sure such packets are invalid.
+ */
+static bool nf_seq_has_advanced(const struct nf_conn *old, const struct nf_conn *new)
+{
+ return (__s32)(new->proto.tcp.seen[0].td_end -
+ old->proto.tcp.seen[0].td_end) > 0;
+}
+
+static int
+nf_nat_used_tuple_harder(const struct nf_conntrack_tuple *tuple,
+ const struct nf_conn *ignored_conntrack,
+ unsigned int attempts_left)
+{
+ static const unsigned long flags_offload = IPS_OFFLOAD | IPS_HW_OFFLOAD;
+ struct nf_conntrack_tuple_hash *thash;
+ const struct nf_conntrack_zone *zone;
+ struct nf_conntrack_tuple reply;
+ unsigned long flags;
+ struct nf_conn *ct;
+ bool taken = true;
+ struct net *net;
+
+ nf_ct_invert_tuple(&reply, tuple);
+
+ if (attempts_left > NF_NAT_HARDER_THRESH ||
+ tuple->dst.protonum != IPPROTO_TCP ||
+ ignored_conntrack->proto.tcp.state != TCP_CONNTRACK_SYN_SENT)
+ return nf_conntrack_tuple_taken(&reply, ignored_conntrack);
+
+ /* :ast few attempts to find a free tcp port. Destructive
+ * action: evict colliding if its in timewait state and the
+ * tcp sequence number has advanced past the one used by the
+ * old entry.
+ */
+ net = nf_ct_net(ignored_conntrack);
+ zone = nf_ct_zone(ignored_conntrack);
+
+ thash = nf_conntrack_find_get(net, zone, &reply);
+ if (!thash)
+ return false;
+
+ ct = nf_ct_tuplehash_to_ctrack(thash);
+
+ if (thash->tuple.dst.dir == IP_CT_DIR_ORIGINAL)
+ goto out;
+
+ if (WARN_ON_ONCE(ct == ignored_conntrack))
+ goto out;
+
+ flags = READ_ONCE(ct->status);
+ if (!nf_nat_may_kill(ct, flags))
+ goto out;
+
+ if (!nf_seq_has_advanced(ct, ignored_conntrack))
+ goto out;
+
+ /* Even if we can evict do not reuse if entry is offloaded. */
+ if (nf_ct_kill(ct))
+ taken = flags & flags_offload;
+out:
+ nf_ct_put(ct);
+ return taken;
+}
+
static bool nf_nat_inet_in_range(const struct nf_conntrack_tuple *t,
const struct nf_nat_range2 *range)
{
@@ -385,7 +470,6 @@ static void nf_nat_l4proto_unique_tuple(struct nf_conntrack_tuple *tuple,
unsigned int range_size, min, max, i, attempts;
__be16 *keyptr;
u16 off;
- static const unsigned int max_attempts = 128;
switch (tuple->dst.protonum) {
case IPPROTO_ICMP:
@@ -471,8 +555,8 @@ find_free_id:
off = get_random_u16();
attempts = range_size;
- if (attempts > max_attempts)
- attempts = max_attempts;
+ if (attempts > NF_NAT_MAX_ATTEMPTS)
+ attempts = NF_NAT_MAX_ATTEMPTS;
/* We are in softirq; doing a search of the entire range risks
* soft lockup when all tuples are already used.
@@ -483,7 +567,7 @@ find_free_id:
another_round:
for (i = 0; i < attempts; i++, off++) {
*keyptr = htons(min + off % range_size);
- if (!nf_nat_used_tuple(tuple, ct))
+ if (!nf_nat_used_tuple_harder(tuple, ct, attempts - i))
return;
}
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index dc5675962de4..237f739da3ca 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -151,6 +151,7 @@ static struct nft_trans *nft_trans_alloc_gfp(const struct nft_ctx *ctx,
return NULL;
INIT_LIST_HEAD(&trans->list);
+ INIT_LIST_HEAD(&trans->binding_list);
trans->msg_type = msg_type;
trans->ctx = *ctx;
@@ -163,13 +164,20 @@ static struct nft_trans *nft_trans_alloc(const struct nft_ctx *ctx,
return nft_trans_alloc_gfp(ctx, msg_type, size, GFP_KERNEL);
}
-static void nft_trans_destroy(struct nft_trans *trans)
+static void nft_trans_list_del(struct nft_trans *trans)
{
list_del(&trans->list);
+ list_del(&trans->binding_list);
+}
+
+static void nft_trans_destroy(struct nft_trans *trans)
+{
+ nft_trans_list_del(trans);
kfree(trans);
}
-static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+static void __nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set,
+ bool bind)
{
struct nftables_pernet *nft_net;
struct net *net = ctx->net;
@@ -183,16 +191,82 @@ static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
switch (trans->msg_type) {
case NFT_MSG_NEWSET:
if (nft_trans_set(trans) == set)
- nft_trans_set_bound(trans) = true;
+ nft_trans_set_bound(trans) = bind;
break;
case NFT_MSG_NEWSETELEM:
if (nft_trans_elem_set(trans) == set)
- nft_trans_elem_set_bound(trans) = true;
+ nft_trans_elem_set_bound(trans) = bind;
+ break;
+ }
+ }
+}
+
+static void nft_set_trans_bind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, true);
+}
+
+static void nft_set_trans_unbind(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ return __nft_set_trans_bind(ctx, set, false);
+}
+
+static void __nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain, bool bind)
+{
+ struct nftables_pernet *nft_net;
+ struct net *net = ctx->net;
+ struct nft_trans *trans;
+
+ if (!nft_chain_binding(chain))
+ return;
+
+ nft_net = nft_pernet(net);
+ list_for_each_entry_reverse(trans, &nft_net->commit_list, list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWCHAIN:
+ if (nft_trans_chain(trans) == chain)
+ nft_trans_chain_bound(trans) = bind;
+ break;
+ case NFT_MSG_NEWRULE:
+ if (trans->ctx.chain == chain)
+ nft_trans_rule_bound(trans) = bind;
break;
}
}
}
+static void nft_chain_trans_bind(const struct nft_ctx *ctx,
+ struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, true);
+}
+
+int nf_tables_bind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ if (!nft_chain_binding(chain))
+ return 0;
+
+ if (nft_chain_binding(ctx->chain))
+ return -EOPNOTSUPP;
+
+ if (chain->bound)
+ return -EBUSY;
+
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
+
+ chain->bound = true;
+ nft_chain_trans_bind(ctx, chain);
+
+ return 0;
+}
+
+void nf_tables_unbind_chain(const struct nft_ctx *ctx, struct nft_chain *chain)
+{
+ __nft_chain_trans_bind(ctx, chain, false);
+}
+
static int nft_netdev_register_hooks(struct net *net,
struct list_head *hook_list)
{
@@ -292,6 +366,19 @@ static void nft_trans_commit_list_add_tail(struct net *net, struct nft_trans *tr
{
struct nftables_pernet *nft_net = nft_pernet(net);
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)))
+ list_add_tail(&trans->binding_list, &nft_net->binding_list);
+ break;
+ }
+
list_add_tail(&trans->list, &nft_net->commit_list);
}
@@ -338,8 +425,9 @@ static struct nft_trans *nft_trans_chain_add(struct nft_ctx *ctx, int msg_type)
ntohl(nla_get_be32(ctx->nla[NFTA_CHAIN_ID]));
}
}
-
+ nft_trans_chain(trans) = ctx->chain;
nft_trans_commit_list_add_tail(ctx->net, trans);
+
return trans;
}
@@ -351,14 +439,13 @@ static int nft_delchain(struct nft_ctx *ctx)
if (IS_ERR(trans))
return PTR_ERR(trans);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nft_deactivate_next(ctx->net, ctx->chain);
return 0;
}
-static void nft_rule_expr_activate(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nft_rule_expr_activate(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr;
@@ -371,9 +458,8 @@ static void nft_rule_expr_activate(const struct nft_ctx *ctx,
}
}
-static void nft_rule_expr_deactivate(const struct nft_ctx *ctx,
- struct nft_rule *rule,
- enum nft_trans_phase phase)
+void nft_rule_expr_deactivate(const struct nft_ctx *ctx, struct nft_rule *rule,
+ enum nft_trans_phase phase)
{
struct nft_expr *expr;
@@ -392,7 +478,7 @@ nf_tables_delrule_deactivate(struct nft_ctx *ctx, struct nft_rule *rule)
/* You cannot delete the same rule twice */
if (nft_is_active_next(ctx->net, rule)) {
nft_deactivate_next(ctx->net, rule);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
return 0;
}
return -ENOENT;
@@ -483,6 +569,7 @@ static int __nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
nft_trans_set_update(trans) = true;
nft_trans_set_gc_int(trans) = desc->gc_int;
nft_trans_set_timeout(trans) = desc->timeout;
+ nft_trans_set_size(trans) = desc->size;
}
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -495,6 +582,58 @@ static int nft_trans_set_add(const struct nft_ctx *ctx, int msg_type,
return __nft_trans_set_add(ctx, msg_type, set, NULL);
}
+static void nft_setelem_data_deactivate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_deactivate(ctx->net, set, elem);
+
+ return 0;
+}
+
+struct nft_set_elem_catchall {
+ struct list_head list;
+ struct rcu_head rcu;
+ void *elem;
+};
+
+static void nft_map_catchall_deactivate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_deactivate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_deactivate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_deactivate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_deactivate(ctx, set);
+}
+
static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
{
int err;
@@ -503,8 +642,11 @@ static int nft_delset(const struct nft_ctx *ctx, struct nft_set *set)
if (err < 0)
return err;
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
nft_deactivate_next(ctx->net, set);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -536,7 +678,7 @@ static int nft_delobj(struct nft_ctx *ctx, struct nft_object *obj)
return err;
nft_deactivate_next(ctx->net, obj);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -571,7 +713,7 @@ static int nft_delflowtable(struct nft_ctx *ctx,
return err;
nft_deactivate_next(ctx->net, flowtable);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
return err;
}
@@ -1600,6 +1742,8 @@ static int nft_dump_basechain_hook(struct sk_buff *skb, int family,
if (nft_base_chain_netdev(family, ops->hooknum)) {
nest_devs = nla_nest_start_noflag(skb, NFTA_HOOK_DEVS);
+ if (!nest_devs)
+ goto nla_put_failure;
if (!hook_list)
hook_list = &basechain->hook_list;
@@ -2224,7 +2368,7 @@ static int nft_basechain_init(struct nft_base_chain *basechain, u8 family,
return 0;
}
-static int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
+int nft_chain_add(struct nft_table *table, struct nft_chain *chain)
{
int err;
@@ -2254,9 +2398,6 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
struct nft_chain *chain;
int err;
- if (table->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_CHAIN_HOOK]) {
struct nft_stats __percpu *stats = NULL;
struct nft_chain_hook hook = {};
@@ -2352,6 +2493,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
if (err < 0)
goto err_destroy_chain;
+ if (!nft_use_inc(&table->use)) {
+ err = -EMFILE;
+ goto err_use;
+ }
+
trans = nft_trans_chain_add(ctx, NFT_MSG_NEWCHAIN);
if (IS_ERR(trans)) {
err = PTR_ERR(trans);
@@ -2368,10 +2514,11 @@ static int nf_tables_addchain(struct nft_ctx *ctx, u8 family, u8 genmask,
goto err_unregister_hook;
}
- table->use++;
-
return 0;
+
err_unregister_hook:
+ nft_use_dec_restore(&table->use);
+err_use:
nf_tables_unregister_hook(net, table, chain);
err_destroy_chain:
nf_tables_chain_destroy(ctx);
@@ -2526,6 +2673,8 @@ static int nf_tables_updchain(struct nft_ctx *ctx, u8 genmask, u8 policy,
nft_trans_basechain(trans) = basechain;
INIT_LIST_HEAD(&nft_trans_chain_hooks(trans));
list_splice(&hook.list, &nft_trans_chain_hooks(trans));
+ if (nla[NFTA_CHAIN_HOOK])
+ module_put(hook.type->owner);
nft_trans_commit_list_add_tail(ctx->net, trans);
@@ -2550,7 +2699,7 @@ err_hooks:
static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
const struct nft_table *table,
- const struct nlattr *nla)
+ const struct nlattr *nla, u8 genmask)
{
struct nftables_pernet *nft_net = nft_pernet(net);
u32 id = ntohl(nla_get_be32(nla));
@@ -2561,7 +2710,8 @@ static struct nft_chain *nft_chain_lookup_byid(const struct net *net,
if (trans->msg_type == NFT_MSG_NEWCHAIN &&
chain->table == table &&
- id == nft_trans_chain_id(trans))
+ id == nft_trans_chain_id(trans) &&
+ nft_active_genmask(chain, genmask))
return chain;
}
return ERR_PTR(-ENOENT);
@@ -2668,21 +2818,18 @@ static int nf_tables_newchain(struct sk_buff *skb, const struct nfnl_info *info,
return nf_tables_addchain(&ctx, family, genmask, policy, flags, extack);
}
-static int nft_delchain_hook(struct nft_ctx *ctx, struct nft_chain *chain,
+static int nft_delchain_hook(struct nft_ctx *ctx,
+ struct nft_base_chain *basechain,
struct netlink_ext_ack *extack)
{
+ const struct nft_chain *chain = &basechain->chain;
const struct nlattr * const *nla = ctx->nla;
struct nft_chain_hook chain_hook = {};
- struct nft_base_chain *basechain;
struct nft_hook *this, *hook;
LIST_HEAD(chain_del_list);
struct nft_trans *trans;
int err;
- if (!nft_is_base_chain(chain))
- return -EOPNOTSUPP;
-
- basechain = nft_base_chain(chain);
err = nft_chain_parse_hook(ctx->net, basechain, nla, &chain_hook,
ctx->family, chain->flags, extack);
if (err < 0)
@@ -2767,7 +2914,12 @@ static int nf_tables_delchain(struct sk_buff *skb, const struct nfnl_info *info,
if (chain->flags & NFT_CHAIN_HW_OFFLOAD)
return -EOPNOTSUPP;
- return nft_delchain_hook(&ctx, chain, extack);
+ if (nft_is_base_chain(chain)) {
+ struct nft_base_chain *basechain = nft_base_chain(chain);
+
+ if (nft_base_chain_netdev(table->family, basechain->ops.hooknum))
+ return nft_delchain_hook(&ctx, basechain, extack);
+ }
}
if (info->nlh->nlmsg_flags & NLM_F_NONREC &&
@@ -3488,8 +3640,7 @@ err_fill_rule_info:
return err;
}
-static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
- struct nft_rule *rule)
+void nf_tables_rule_destroy(const struct nft_ctx *ctx, struct nft_rule *rule)
{
struct nft_expr *expr, *next;
@@ -3506,7 +3657,7 @@ static void nf_tables_rule_destroy(const struct nft_ctx *ctx,
kfree(rule);
}
-void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
+static void nf_tables_rule_release(const struct nft_ctx *ctx, struct nft_rule *rule)
{
nft_rule_expr_deactivate(ctx, rule, NFT_TRANS_RELEASE);
nf_tables_rule_destroy(ctx, rule);
@@ -3594,12 +3745,6 @@ int nft_setelem_validate(const struct nft_ctx *ctx, struct nft_set *set,
return 0;
}
-struct nft_set_elem_catchall {
- struct list_head list;
- struct rcu_head rcu;
- void *elem;
-};
-
int nft_set_catchall_validate(const struct nft_ctx *ctx, struct nft_set *set)
{
u8 genmask = nft_genmask_next(ctx->net);
@@ -3670,7 +3815,8 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EOPNOTSUPP;
} else if (nla[NFTA_RULE_CHAIN_ID]) {
- chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID]);
+ chain = nft_chain_lookup_byid(net, table, nla[NFTA_RULE_CHAIN_ID],
+ genmask);
if (IS_ERR(chain)) {
NL_SET_BAD_ATTR(extack, nla[NFTA_RULE_CHAIN_ID]);
return PTR_ERR(chain);
@@ -3701,9 +3847,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return -EINVAL;
handle = nf_tables_alloc_handle(table);
- if (chain->use == UINT_MAX)
- return -EOVERFLOW;
-
if (nla[NFTA_RULE_POSITION]) {
pos_handle = be64_to_cpu(nla_get_be64(nla[NFTA_RULE_POSITION]));
old_rule = __nft_rule_lookup(chain, pos_handle);
@@ -3797,6 +3940,11 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
+ if (!nft_use_inc(&chain->use)) {
+ err = -EMFILE;
+ goto err_release_rule;
+ }
+
if (info->nlh->nlmsg_flags & NLM_F_REPLACE) {
err = nft_delrule(&ctx, old_rule);
if (err < 0)
@@ -3828,7 +3976,6 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
}
}
kvfree(expr_info);
- chain->use++;
if (flow)
nft_trans_flow_rule(trans) = flow;
@@ -3839,10 +3986,12 @@ static int nf_tables_newrule(struct sk_buff *skb, const struct nfnl_info *info,
return 0;
err_destroy_flow_rule:
+ nft_use_dec_restore(&chain->use);
if (flow)
nft_flow_rule_destroy(flow);
err_release_rule:
- nf_tables_rule_release(&ctx, rule);
+ nft_rule_expr_deactivate(&ctx, rule, NFT_TRANS_PREPARE_ERROR);
+ nf_tables_rule_destroy(&ctx, rule);
err_release_expr:
for (i = 0; i < n; i++) {
if (expr_info[i].ops) {
@@ -4774,6 +4923,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
err = nf_msecs_to_jiffies64(nla[NFTA_SET_TIMEOUT], &desc.timeout);
if (err)
return err;
@@ -4782,6 +4934,10 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (nla[NFTA_SET_GC_INTERVAL] != NULL) {
if (!(flags & NFT_SET_TIMEOUT))
return -EINVAL;
+
+ if (flags & NFT_SET_ANONYMOUS)
+ return -EOPNOTSUPP;
+
desc.gc_int = ntohl(nla_get_be32(nla[NFTA_SET_GC_INTERVAL]));
}
@@ -4828,6 +4984,9 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
if (info->nlh->nlmsg_flags & NLM_F_REPLACE)
return -EOPNOTSUPP;
+ if (nft_set_is_anonymous(set))
+ return -EOPNOTSUPP;
+
err = nft_set_expr_alloc(&ctx, set, nla, exprs, &num_exprs, flags);
if (err < 0)
return err;
@@ -4864,9 +5023,15 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
alloc_size = sizeof(*set) + size + udlen;
if (alloc_size < size || alloc_size > INT_MAX)
return -ENOMEM;
+
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
set = kvzalloc(alloc_size, GFP_KERNEL_ACCOUNT);
- if (!set)
- return -ENOMEM;
+ if (!set) {
+ err = -ENOMEM;
+ goto err_alloc;
+ }
name = nla_strdup(nla[NFTA_SET_NAME], GFP_KERNEL_ACCOUNT);
if (!name) {
@@ -4917,24 +5082,28 @@ static int nf_tables_newset(struct sk_buff *skb, const struct nfnl_info *info,
set->num_exprs = num_exprs;
set->handle = nf_tables_alloc_handle(table);
+ INIT_LIST_HEAD(&set->pending_update);
err = nft_trans_set_add(&ctx, NFT_MSG_NEWSET, set);
if (err < 0)
goto err_set_expr_alloc;
list_add_tail_rcu(&set->list, &table->sets);
- table->use++;
+
return 0;
err_set_expr_alloc:
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(&ctx, set->exprs[i]);
err_set_destroy:
- ops->destroy(set);
+ ops->destroy(&ctx, set);
err_set_init:
kfree(set->name);
err_set_name:
kvfree(set);
+err_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -4945,7 +5114,7 @@ static void nft_set_catchall_destroy(const struct nft_ctx *ctx,
list_for_each_entry_safe(catchall, next, &set->catchall_list, list) {
list_del_rcu(&catchall->list);
- nft_set_elem_destroy(set, catchall->elem, true);
+ nf_tables_set_elem_destroy(ctx, set, catchall->elem);
kfree_rcu(catchall, rcu);
}
}
@@ -4960,7 +5129,7 @@ static void nft_set_destroy(const struct nft_ctx *ctx, struct nft_set *set)
for (i = 0; i < set->num_exprs; i++)
nft_expr_destroy(ctx, set->exprs[i]);
- set->ops->destroy(set);
+ set->ops->destroy(ctx, set);
nft_set_catchall_destroy(ctx, set);
kfree(set->name);
kvfree(set);
@@ -5073,9 +5242,6 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_binding *i;
struct nft_set_iter iter;
- if (set->use == UINT_MAX)
- return -EOVERFLOW;
-
if (!list_empty(&set->bindings) && nft_set_is_anonymous(set))
return -EBUSY;
@@ -5103,10 +5269,12 @@ int nf_tables_bind_set(const struct nft_ctx *ctx, struct nft_set *set,
return iter.err;
}
bind:
+ if (!nft_use_inc(&set->use))
+ return -EMFILE;
+
binding->chain = ctx->chain;
list_add_tail_rcu(&binding->list, &set->bindings);
nft_set_trans_bind(ctx, set);
- set->use++;
return 0;
}
@@ -5125,12 +5293,62 @@ static void nf_tables_unbind_set(const struct nft_ctx *ctx, struct nft_set *set,
}
}
+static void nft_setelem_data_activate(const struct net *net,
+ const struct nft_set *set,
+ struct nft_set_elem *elem);
+
+static int nft_mapelem_activate(const struct nft_ctx *ctx,
+ struct nft_set *set,
+ const struct nft_set_iter *iter,
+ struct nft_set_elem *elem)
+{
+ nft_setelem_data_activate(ctx->net, set, elem);
+
+ return 0;
+}
+
+static void nft_map_catchall_activate(const struct nft_ctx *ctx,
+ struct nft_set *set)
+{
+ u8 genmask = nft_genmask_next(ctx->net);
+ struct nft_set_elem_catchall *catchall;
+ struct nft_set_elem elem;
+ struct nft_set_ext *ext;
+
+ list_for_each_entry(catchall, &set->catchall_list, list) {
+ ext = nft_set_elem_ext(set, catchall->elem);
+ if (!nft_set_elem_active(ext, genmask))
+ continue;
+
+ elem.priv = catchall->elem;
+ nft_setelem_data_activate(ctx->net, set, &elem);
+ break;
+ }
+}
+
+static void nft_map_activate(const struct nft_ctx *ctx, struct nft_set *set)
+{
+ struct nft_set_iter iter = {
+ .genmask = nft_genmask_next(ctx->net),
+ .fn = nft_mapelem_activate,
+ };
+
+ set->ops->walk(ctx, set, &iter);
+ WARN_ON_ONCE(iter.err);
+
+ nft_map_catchall_activate(ctx, set);
+}
+
void nf_tables_activate_set(const struct nft_ctx *ctx, struct nft_set *set)
{
- if (nft_set_is_anonymous(set))
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(ctx, set);
+
nft_clear(ctx->net, set);
+ }
- set->use++;
+ nft_use_inc_restore(&set->use);
}
EXPORT_SYMBOL_GPL(nf_tables_activate_set);
@@ -5139,15 +5357,31 @@ void nf_tables_deactivate_set(const struct nft_ctx *ctx, struct nft_set *set,
enum nft_trans_phase phase)
{
switch (phase) {
- case NFT_TRANS_PREPARE:
+ case NFT_TRANS_PREPARE_ERROR:
+ nft_set_trans_unbind(ctx, set);
if (nft_set_is_anonymous(set))
nft_deactivate_next(ctx->net, set);
+ else
+ list_del_rcu(&binding->list);
- set->use--;
+ nft_use_dec(&set->use);
+ break;
+ case NFT_TRANS_PREPARE:
+ if (nft_set_is_anonymous(set)) {
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_deactivate_next(ctx->net, set);
+ }
+ nft_use_dec(&set->use);
return;
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
- set->use--;
+ if (nft_set_is_anonymous(set) &&
+ set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(ctx, set);
+
+ nft_use_dec(&set->use);
fallthrough;
default:
nf_tables_unbind_set(ctx, set, binding,
@@ -5228,7 +5462,8 @@ static const struct nla_policy nft_set_elem_list_policy[NFTA_SET_ELEM_LIST_MAX +
static int nft_set_elem_expr_dump(struct sk_buff *skb,
const struct nft_set *set,
- const struct nft_set_ext *ext)
+ const struct nft_set_ext *ext,
+ bool reset)
{
struct nft_set_elem_expr *elem_expr;
u32 size, num_exprs = 0;
@@ -5241,7 +5476,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb,
if (num_exprs == 1) {
expr = nft_setelem_expr_at(elem_expr, 0);
- if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr, false) < 0)
+ if (nft_expr_dump(skb, NFTA_SET_ELEM_EXPR, expr, reset) < 0)
return -1;
return 0;
@@ -5252,7 +5487,7 @@ static int nft_set_elem_expr_dump(struct sk_buff *skb,
nft_setelem_expr_foreach(expr, elem_expr, size) {
expr = nft_setelem_expr_at(elem_expr, size);
- if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, false) < 0)
+ if (nft_expr_dump(skb, NFTA_LIST_ELEM, expr, reset) < 0)
goto nla_put_failure;
}
nla_nest_end(skb, nest);
@@ -5265,11 +5500,13 @@ nla_put_failure:
static int nf_tables_fill_setelem(struct sk_buff *skb,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_set_elem *elem,
+ bool reset)
{
const struct nft_set_ext *ext = nft_set_elem_ext(set, elem->priv);
unsigned char *b = skb_tail_pointer(skb);
struct nlattr *nest;
+ u64 timeout = 0;
nest = nla_nest_start_noflag(skb, NFTA_LIST_ELEM);
if (nest == NULL)
@@ -5292,7 +5529,7 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPRESSIONS) &&
- nft_set_elem_expr_dump(skb, set, ext))
+ nft_set_elem_expr_dump(skb, set, ext, reset))
goto nla_put_failure;
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF) &&
@@ -5305,11 +5542,15 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
htonl(*nft_set_ext_flags(ext))))
goto nla_put_failure;
- if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT) &&
- nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
- nf_jiffies64_to_msecs(*nft_set_ext_timeout(ext)),
- NFTA_SET_ELEM_PAD))
- goto nla_put_failure;
+ if (nft_set_ext_exists(ext, NFT_SET_EXT_TIMEOUT)) {
+ timeout = *nft_set_ext_timeout(ext);
+ if (nla_put_be64(skb, NFTA_SET_ELEM_TIMEOUT,
+ nf_jiffies64_to_msecs(timeout),
+ NFTA_SET_ELEM_PAD))
+ goto nla_put_failure;
+ } else if (set->flags & NFT_SET_TIMEOUT) {
+ timeout = READ_ONCE(set->timeout);
+ }
if (nft_set_ext_exists(ext, NFT_SET_EXT_EXPIRATION)) {
u64 expires, now = get_jiffies_64();
@@ -5324,6 +5565,9 @@ static int nf_tables_fill_setelem(struct sk_buff *skb,
nf_jiffies64_to_msecs(expires),
NFTA_SET_ELEM_PAD))
goto nla_put_failure;
+
+ if (reset)
+ *nft_set_ext_expiration(ext) = now + timeout;
}
if (nft_set_ext_exists(ext, NFT_SET_EXT_USERDATA)) {
@@ -5347,6 +5591,7 @@ struct nft_set_dump_args {
const struct netlink_callback *cb;
struct nft_set_iter iter;
struct sk_buff *skb;
+ bool reset;
};
static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
@@ -5357,7 +5602,7 @@ static int nf_tables_dump_setelem(const struct nft_ctx *ctx,
struct nft_set_dump_args *args;
args = container_of(iter, struct nft_set_dump_args, iter);
- return nf_tables_fill_setelem(args->skb, set, elem);
+ return nf_tables_fill_setelem(args->skb, set, elem, args->reset);
}
struct nft_set_dump_ctx {
@@ -5366,7 +5611,7 @@ struct nft_set_dump_ctx {
};
static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
- const struct nft_set *set)
+ const struct nft_set *set, bool reset)
{
struct nft_set_elem_catchall *catchall;
u8 genmask = nft_genmask_cur(net);
@@ -5381,7 +5626,7 @@ static int nft_set_catchall_dump(struct net *net, struct sk_buff *skb,
continue;
elem.priv = catchall->elem;
- ret = nf_tables_fill_setelem(skb, set, &elem);
+ ret = nf_tables_fill_setelem(skb, set, &elem, reset);
break;
}
@@ -5399,6 +5644,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
bool set_found = false;
struct nlmsghdr *nlh;
struct nlattr *nest;
+ bool reset = false;
u32 portid, seq;
int event;
@@ -5446,8 +5692,12 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
if (nest == NULL)
goto nla_put_failure;
+ if (NFNL_MSG_TYPE(cb->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
+ reset = true;
+
args.cb = cb;
args.skb = skb;
+ args.reset = reset;
args.iter.genmask = nft_genmask_cur(net);
args.iter.skip = cb->args[0];
args.iter.count = 0;
@@ -5456,7 +5706,7 @@ static int nf_tables_dump_set(struct sk_buff *skb, struct netlink_callback *cb)
set->ops->walk(&dump_ctx->ctx, set, &args.iter);
if (!args.iter.err && args.iter.count == cb->args[0])
- args.iter.err = nft_set_catchall_dump(net, skb, set);
+ args.iter.err = nft_set_catchall_dump(net, skb, set, reset);
rcu_read_unlock();
nla_nest_end(skb, nest);
@@ -5494,7 +5744,8 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
const struct nft_ctx *ctx, u32 seq,
u32 portid, int event, u16 flags,
const struct nft_set *set,
- const struct nft_set_elem *elem)
+ const struct nft_set_elem *elem,
+ bool reset)
{
struct nlmsghdr *nlh;
struct nlattr *nest;
@@ -5515,7 +5766,7 @@ static int nf_tables_fill_setelem_info(struct sk_buff *skb,
if (nest == NULL)
goto nla_put_failure;
- err = nf_tables_fill_setelem(skb, set, elem);
+ err = nf_tables_fill_setelem(skb, set, elem, reset);
if (err < 0)
goto nla_put_failure;
@@ -5621,7 +5872,7 @@ static int nft_setelem_get(struct nft_ctx *ctx, struct nft_set *set,
}
static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
- const struct nlattr *attr)
+ const struct nlattr *attr, bool reset)
{
struct nlattr *nla[NFTA_SET_ELEM_MAX + 1];
struct nft_set_elem elem;
@@ -5665,7 +5916,8 @@ static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
return err;
err = nf_tables_fill_setelem_info(skb, ctx, ctx->seq, ctx->portid,
- NFT_MSG_NEWSETELEM, 0, set, &elem);
+ NFT_MSG_NEWSETELEM, 0, set, &elem,
+ reset);
if (err < 0)
goto err_fill_setelem;
@@ -5689,6 +5941,7 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
struct nft_set *set;
struct nlattr *attr;
struct nft_ctx ctx;
+ bool reset = false;
int rem, err = 0;
table = nft_table_lookup(net, nla[NFTA_SET_ELEM_LIST_TABLE], family,
@@ -5723,8 +5976,11 @@ static int nf_tables_getsetelem(struct sk_buff *skb,
if (!nla[NFTA_SET_ELEM_LIST_ELEMENTS])
return -EINVAL;
+ if (NFNL_MSG_TYPE(info->nlh->nlmsg_type) == NFT_MSG_GETSETELEM_RESET)
+ reset = true;
+
nla_for_each_nested(attr, nla[NFTA_SET_ELEM_LIST_ELEMENTS], rem) {
- err = nft_get_set_elem(&ctx, set, attr);
+ err = nft_get_set_elem(&ctx, set, attr, reset);
if (err < 0) {
NL_SET_BAD_ATTR(extack, attr);
break;
@@ -5757,7 +6013,7 @@ static void nf_tables_setelem_notify(const struct nft_ctx *ctx,
flags |= ctx->flags & (NLM_F_CREATE | NLM_F_EXCL);
err = nf_tables_fill_setelem_info(skb, ctx, 0, portid, event, flags,
- set, elem);
+ set, elem, false);
if (err < 0) {
kfree_skb(skb);
goto err;
@@ -5899,6 +6155,7 @@ static void nft_set_elem_expr_destroy(const struct nft_ctx *ctx,
__nft_set_elem_expr_destroy(ctx, expr);
}
+/* Drop references and destroy. Called from gc, dynset and abort path. */
void nft_set_elem_destroy(const struct nft_set *set, void *elem,
bool destroy_expr)
{
@@ -5915,16 +6172,16 @@ void nft_set_elem_destroy(const struct nft_set *set, void *elem,
nft_set_elem_expr_destroy(&ctx, nft_set_ext_expr(ext));
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
kfree(elem);
}
EXPORT_SYMBOL_GPL(nft_set_elem_destroy);
-/* Only called from commit path, nft_setelem_data_deactivate() already deals
- * with the refcounting from the preparation phase.
+/* Destroy element. References have been already dropped in the preparation
+ * path via nft_setelem_data_deactivate().
*/
-static void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
- const struct nft_set *set, void *elem)
+void nf_tables_set_elem_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set, void *elem)
{
struct nft_set_ext *ext = nft_set_elem_ext(set, elem);
@@ -6417,8 +6674,16 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
set->objtype, genmask);
if (IS_ERR(obj)) {
err = PTR_ERR(obj);
+ obj = NULL;
goto err_parse_key_end;
}
+
+ if (!nft_use_inc(&obj->use)) {
+ err = -EMFILE;
+ obj = NULL;
+ goto err_parse_key_end;
+ }
+
err = nft_set_ext_add(&tmpl, NFT_SET_EXT_OBJREF);
if (err < 0)
goto err_parse_key_end;
@@ -6487,19 +6752,18 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
if (flags)
*nft_set_ext_flags(ext) = flags;
+ if (obj)
+ *nft_set_ext_obj(ext) = obj;
+
if (ulen > 0) {
if (nft_set_ext_check(&tmpl, NFT_SET_EXT_USERDATA, ulen) < 0) {
err = -EINVAL;
- goto err_elem_userdata;
+ goto err_elem_free;
}
udata = nft_set_ext_userdata(ext);
udata->len = ulen - 1;
nla_memcpy(&udata->data, nla[NFTA_SET_ELEM_USERDATA], ulen);
}
- if (obj) {
- *nft_set_ext_obj(ext) = obj;
- obj->use++;
- }
err = nft_set_elem_expr_setup(ctx, &tmpl, ext, expr_array, num_exprs);
if (err < 0)
goto err_elem_free;
@@ -6539,10 +6803,13 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
goto err_element_clash;
}
- if (!(flags & NFT_SET_ELEM_CATCHALL) && set->size &&
- !atomic_add_unless(&set->nelems, 1, set->size + set->ndeact)) {
- err = -ENFILE;
- goto err_set_full;
+ if (!(flags & NFT_SET_ELEM_CATCHALL)) {
+ unsigned int max = set->size ? set->size + set->ndeact : UINT_MAX;
+
+ if (!atomic_add_unless(&set->nelems, 1, max)) {
+ err = -ENFILE;
+ goto err_set_full;
+ }
}
nft_trans_elem(trans) = elem;
@@ -6554,14 +6821,14 @@ err_set_full:
err_element_clash:
kfree(trans);
err_elem_free:
- if (obj)
- obj->use--;
-err_elem_userdata:
nf_tables_set_elem_destroy(ctx, set, elem.priv);
err_parse_data:
if (nla[NFTA_SET_ELEM_DATA] != NULL)
nft_data_release(&elem.data.val, desc.type);
err_parse_key_end:
+ if (obj)
+ nft_use_dec_restore(&obj->use);
+
nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
err_parse_key:
nft_data_release(&elem.key.val, NFT_DATA_VALUE);
@@ -6601,7 +6868,8 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -6634,23 +6902,13 @@ static int nf_tables_newsetelem(struct sk_buff *skb,
void nft_data_hold(const struct nft_data *data, enum nft_data_types type)
{
struct nft_chain *chain;
- struct nft_rule *rule;
if (type == NFT_DATA_VERDICT) {
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use++;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use++;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use++;
-
- nft_chain_add(chain->table, chain);
+ nft_use_inc_restore(&chain->use);
break;
}
}
@@ -6665,7 +6923,7 @@ static void nft_setelem_data_activate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_hold(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use++;
+ nft_use_inc_restore(&(*nft_set_ext_obj(ext))->use);
}
static void nft_setelem_data_deactivate(const struct net *net,
@@ -6677,7 +6935,7 @@ static void nft_setelem_data_deactivate(const struct net *net,
if (nft_set_ext_exists(ext, NFT_SET_EXT_DATA))
nft_data_release(nft_set_ext_data(ext), set->dtype);
if (nft_set_ext_exists(ext, NFT_SET_EXT_OBJREF))
- (*nft_set_ext_obj(ext))->use--;
+ nft_use_dec(&(*nft_set_ext_obj(ext))->use);
}
static int nft_del_setelem(struct nft_ctx *ctx, struct nft_set *set,
@@ -6885,7 +7143,9 @@ static int nf_tables_delsetelem(struct sk_buff *skb,
set = nft_set_lookup(table, nla[NFTA_SET_ELEM_LIST_SET], genmask);
if (IS_ERR(set))
return PTR_ERR(set);
- if (!list_empty(&set->bindings) && set->flags & NFT_SET_CONSTANT)
+
+ if (!list_empty(&set->bindings) &&
+ (set->flags & (NFT_SET_CONSTANT | NFT_SET_ANONYMOUS)))
return -EBUSY;
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
@@ -7218,9 +7478,14 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
type = nft_obj_type_get(net, objtype);
- if (IS_ERR(type))
- return PTR_ERR(type);
+ if (IS_ERR(type)) {
+ err = PTR_ERR(type);
+ goto err_type;
+ }
obj = nft_obj_init(&ctx, type, nla[NFTA_OBJ_DATA]);
if (IS_ERR(obj)) {
@@ -7254,7 +7519,7 @@ static int nf_tables_newobj(struct sk_buff *skb, const struct nfnl_info *info,
goto err_obj_ht;
list_add_tail_rcu(&obj->list, &table->objects);
- table->use++;
+
return 0;
err_obj_ht:
/* queued in transaction log */
@@ -7270,6 +7535,9 @@ err_strdup:
kfree(obj);
err_init:
module_put(type->owner);
+err_type:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -7667,10 +7935,11 @@ void nf_tables_deactivate_flowtable(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
case NFT_TRANS_PREPARE:
case NFT_TRANS_ABORT:
case NFT_TRANS_RELEASE:
- flowtable->use--;
+ nft_use_dec(&flowtable->use);
fallthrough;
default:
return;
@@ -8024,9 +8293,14 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
nft_ctx_init(&ctx, net, skb, info->nlh, family, table, NULL, nla);
+ if (!nft_use_inc(&table->use))
+ return -EMFILE;
+
flowtable = kzalloc(sizeof(*flowtable), GFP_KERNEL_ACCOUNT);
- if (!flowtable)
- return -ENOMEM;
+ if (!flowtable) {
+ err = -ENOMEM;
+ goto flowtable_alloc;
+ }
flowtable->table = table;
flowtable->handle = nf_tables_alloc_handle(table);
@@ -8081,7 +8355,6 @@ static int nf_tables_newflowtable(struct sk_buff *skb,
goto err5;
list_add_tail_rcu(&flowtable->list, &table->flowtables);
- table->use++;
return 0;
err5:
@@ -8098,6 +8371,9 @@ err2:
kfree(flowtable->name);
err1:
kfree(flowtable);
+flowtable_alloc:
+ nft_use_dec_restore(&table->use);
+
return err;
}
@@ -8711,6 +8987,12 @@ static const struct nfnl_callback nf_tables_cb[NFT_MSG_MAX] = {
.attr_count = NFTA_SET_ELEM_LIST_MAX,
.policy = nft_set_elem_list_policy,
},
+ [NFT_MSG_GETSETELEM_RESET] = {
+ .call = nf_tables_getsetelem,
+ .type = NFNL_CB_RCU,
+ .attr_count = NFTA_SET_ELEM_LIST_MAX,
+ .policy = nft_set_elem_list_policy,
+ },
[NFT_MSG_DELSETELEM] = {
.call = nf_tables_delsetelem,
.type = NFNL_CB_BATCH,
@@ -8939,7 +9221,7 @@ static void nf_tables_trans_destroy_work(struct work_struct *w)
synchronize_rcu();
list_for_each_entry_safe(trans, next, &head, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nft_commit_release(trans);
}
}
@@ -9005,7 +9287,7 @@ static int nf_tables_commit_chain_prepare(struct net *net, struct nft_chain *cha
continue;
}
- if (WARN_ON_ONCE(data + expr->ops->size > data_boundary))
+ if (WARN_ON_ONCE(data + size + expr->ops->size > data_boundary))
return -ENOMEM;
memcpy(data + size, expr, expr->ops->size);
@@ -9273,10 +9555,25 @@ static void nf_tables_commit_audit_log(struct list_head *adl, u32 generation)
}
}
+static void nft_set_commit_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->commit)
+ continue;
+
+ set->ops->commit(set);
+ }
+}
+
static int nf_tables_commit(struct net *net, struct sk_buff *skb)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
struct nft_chain *chain;
struct nft_table *table;
@@ -9289,6 +9586,27 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
return 0;
}
+ list_for_each_entry(trans, &nft_net->binding_list, binding_list) {
+ switch (trans->msg_type) {
+ case NFT_MSG_NEWSET:
+ if (!nft_trans_set_update(trans) &&
+ nft_set_is_anonymous(nft_trans_set(trans)) &&
+ !nft_trans_set_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound set\n");
+ return -EINVAL;
+ }
+ break;
+ case NFT_MSG_NEWCHAIN:
+ if (!nft_trans_chain_update(trans) &&
+ nft_chain_binding(nft_trans_chain(trans)) &&
+ !nft_trans_chain_bound(trans)) {
+ pr_warn_once("nftables ruleset with unbound chain\n");
+ return -EINVAL;
+ }
+ break;
+ }
+ }
+
/* 0. Validate ruleset, otherwise roll back for error reporting. */
if (nf_tables_validate(net) < 0)
return -EAGAIN;
@@ -9425,6 +9743,9 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
WRITE_ONCE(set->timeout, nft_trans_set_timeout(trans));
WRITE_ONCE(set->gc_int, nft_trans_set_gc_int(trans));
+
+ if (nft_trans_set_size(trans))
+ WRITE_ONCE(set->size, nft_trans_set_size(trans));
} else {
nft_clear(net, nft_trans_set(trans));
/* This avoids hitting -EBUSY when deleting the table
@@ -9432,7 +9753,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
*/
if (nft_set_is_anonymous(nft_trans_set(trans)) &&
!list_empty(&nft_trans_set(trans)->bindings))
- trans->ctx.table->use--;
+ nft_use_dec(&trans->ctx.table->use);
}
nf_tables_set_notify(&trans->ctx, nft_trans_set(trans),
NFT_MSG_NEWSET, GFP_KERNEL);
@@ -9451,6 +9772,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
nf_tables_setelem_notify(&trans->ctx, te->set,
&te->elem,
NFT_MSG_NEWSETELEM);
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_DELSETELEM:
@@ -9465,6 +9791,11 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
atomic_dec(&te->set->nelems);
te->set->ndeact--;
}
+ if (te->set->ops->commit &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_NEWOBJ:
if (nft_trans_obj_update(trans)) {
@@ -9527,6 +9858,8 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
}
}
+ nft_set_commit_update(&set_update_list);
+
nft_commit_notify(net, NETLINK_CB(skb).portid);
nf_tables_gen_notify(net, skb, NFT_MSG_NEWGEN);
nf_tables_commit_audit_log(&adl, nft_net->base_seq);
@@ -9586,10 +9919,25 @@ static void nf_tables_abort_release(struct nft_trans *trans)
kfree(trans);
}
+static void nft_set_abort_update(struct list_head *set_update_list)
+{
+ struct nft_set *set, *next;
+
+ list_for_each_entry_safe(set, next, set_update_list, pending_update) {
+ list_del_init(&set->pending_update);
+
+ if (!set->ops->abort)
+ continue;
+
+ set->ops->abort(set);
+ }
+}
+
static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
{
struct nftables_pernet *nft_net = nft_pernet(net);
struct nft_trans *trans, *next;
+ LIST_HEAD(set_update_list);
struct nft_trans_elem *te;
if (action == NFNL_ABORT_VALIDATE &&
@@ -9631,11 +9979,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
kfree(nft_trans_chain_name(trans));
nft_trans_destroy(trans);
} else {
- if (nft_chain_is_bound(trans->ctx.chain)) {
+ if (nft_trans_chain_bound(trans)) {
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_chain_del(trans->ctx.chain);
nf_tables_unregister_hook(trans->ctx.net,
trans->ctx.table,
@@ -9648,13 +9996,17 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_chain_hooks(trans),
&nft_trans_basechain(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, trans->ctx.chain);
}
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWRULE:
- trans->ctx.chain->use--;
+ if (nft_trans_rule_bound(trans)) {
+ nft_trans_destroy(trans);
+ break;
+ }
+ nft_use_dec_restore(&trans->ctx.chain->use);
list_del_rcu(&nft_trans_rule(trans)->list);
nft_rule_expr_deactivate(&trans->ctx,
nft_trans_rule(trans),
@@ -9664,7 +10016,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELRULE:
case NFT_MSG_DESTROYRULE:
- trans->ctx.chain->use++;
+ nft_use_inc_restore(&trans->ctx.chain->use);
nft_clear(trans->ctx.net, nft_trans_rule(trans));
nft_rule_expr_activate(&trans->ctx, nft_trans_rule(trans));
if (trans->ctx.chain->flags & NFT_CHAIN_HW_OFFLOAD)
@@ -9677,7 +10029,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_trans_destroy(trans);
break;
}
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
if (nft_trans_set_bound(trans)) {
nft_trans_destroy(trans);
break;
@@ -9686,8 +10038,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
break;
case NFT_MSG_DELSET:
case NFT_MSG_DESTROYSET:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_set(trans));
+ if (nft_trans_set(trans)->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_activate(&trans->ctx, nft_trans_set(trans));
+
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWSETELEM:
@@ -9699,6 +10054,12 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_setelem_remove(net, te->set, &te->elem);
if (!nft_setelem_is_catchall(te->set, &te->elem))
atomic_dec(&te->set->nelems);
+
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
break;
case NFT_MSG_DELSETELEM:
case NFT_MSG_DESTROYSETELEM:
@@ -9709,6 +10070,11 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
if (!nft_setelem_is_catchall(te->set, &te->elem))
te->set->ndeact--;
+ if (te->set->ops->abort &&
+ list_empty(&te->set->pending_update)) {
+ list_add_tail(&te->set->pending_update,
+ &set_update_list);
+ }
nft_trans_destroy(trans);
break;
case NFT_MSG_NEWOBJ:
@@ -9716,13 +10082,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_obj_destroy(&trans->ctx, nft_trans_obj_newobj(trans));
nft_trans_destroy(trans);
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
nft_obj_del(nft_trans_obj(trans));
}
break;
case NFT_MSG_DELOBJ:
case NFT_MSG_DESTROYOBJ:
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_obj(trans));
nft_trans_destroy(trans);
break;
@@ -9731,7 +10097,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable_hooks(trans));
} else {
- trans->ctx.table->use--;
+ nft_use_dec_restore(&trans->ctx.table->use);
list_del_rcu(&nft_trans_flowtable(trans)->list);
nft_unregister_flowtable_net_hooks(net,
&nft_trans_flowtable(trans)->hook_list);
@@ -9743,7 +10109,7 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
list_splice(&nft_trans_flowtable_hooks(trans),
&nft_trans_flowtable(trans)->hook_list);
} else {
- trans->ctx.table->use++;
+ nft_use_inc_restore(&trans->ctx.table->use);
nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
}
nft_trans_destroy(trans);
@@ -9751,11 +10117,13 @@ static int __nf_tables_abort(struct net *net, enum nfnl_abort_action action)
}
}
+ nft_set_abort_update(&set_update_list);
+
synchronize_rcu();
list_for_each_entry_safe_reverse(trans, next,
&nft_net->commit_list, list) {
- list_del(&trans->list);
+ nft_trans_list_del(trans);
nf_tables_abort_release(trans);
}
@@ -10174,7 +10542,8 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
genmask);
} else if (tb[NFTA_VERDICT_CHAIN_ID]) {
chain = nft_chain_lookup_byid(ctx->net, ctx->table,
- tb[NFTA_VERDICT_CHAIN_ID]);
+ tb[NFTA_VERDICT_CHAIN_ID],
+ genmask);
if (IS_ERR(chain))
return PTR_ERR(chain);
} else {
@@ -10190,8 +10559,9 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
if (desc->flags & NFT_DATA_DESC_SETELEM &&
chain->flags & NFT_CHAIN_BINDING)
return -EINVAL;
+ if (!nft_use_inc(&chain->use))
+ return -EMFILE;
- chain->use++;
data->verdict.chain = chain;
break;
}
@@ -10204,22 +10574,12 @@ static int nft_verdict_init(const struct nft_ctx *ctx, struct nft_data *data,
static void nft_verdict_uninit(const struct nft_data *data)
{
struct nft_chain *chain;
- struct nft_rule *rule;
switch (data->verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
chain = data->verdict.chain;
- chain->use--;
-
- if (!nft_chain_is_bound(chain))
- break;
-
- chain->table->use--;
- list_for_each_entry(rule, &chain->rules, list)
- chain->use--;
-
- nft_chain_del(chain);
+ nft_use_dec(&chain->use);
break;
}
}
@@ -10388,11 +10748,11 @@ int __nft_release_basechain(struct nft_ctx *ctx)
nf_tables_unregister_hook(ctx->net, ctx->chain->table, ctx->chain);
list_for_each_entry_safe(rule, nr, &ctx->chain->rules, list) {
list_del(&rule->list);
- ctx->chain->use--;
+ nft_use_dec(&ctx->chain->use);
nf_tables_rule_release(ctx, rule);
}
nft_chain_del(ctx->chain);
- ctx->table->use--;
+ nft_use_dec(&ctx->table->use);
nf_tables_chain_destroy(ctx);
return 0;
@@ -10442,29 +10802,32 @@ static void __nft_release_table(struct net *net, struct nft_table *table)
ctx.chain = chain;
list_for_each_entry_safe(rule, nr, &chain->rules, list) {
list_del(&rule->list);
- chain->use--;
+ nft_use_dec(&chain->use);
nf_tables_rule_release(&ctx, rule);
}
}
list_for_each_entry_safe(flowtable, nf, &table->flowtables, list) {
list_del(&flowtable->list);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_flowtable_destroy(flowtable);
}
list_for_each_entry_safe(set, ns, &table->sets, list) {
list_del(&set->list);
- table->use--;
+ nft_use_dec(&table->use);
+ if (set->flags & (NFT_SET_MAP | NFT_SET_OBJECT))
+ nft_map_deactivate(&ctx, set);
+
nft_set_destroy(&ctx, set);
}
list_for_each_entry_safe(obj, ne, &table->objects, list) {
nft_obj_del(obj);
- table->use--;
+ nft_use_dec(&table->use);
nft_obj_destroy(&ctx, obj);
}
list_for_each_entry_safe(chain, nc, &table->chains, list) {
ctx.chain = chain;
nft_chain_del(chain);
- table->use--;
+ nft_use_dec(&table->use);
nf_tables_chain_destroy(&ctx);
}
nf_tables_table_destroy(&ctx);
@@ -10538,6 +10901,7 @@ static int __net_init nf_tables_init_net(struct net *net)
INIT_LIST_HEAD(&nft_net->tables);
INIT_LIST_HEAD(&nft_net->commit_list);
+ INIT_LIST_HEAD(&nft_net->binding_list);
INIT_LIST_HEAD(&nft_net->module_list);
INIT_LIST_HEAD(&nft_net->notify_list);
mutex_init(&nft_net->commit_mutex);
diff --git a/net/netfilter/nfnetlink.c b/net/netfilter/nfnetlink.c
index ae7146475d17..c9fbe0f707b5 100644
--- a/net/netfilter/nfnetlink.c
+++ b/net/netfilter/nfnetlink.c
@@ -533,7 +533,8 @@ ack:
* processed, this avoids that the same error is
* reported several times when replaying the batch.
*/
- if (nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
+ if (err == -ENOMEM ||
+ nfnl_err_add(&err_list, nlh, err, &extack) < 0) {
/* We failed to enqueue an error, reset the
* list of errors and send OOM to userspace
* pointing to the batch header.
diff --git a/net/netfilter/nfnetlink_osf.c b/net/netfilter/nfnetlink_osf.c
index ee6840bd5933..8f1bfa6ccc2d 100644
--- a/net/netfilter/nfnetlink_osf.c
+++ b/net/netfilter/nfnetlink_osf.c
@@ -439,3 +439,4 @@ module_init(nfnl_osf_init);
module_exit(nfnl_osf_fini);
MODULE_LICENSE("GPL");
+MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);
diff --git a/net/netfilter/nfnetlink_queue.c b/net/netfilter/nfnetlink_queue.c
index e311462f6d98..556bc902af00 100644
--- a/net/netfilter/nfnetlink_queue.c
+++ b/net/netfilter/nfnetlink_queue.c
@@ -30,6 +30,7 @@
#include <linux/netfilter/nf_conntrack_common.h>
#include <linux/list.h>
#include <linux/cgroup-defs.h>
+#include <net/gso.h>
#include <net/sock.h>
#include <net/tcp_states.h>
#include <net/netfilter/nf_queue.h>
diff --git a/net/netfilter/nft_bitwise.c b/net/netfilter/nft_bitwise.c
index 84eae7cabc67..ca857afbf061 100644
--- a/net/netfilter/nft_bitwise.c
+++ b/net/netfilter/nft_bitwise.c
@@ -86,7 +86,7 @@ static const struct nla_policy nft_bitwise_policy[NFTA_BITWISE_MAX + 1] = {
[NFTA_BITWISE_LEN] = { .type = NLA_U32 },
[NFTA_BITWISE_MASK] = { .type = NLA_NESTED },
[NFTA_BITWISE_XOR] = { .type = NLA_NESTED },
- [NFTA_BITWISE_OP] = { .type = NLA_U32 },
+ [NFTA_BITWISE_OP] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_BITWISE_DATA] = { .type = NLA_NESTED },
};
@@ -323,7 +323,7 @@ static bool nft_bitwise_reduce(struct nft_regs_track *track,
dreg = priv->dreg;
regcount = DIV_ROUND_UP(priv->len, NFT_REG32_SIZE);
for (i = 0; i < regcount; i++, dreg++)
- track->regs[priv->dreg].bitwise = expr;
+ track->regs[dreg].bitwise = expr;
return false;
}
diff --git a/net/netfilter/nft_byteorder.c b/net/netfilter/nft_byteorder.c
index b66647a5a171..e596d1a842f7 100644
--- a/net/netfilter/nft_byteorder.c
+++ b/net/netfilter/nft_byteorder.c
@@ -30,11 +30,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
const struct nft_byteorder *priv = nft_expr_priv(expr);
u32 *src = &regs->data[priv->sreg];
u32 *dst = &regs->data[priv->dreg];
- union { u32 u32; u16 u16; } *s, *d;
+ u16 *s16, *d16;
unsigned int i;
- s = (void *)src;
- d = (void *)dst;
+ s16 = (void *)src;
+ d16 = (void *)dst;
switch (priv->size) {
case 8: {
@@ -62,11 +62,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = ntohl((__force __be32)s[i].u32);
+ dst[i] = ntohl((__force __be32)src[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 4; i++)
- d[i].u32 = (__force __u32)htonl(s[i].u32);
+ dst[i] = (__force __u32)htonl(src[i]);
break;
}
break;
@@ -74,11 +74,11 @@ void nft_byteorder_eval(const struct nft_expr *expr,
switch (priv->op) {
case NFT_BYTEORDER_NTOH:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = ntohs((__force __be16)s[i].u16);
+ d16[i] = ntohs((__force __be16)s16[i]);
break;
case NFT_BYTEORDER_HTON:
for (i = 0; i < priv->len / 2; i++)
- d[i].u16 = (__force __u16)htons(s[i].u16);
+ d16[i] = (__force __u16)htons(s16[i]);
break;
}
break;
@@ -88,9 +88,9 @@ void nft_byteorder_eval(const struct nft_expr *expr,
static const struct nla_policy nft_byteorder_policy[NFTA_BYTEORDER_MAX + 1] = {
[NFTA_BYTEORDER_SREG] = { .type = NLA_U32 },
[NFTA_BYTEORDER_DREG] = { .type = NLA_U32 },
- [NFTA_BYTEORDER_OP] = { .type = NLA_U32 },
- [NFTA_BYTEORDER_LEN] = { .type = NLA_U32 },
- [NFTA_BYTEORDER_SIZE] = { .type = NLA_U32 },
+ [NFTA_BYTEORDER_OP] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_BYTEORDER_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
+ [NFTA_BYTEORDER_SIZE] = NLA_POLICY_MAX(NLA_BE32, 255),
};
static int nft_byteorder_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index b9c84499438b..38958e067aa8 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -332,7 +332,7 @@ static void nft_ct_set_eval(const struct nft_expr *expr,
static const struct nla_policy nft_ct_policy[NFTA_CT_MAX + 1] = {
[NFTA_CT_DREG] = { .type = NLA_U32 },
- [NFTA_CT_KEY] = { .type = NLA_U32 },
+ [NFTA_CT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_CT_DIRECTION] = { .type = NLA_U8 },
[NFTA_CT_SREG] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_dynset.c b/net/netfilter/nft_dynset.c
index bd19c7aec92e..4fb34d76dbea 100644
--- a/net/netfilter/nft_dynset.c
+++ b/net/netfilter/nft_dynset.c
@@ -148,7 +148,7 @@ static const struct nla_policy nft_dynset_policy[NFTA_DYNSET_MAX + 1] = {
[NFTA_DYNSET_SET_NAME] = { .type = NLA_STRING,
.len = NFT_SET_MAXNAMELEN - 1 },
[NFTA_DYNSET_SET_ID] = { .type = NLA_U32 },
- [NFTA_DYNSET_OP] = { .type = NLA_U32 },
+ [NFTA_DYNSET_OP] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_DYNSET_SREG_KEY] = { .type = NLA_U32 },
[NFTA_DYNSET_SREG_DATA] = { .type = NLA_U32 },
[NFTA_DYNSET_TIMEOUT] = { .type = NLA_U64 },
diff --git a/net/netfilter/nft_exthdr.c b/net/netfilter/nft_exthdr.c
index a54a7f772cec..7f856ceb3a66 100644
--- a/net/netfilter/nft_exthdr.c
+++ b/net/netfilter/nft_exthdr.c
@@ -10,6 +10,7 @@
#include <linux/netlink.h>
#include <linux/netfilter.h>
#include <linux/netfilter/nf_tables.h>
+#include <linux/dccp.h>
#include <linux/sctp.h>
#include <net/netfilter/nf_tables_core.h>
#include <net/netfilter/nf_tables.h>
@@ -406,13 +407,89 @@ err:
regs->verdict.code = NFT_BREAK;
}
+static void nft_exthdr_dccp_eval(const struct nft_expr *expr,
+ struct nft_regs *regs,
+ const struct nft_pktinfo *pkt)
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ unsigned int thoff, dataoff, optoff, optlen, i;
+ u32 *dest = &regs->data[priv->dreg];
+ const struct dccp_hdr *dh;
+ struct dccp_hdr _dh;
+
+ if (pkt->tprot != IPPROTO_DCCP || pkt->fragoff)
+ goto err;
+
+ thoff = nft_thoff(pkt);
+
+ dh = skb_header_pointer(pkt->skb, thoff, sizeof(_dh), &_dh);
+ if (!dh)
+ goto err;
+
+ dataoff = dh->dccph_doff * sizeof(u32);
+ optoff = __dccp_hdr_len(dh);
+ if (dataoff <= optoff)
+ goto err;
+
+ optlen = dataoff - optoff;
+
+ for (i = 0; i < optlen; ) {
+ /* Options 0 (DCCPO_PADDING) - 31 (DCCPO_MAX_RESERVED) are 1B in
+ * the length; the remaining options are at least 2B long. In
+ * all cases, the first byte contains the option type. In
+ * multi-byte options, the second byte contains the option
+ * length, which must be at least two: 1 for the type plus 1 for
+ * the length plus 0-253 for any following option data. We
+ * aren't interested in the option data, only the type and the
+ * length, so we don't need to read more than two bytes at a
+ * time.
+ */
+ unsigned int buflen = optlen - i;
+ u8 buf[2], *bufp;
+ u8 type, len;
+
+ if (buflen > sizeof(buf))
+ buflen = sizeof(buf);
+
+ bufp = skb_header_pointer(pkt->skb, thoff + optoff + i, buflen,
+ &buf);
+ if (!bufp)
+ goto err;
+
+ type = bufp[0];
+
+ if (type == priv->type) {
+ *dest = 1;
+ return;
+ }
+
+ if (type <= DCCPO_MAX_RESERVED) {
+ i++;
+ continue;
+ }
+
+ if (buflen < 2)
+ goto err;
+
+ len = bufp[1];
+
+ if (len < 2)
+ goto err;
+
+ i += len;
+ }
+
+err:
+ *dest = 0;
+}
+
static const struct nla_policy nft_exthdr_policy[NFTA_EXTHDR_MAX + 1] = {
[NFTA_EXTHDR_DREG] = { .type = NLA_U32 },
[NFTA_EXTHDR_TYPE] = { .type = NLA_U8 },
[NFTA_EXTHDR_OFFSET] = { .type = NLA_U32 },
- [NFTA_EXTHDR_LEN] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_EXTHDR_FLAGS] = { .type = NLA_U32 },
- [NFTA_EXTHDR_OP] = { .type = NLA_U32 },
+ [NFTA_EXTHDR_OP] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_EXTHDR_SREG] = { .type = NLA_U32 },
};
@@ -557,6 +634,22 @@ static int nft_exthdr_ipv4_init(const struct nft_ctx *ctx,
return 0;
}
+static int nft_exthdr_dccp_init(const struct nft_ctx *ctx,
+ const struct nft_expr *expr,
+ const struct nlattr * const tb[])
+{
+ struct nft_exthdr *priv = nft_expr_priv(expr);
+ int err = nft_exthdr_init(ctx, expr, tb);
+
+ if (err < 0)
+ return err;
+
+ if (!(priv->flags & NFT_EXTHDR_F_PRESENT))
+ return -EOPNOTSUPP;
+
+ return 0;
+}
+
static int nft_exthdr_dump_common(struct sk_buff *skb, const struct nft_exthdr *priv)
{
if (nla_put_u8(skb, NFTA_EXTHDR_TYPE, priv->type))
@@ -686,6 +779,15 @@ static const struct nft_expr_ops nft_exthdr_sctp_ops = {
.reduce = nft_exthdr_reduce,
};
+static const struct nft_expr_ops nft_exthdr_dccp_ops = {
+ .type = &nft_exthdr_type,
+ .size = NFT_EXPR_SIZE(sizeof(struct nft_exthdr)),
+ .eval = nft_exthdr_dccp_eval,
+ .init = nft_exthdr_dccp_init,
+ .dump = nft_exthdr_dump,
+ .reduce = nft_exthdr_reduce,
+};
+
static const struct nft_expr_ops *
nft_exthdr_select_ops(const struct nft_ctx *ctx,
const struct nlattr * const tb[])
@@ -720,6 +822,10 @@ nft_exthdr_select_ops(const struct nft_ctx *ctx,
if (tb[NFTA_EXTHDR_DREG])
return &nft_exthdr_sctp_ops;
break;
+ case NFT_EXTHDR_OP_DCCP:
+ if (tb[NFTA_EXTHDR_DREG])
+ return &nft_exthdr_dccp_ops;
+ break;
}
return ERR_PTR(-EOPNOTSUPP);
diff --git a/net/netfilter/nft_flow_offload.c b/net/netfilter/nft_flow_offload.c
index e860d8fe0e5e..ab3362c483b4 100644
--- a/net/netfilter/nft_flow_offload.c
+++ b/net/netfilter/nft_flow_offload.c
@@ -250,9 +250,14 @@ static int nft_flow_route(const struct nft_pktinfo *pkt,
break;
}
+ if (!dst_hold_safe(this_dst))
+ return -ENOENT;
+
nf_route(nft_net(pkt), &other_dst, &fl, false, nft_pf(pkt));
- if (!other_dst)
+ if (!other_dst) {
+ dst_release(this_dst);
return -ENOENT;
+ }
nft_default_forward_path(route, this_dst, dir);
nft_default_forward_path(route, other_dst, !dir);
@@ -349,8 +354,7 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (!flow)
goto err_flow_alloc;
- if (flow_offload_route_init(flow, &route) < 0)
- goto err_flow_add;
+ flow_offload_route_init(flow, &route);
if (tcph) {
ct->proto.tcp.seen[0].flags |= IP_CT_TCP_FLAG_BE_LIBERAL;
@@ -361,12 +365,12 @@ static void nft_flow_offload_eval(const struct nft_expr *expr,
if (ret < 0)
goto err_flow_add;
- dst_release(route.tuple[!dir].dst);
return;
err_flow_add:
flow_offload_free(flow);
err_flow_alloc:
+ dst_release(route.tuple[dir].dst);
dst_release(route.tuple[!dir].dst);
err_flow_route:
clear_bit(IPS_OFFLOAD_BIT, &ct->status);
@@ -404,8 +408,10 @@ static int nft_flow_offload_init(const struct nft_ctx *ctx,
if (IS_ERR(flowtable))
return PTR_ERR(flowtable);
+ if (!nft_use_inc(&flowtable->use))
+ return -EMFILE;
+
priv->flowtable = flowtable;
- flowtable->use++;
return nf_ct_netns_get(ctx->net, ctx->family);
}
@@ -424,7 +430,7 @@ static void nft_flow_offload_activate(const struct nft_ctx *ctx,
{
struct nft_flow_offload *priv = nft_expr_priv(expr);
- priv->flowtable->use++;
+ nft_use_inc_restore(&priv->flowtable->use);
}
static void nft_flow_offload_destroy(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_fwd_netdev.c b/net/netfilter/nft_fwd_netdev.c
index 7b9d4d1bd17c..a5268e6dd32f 100644
--- a/net/netfilter/nft_fwd_netdev.c
+++ b/net/netfilter/nft_fwd_netdev.c
@@ -40,7 +40,7 @@ static void nft_fwd_netdev_eval(const struct nft_expr *expr,
static const struct nla_policy nft_fwd_netdev_policy[NFTA_FWD_MAX + 1] = {
[NFTA_FWD_SREG_DEV] = { .type = NLA_U32 },
[NFTA_FWD_SREG_ADDR] = { .type = NLA_U32 },
- [NFTA_FWD_NFPROTO] = { .type = NLA_U32 },
+ [NFTA_FWD_NFPROTO] = NLA_POLICY_MAX(NLA_BE32, 255),
};
static int nft_fwd_netdev_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_hash.c b/net/netfilter/nft_hash.c
index ee8d487b69c0..92d47e469204 100644
--- a/net/netfilter/nft_hash.c
+++ b/net/netfilter/nft_hash.c
@@ -59,7 +59,7 @@ static void nft_symhash_eval(const struct nft_expr *expr,
static const struct nla_policy nft_hash_policy[NFTA_HASH_MAX + 1] = {
[NFTA_HASH_SREG] = { .type = NLA_U32 },
[NFTA_HASH_DREG] = { .type = NLA_U32 },
- [NFTA_HASH_LEN] = { .type = NLA_U32 },
+ [NFTA_HASH_LEN] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_HASH_MODULUS] = { .type = NLA_U32 },
[NFTA_HASH_SEED] = { .type = NLA_U32 },
[NFTA_HASH_OFFSET] = { .type = NLA_U32 },
diff --git a/net/netfilter/nft_immediate.c b/net/netfilter/nft_immediate.c
index c9d2f7c29f53..407d7197f75b 100644
--- a/net/netfilter/nft_immediate.c
+++ b/net/netfilter/nft_immediate.c
@@ -76,11 +76,9 @@ static int nft_immediate_init(const struct nft_ctx *ctx,
switch (priv->data.verdict.code) {
case NFT_JUMP:
case NFT_GOTO:
- if (nft_chain_is_bound(chain)) {
- err = -EBUSY;
- goto err1;
- }
- chain->bound = true;
+ err = nf_tables_bind_chain(ctx, chain);
+ if (err < 0)
+ return err;
break;
default:
break;
@@ -98,6 +96,31 @@ static void nft_immediate_activate(const struct nft_ctx *ctx,
const struct nft_expr *expr)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_activate(&chain_ctx, rule);
+
+ nft_clear(ctx->net, chain);
+ break;
+ default:
+ break;
+ }
+ }
return nft_data_hold(&priv->data, nft_dreg_to_type(priv->dreg));
}
@@ -107,6 +130,43 @@ static void nft_immediate_deactivate(const struct nft_ctx *ctx,
enum nft_trans_phase phase)
{
const struct nft_immediate_expr *priv = nft_expr_priv(expr);
+ const struct nft_data *data = &priv->data;
+ struct nft_ctx chain_ctx;
+ struct nft_chain *chain;
+ struct nft_rule *rule;
+
+ if (priv->dreg == NFT_REG_VERDICT) {
+ switch (data->verdict.code) {
+ case NFT_JUMP:
+ case NFT_GOTO:
+ chain = data->verdict.chain;
+ if (!nft_chain_binding(chain))
+ break;
+
+ chain_ctx = *ctx;
+ chain_ctx.chain = chain;
+
+ list_for_each_entry(rule, &chain->rules, list)
+ nft_rule_expr_deactivate(&chain_ctx, rule, phase);
+
+ switch (phase) {
+ case NFT_TRANS_PREPARE_ERROR:
+ nf_tables_unbind_chain(ctx, chain);
+ fallthrough;
+ case NFT_TRANS_PREPARE:
+ nft_deactivate_next(ctx->net, chain);
+ break;
+ default:
+ nft_chain_del(chain);
+ chain->bound = false;
+ nft_use_dec(&chain->table->use);
+ break;
+ }
+ break;
+ default:
+ break;
+ }
+ }
if (phase == NFT_TRANS_COMMIT)
return;
@@ -131,15 +191,27 @@ static void nft_immediate_destroy(const struct nft_ctx *ctx,
case NFT_GOTO:
chain = data->verdict.chain;
- if (!nft_chain_is_bound(chain))
+ if (!nft_chain_binding(chain))
+ break;
+
+ /* Rule construction failed, but chain is already bound:
+ * let the transaction records release this chain and its rules.
+ */
+ if (chain->bound) {
+ nft_use_dec(&chain->use);
break;
+ }
+ /* Rule has been deleted, release chain and its rules. */
chain_ctx = *ctx;
chain_ctx.chain = chain;
- list_for_each_entry_safe(rule, n, &chain->rules, list)
- nf_tables_rule_release(&chain_ctx, rule);
-
+ nft_use_dec(&chain->use);
+ list_for_each_entry_safe(rule, n, &chain->rules, list) {
+ nft_use_dec(&chain->use);
+ list_del(&rule->list);
+ nf_tables_rule_destroy(&chain_ctx, rule);
+ }
nf_tables_chain_destroy(&chain_ctx);
break;
default:
diff --git a/net/netfilter/nft_lookup.c b/net/netfilter/nft_lookup.c
index 03ef4fdaa460..29ac48cdd6db 100644
--- a/net/netfilter/nft_lookup.c
+++ b/net/netfilter/nft_lookup.c
@@ -19,6 +19,7 @@ struct nft_lookup {
struct nft_set *set;
u8 sreg;
u8 dreg;
+ bool dreg_set;
bool invert;
struct nft_set_binding binding;
};
@@ -75,7 +76,7 @@ void nft_lookup_eval(const struct nft_expr *expr,
}
if (ext) {
- if (set->flags & NFT_SET_MAP)
+ if (priv->dreg_set)
nft_data_copy(&regs->data[priv->dreg],
nft_set_ext_data(ext), set->dlen);
@@ -122,11 +123,8 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
if (flags & ~NFT_LOOKUP_F_INV)
return -EINVAL;
- if (flags & NFT_LOOKUP_F_INV) {
- if (set->flags & NFT_SET_MAP)
- return -EINVAL;
+ if (flags & NFT_LOOKUP_F_INV)
priv->invert = true;
- }
}
if (tb[NFTA_LOOKUP_DREG] != NULL) {
@@ -140,8 +138,17 @@ static int nft_lookup_init(const struct nft_ctx *ctx,
set->dlen);
if (err < 0)
return err;
- } else if (set->flags & NFT_SET_MAP)
- return -EINVAL;
+ priv->dreg_set = true;
+ } else if (set->flags & NFT_SET_MAP) {
+ /* Map given, but user asks for lookup only (i.e. to
+ * ignore value assoicated with key).
+ *
+ * This makes no sense for anonymous maps since they are
+ * scoped to the rule, but for named sets this can be useful.
+ */
+ if (set->flags & NFT_SET_ANONYMOUS)
+ return -EINVAL;
+ }
priv->binding.flags = set->flags & NFT_SET_MAP;
@@ -188,7 +195,7 @@ static int nft_lookup_dump(struct sk_buff *skb,
goto nla_put_failure;
if (nft_dump_register(skb, NFTA_LOOKUP_SREG, priv->sreg))
goto nla_put_failure;
- if (priv->set->flags & NFT_SET_MAP)
+ if (priv->dreg_set)
if (nft_dump_register(skb, NFTA_LOOKUP_DREG, priv->dreg))
goto nla_put_failure;
if (nla_put_be32(skb, NFTA_LOOKUP_FLAGS, htonl(flags)))
diff --git a/net/netfilter/nft_meta.c b/net/netfilter/nft_meta.c
index e384e0de7a54..8fdc7318c03c 100644
--- a/net/netfilter/nft_meta.c
+++ b/net/netfilter/nft_meta.c
@@ -458,7 +458,7 @@ EXPORT_SYMBOL_GPL(nft_meta_set_eval);
const struct nla_policy nft_meta_policy[NFTA_META_MAX + 1] = {
[NFTA_META_DREG] = { .type = NLA_U32 },
- [NFTA_META_KEY] = { .type = NLA_U32 },
+ [NFTA_META_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_META_SREG] = { .type = NLA_U32 },
};
EXPORT_SYMBOL_GPL(nft_meta_policy);
diff --git a/net/netfilter/nft_objref.c b/net/netfilter/nft_objref.c
index a48dd5b5d45b..509011b1ef59 100644
--- a/net/netfilter/nft_objref.c
+++ b/net/netfilter/nft_objref.c
@@ -41,8 +41,10 @@ static int nft_objref_init(const struct nft_ctx *ctx,
if (IS_ERR(obj))
return -ENOENT;
+ if (!nft_use_inc(&obj->use))
+ return -EMFILE;
+
nft_objref_priv(expr) = obj;
- obj->use++;
return 0;
}
@@ -72,7 +74,7 @@ static void nft_objref_deactivate(const struct nft_ctx *ctx,
if (phase == NFT_TRANS_COMMIT)
return;
- obj->use--;
+ nft_use_dec(&obj->use);
}
static void nft_objref_activate(const struct nft_ctx *ctx,
@@ -80,7 +82,7 @@ static void nft_objref_activate(const struct nft_ctx *ctx,
{
struct nft_object *obj = nft_objref_priv(expr);
- obj->use++;
+ nft_use_inc_restore(&obj->use);
}
static const struct nft_expr_ops nft_objref_ops = {
diff --git a/net/netfilter/nft_payload.c b/net/netfilter/nft_payload.c
index 3a3c7746e88f..8cb800989947 100644
--- a/net/netfilter/nft_payload.c
+++ b/net/netfilter/nft_payload.c
@@ -171,7 +171,8 @@ void nft_payload_eval(const struct nft_expr *expr,
if (!skb_mac_header_was_set(skb))
goto err;
- if (skb_vlan_tag_present(skb)) {
+ if (skb_vlan_tag_present(skb) &&
+ priv->offset >= offsetof(struct ethhdr, h_proto)) {
if (!nft_payload_copy_vlan(dest, skb,
priv->offset, priv->len))
goto err;
diff --git a/net/netfilter/nft_range.c b/net/netfilter/nft_range.c
index 0566d6aaf1e5..51ae64cd268f 100644
--- a/net/netfilter/nft_range.c
+++ b/net/netfilter/nft_range.c
@@ -42,7 +42,7 @@ void nft_range_eval(const struct nft_expr *expr,
static const struct nla_policy nft_range_policy[NFTA_RANGE_MAX + 1] = {
[NFTA_RANGE_SREG] = { .type = NLA_U32 },
- [NFTA_RANGE_OP] = { .type = NLA_U32 },
+ [NFTA_RANGE_OP] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_RANGE_FROM_DATA] = { .type = NLA_NESTED },
[NFTA_RANGE_TO_DATA] = { .type = NLA_NESTED },
};
diff --git a/net/netfilter/nft_reject.c b/net/netfilter/nft_reject.c
index f2addc844dd2..ed2e668474d6 100644
--- a/net/netfilter/nft_reject.c
+++ b/net/netfilter/nft_reject.c
@@ -18,7 +18,7 @@
#include <linux/icmpv6.h>
const struct nla_policy nft_reject_policy[NFTA_REJECT_MAX + 1] = {
- [NFTA_REJECT_TYPE] = { .type = NLA_U32 },
+ [NFTA_REJECT_TYPE] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_REJECT_ICMP_CODE] = { .type = NLA_U8 },
};
EXPORT_SYMBOL_GPL(nft_reject_policy);
diff --git a/net/netfilter/nft_rt.c b/net/netfilter/nft_rt.c
index 5990fdd7b3cc..35a2c28caa60 100644
--- a/net/netfilter/nft_rt.c
+++ b/net/netfilter/nft_rt.c
@@ -104,7 +104,7 @@ err:
static const struct nla_policy nft_rt_policy[NFTA_RT_MAX + 1] = {
[NFTA_RT_DREG] = { .type = NLA_U32 },
- [NFTA_RT_KEY] = { .type = NLA_U32 },
+ [NFTA_RT_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
};
static int nft_rt_get_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_set_bitmap.c b/net/netfilter/nft_set_bitmap.c
index 96081ac8d2b4..1e5e7a181e0b 100644
--- a/net/netfilter/nft_set_bitmap.c
+++ b/net/netfilter/nft_set_bitmap.c
@@ -271,13 +271,14 @@ static int nft_bitmap_init(const struct nft_set *set,
return 0;
}
-static void nft_bitmap_destroy(const struct nft_set *set)
+static void nft_bitmap_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_bitmap *priv = nft_set_priv(set);
struct nft_bitmap_elem *be, *n;
list_for_each_entry_safe(be, n, &priv->list, head)
- nft_set_elem_destroy(set, be, true);
+ nf_tables_set_elem_destroy(ctx, set, be);
}
static bool nft_bitmap_estimate(const struct nft_set_desc *desc, u32 features,
diff --git a/net/netfilter/nft_set_hash.c b/net/netfilter/nft_set_hash.c
index 76de6c8d9865..0b73cb0e752f 100644
--- a/net/netfilter/nft_set_hash.c
+++ b/net/netfilter/nft_set_hash.c
@@ -400,19 +400,31 @@ static int nft_rhash_init(const struct nft_set *set,
return 0;
}
+struct nft_rhash_ctx {
+ const struct nft_ctx ctx;
+ const struct nft_set *set;
+};
+
static void nft_rhash_elem_destroy(void *ptr, void *arg)
{
- nft_set_elem_destroy(arg, ptr, true);
+ struct nft_rhash_ctx *rhash_ctx = arg;
+
+ nf_tables_set_elem_destroy(&rhash_ctx->ctx, rhash_ctx->set, ptr);
}
-static void nft_rhash_destroy(const struct nft_set *set)
+static void nft_rhash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rhash *priv = nft_set_priv(set);
+ struct nft_rhash_ctx rhash_ctx = {
+ .ctx = *ctx,
+ .set = set,
+ };
cancel_delayed_work_sync(&priv->gc_work);
rcu_barrier();
rhashtable_free_and_destroy(&priv->ht, nft_rhash_elem_destroy,
- (void *)set);
+ (void *)&rhash_ctx);
}
/* Number of buckets is stored in u32, so cap our result to 1U<<31 */
@@ -643,7 +655,8 @@ static int nft_hash_init(const struct nft_set *set,
return 0;
}
-static void nft_hash_destroy(const struct nft_set *set)
+static void nft_hash_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_hash *priv = nft_set_priv(set);
struct nft_hash_elem *he;
@@ -653,7 +666,7 @@ static void nft_hash_destroy(const struct nft_set *set)
for (i = 0; i < priv->buckets; i++) {
hlist_for_each_entry_safe(he, next, &priv->table[i], node) {
hlist_del_rcu(&he->node);
- nft_set_elem_destroy(set, he, true);
+ nf_tables_set_elem_destroy(ctx, set, he);
}
}
}
diff --git a/net/netfilter/nft_set_pipapo.c b/net/netfilter/nft_set_pipapo.c
index 06d46d182634..db526cb7a485 100644
--- a/net/netfilter/nft_set_pipapo.c
+++ b/net/netfilter/nft_set_pipapo.c
@@ -1274,8 +1274,7 @@ static struct nft_pipapo_match *pipapo_clone(struct nft_pipapo_match *old)
struct nft_pipapo_match *new;
int i;
- new = kmalloc(sizeof(*new) + sizeof(*dst) * old->field_count,
- GFP_KERNEL);
+ new = kmalloc(struct_size(new, f, old->field_count), GFP_KERNEL);
if (!new)
return ERR_PTR(-ENOMEM);
@@ -1600,17 +1599,10 @@ static void pipapo_free_fields(struct nft_pipapo_match *m)
}
}
-/**
- * pipapo_reclaim_match - RCU callback to free fields from old matching data
- * @rcu: RCU head
- */
-static void pipapo_reclaim_match(struct rcu_head *rcu)
+static void pipapo_free_match(struct nft_pipapo_match *m)
{
- struct nft_pipapo_match *m;
int i;
- m = container_of(rcu, struct nft_pipapo_match, rcu);
-
for_each_possible_cpu(i)
kfree(*per_cpu_ptr(m->scratch, i));
@@ -1625,7 +1617,19 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
}
/**
- * pipapo_commit() - Replace lookup data with current working copy
+ * pipapo_reclaim_match - RCU callback to free fields from old matching data
+ * @rcu: RCU head
+ */
+static void pipapo_reclaim_match(struct rcu_head *rcu)
+{
+ struct nft_pipapo_match *m;
+
+ m = container_of(rcu, struct nft_pipapo_match, rcu);
+ pipapo_free_match(m);
+}
+
+/**
+ * nft_pipapo_commit() - Replace lookup data with current working copy
* @set: nftables API set representation
*
* While at it, check if we should perform garbage collection on the working
@@ -1635,7 +1639,7 @@ static void pipapo_reclaim_match(struct rcu_head *rcu)
* We also need to create a new working copy for subsequent insertions and
* deletions.
*/
-static void pipapo_commit(const struct nft_set *set)
+static void nft_pipapo_commit(const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *new_clone, *old;
@@ -1660,6 +1664,26 @@ static void pipapo_commit(const struct nft_set *set)
priv->clone = new_clone;
}
+static void nft_pipapo_abort(const struct nft_set *set)
+{
+ struct nft_pipapo *priv = nft_set_priv(set);
+ struct nft_pipapo_match *new_clone, *m;
+
+ if (!priv->dirty)
+ return;
+
+ m = rcu_dereference(priv->match);
+
+ new_clone = pipapo_clone(m);
+ if (IS_ERR(new_clone))
+ return;
+
+ priv->dirty = false;
+
+ pipapo_free_match(priv->clone);
+ priv->clone = new_clone;
+}
+
/**
* nft_pipapo_activate() - Mark element reference as active given key, commit
* @net: Network namespace
@@ -1667,8 +1691,7 @@ static void pipapo_commit(const struct nft_set *set)
* @elem: nftables API element representation containing key data
*
* On insertion, elements are added to a copy of the matching data currently
- * in use for lookups, and not directly inserted into current lookup data, so
- * we'll take care of that by calling pipapo_commit() here. Both
+ * in use for lookups, and not directly inserted into current lookup data. Both
* nft_pipapo_insert() and nft_pipapo_activate() are called once for each
* element, hence we can't purpose either one as a real commit operation.
*/
@@ -1684,8 +1707,6 @@ static void nft_pipapo_activate(const struct net *net,
nft_set_elem_change_active(net, set, &e->ext);
nft_set_elem_clear_busy(&e->ext);
-
- pipapo_commit(set);
}
/**
@@ -1931,7 +1952,6 @@ static void nft_pipapo_remove(const struct net *net, const struct nft_set *set,
if (i == m->field_count) {
priv->dirty = true;
pipapo_drop(m, rulemap);
- pipapo_commit(set);
return;
}
@@ -1953,12 +1973,16 @@ static void nft_pipapo_walk(const struct nft_ctx *ctx, struct nft_set *set,
struct nft_set_iter *iter)
{
struct nft_pipapo *priv = nft_set_priv(set);
+ struct net *net = read_pnet(&set->net);
struct nft_pipapo_match *m;
struct nft_pipapo_field *f;
int i, r;
rcu_read_lock();
- m = rcu_dereference(priv->match);
+ if (iter->genmask == nft_genmask_cur(net))
+ m = rcu_dereference(priv->match);
+ else
+ m = priv->clone;
if (unlikely(!m))
goto out;
@@ -2059,8 +2083,7 @@ static int nft_pipapo_init(const struct nft_set *set,
if (field_count > NFT_PIPAPO_MAX_FIELDS)
return -EINVAL;
- m = kmalloc(sizeof(*priv->match) + sizeof(*f) * field_count,
- GFP_KERNEL);
+ m = kmalloc(struct_size(m, f, field_count), GFP_KERNEL);
if (!m)
return -ENOMEM;
@@ -2127,10 +2150,12 @@ out_scratch:
/**
* nft_set_pipapo_match_destroy() - Destroy elements from key mapping array
+ * @ctx: context
* @set: nftables API set representation
* @m: matching data pointing to key mapping array
*/
-static void nft_set_pipapo_match_destroy(const struct nft_set *set,
+static void nft_set_pipapo_match_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set,
struct nft_pipapo_match *m)
{
struct nft_pipapo_field *f;
@@ -2147,15 +2172,17 @@ static void nft_set_pipapo_match_destroy(const struct nft_set *set,
e = f->mt[r].e;
- nft_set_elem_destroy(set, e, true);
+ nf_tables_set_elem_destroy(ctx, set, e);
}
}
/**
* nft_pipapo_destroy() - Free private data for set and all committed elements
+ * @ctx: context
* @set: nftables API set representation
*/
-static void nft_pipapo_destroy(const struct nft_set *set)
+static void nft_pipapo_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_pipapo *priv = nft_set_priv(set);
struct nft_pipapo_match *m;
@@ -2165,7 +2192,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
if (m) {
rcu_barrier();
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(m->scratch_aligned);
@@ -2182,7 +2209,7 @@ static void nft_pipapo_destroy(const struct nft_set *set)
m = priv->clone;
if (priv->dirty)
- nft_set_pipapo_match_destroy(set, m);
+ nft_set_pipapo_match_destroy(ctx, set, m);
#ifdef NFT_PIPAPO_ALIGN
free_percpu(priv->clone->scratch_aligned);
@@ -2230,6 +2257,8 @@ const struct nft_set_type nft_set_pipapo_type = {
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
@@ -2252,6 +2281,8 @@ const struct nft_set_type nft_set_pipapo_avx2_type = {
.init = nft_pipapo_init,
.destroy = nft_pipapo_destroy,
.gc_init = nft_pipapo_gc_init,
+ .commit = nft_pipapo_commit,
+ .abort = nft_pipapo_abort,
.elemsize = offsetof(struct nft_pipapo_elem, ext),
},
};
diff --git a/net/netfilter/nft_set_rbtree.c b/net/netfilter/nft_set_rbtree.c
index 2f114aa10f1a..5c05c9b990fb 100644
--- a/net/netfilter/nft_set_rbtree.c
+++ b/net/netfilter/nft_set_rbtree.c
@@ -664,7 +664,8 @@ static int nft_rbtree_init(const struct nft_set *set,
return 0;
}
-static void nft_rbtree_destroy(const struct nft_set *set)
+static void nft_rbtree_destroy(const struct nft_ctx *ctx,
+ const struct nft_set *set)
{
struct nft_rbtree *priv = nft_set_priv(set);
struct nft_rbtree_elem *rbe;
@@ -675,7 +676,7 @@ static void nft_rbtree_destroy(const struct nft_set *set)
while ((node = priv->root.rb_node) != NULL) {
rb_erase(node, &priv->root);
rbe = rb_entry(node, struct nft_rbtree_elem, node);
- nft_set_elem_destroy(set, rbe, true);
+ nf_tables_set_elem_destroy(ctx, set, rbe);
}
}
diff --git a/net/netfilter/nft_socket.c b/net/netfilter/nft_socket.c
index 85f8df87efda..84def74698b7 100644
--- a/net/netfilter/nft_socket.c
+++ b/net/netfilter/nft_socket.c
@@ -138,9 +138,9 @@ static void nft_socket_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_socket_policy[NFTA_SOCKET_MAX + 1] = {
- [NFTA_SOCKET_KEY] = { .type = NLA_U32 },
+ [NFTA_SOCKET_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_SOCKET_DREG] = { .type = NLA_U32 },
- [NFTA_SOCKET_LEVEL] = { .type = NLA_U32 },
+ [NFTA_SOCKET_LEVEL] = NLA_POLICY_MAX(NLA_BE32, 255),
};
static int nft_socket_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_tproxy.c b/net/netfilter/nft_tproxy.c
index ea83f661417e..ae15cd693f0e 100644
--- a/net/netfilter/nft_tproxy.c
+++ b/net/netfilter/nft_tproxy.c
@@ -183,7 +183,7 @@ static void nft_tproxy_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_tproxy_policy[NFTA_TPROXY_MAX + 1] = {
- [NFTA_TPROXY_FAMILY] = { .type = NLA_U32 },
+ [NFTA_TPROXY_FAMILY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TPROXY_REG_ADDR] = { .type = NLA_U32 },
[NFTA_TPROXY_REG_PORT] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/nft_tunnel.c b/net/netfilter/nft_tunnel.c
index b059aa541798..9f21953c7433 100644
--- a/net/netfilter/nft_tunnel.c
+++ b/net/netfilter/nft_tunnel.c
@@ -66,9 +66,9 @@ static void nft_tunnel_get_eval(const struct nft_expr *expr,
}
static const struct nla_policy nft_tunnel_policy[NFTA_TUNNEL_MAX + 1] = {
- [NFTA_TUNNEL_KEY] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_TUNNEL_DREG] = { .type = NLA_U32 },
- [NFTA_TUNNEL_MODE] = { .type = NLA_U32 },
+ [NFTA_TUNNEL_MODE] = NLA_POLICY_MAX(NLA_BE32, 255),
};
static int nft_tunnel_get_init(const struct nft_ctx *ctx,
diff --git a/net/netfilter/nft_xfrm.c b/net/netfilter/nft_xfrm.c
index c88fd078a9ae..452f8587adda 100644
--- a/net/netfilter/nft_xfrm.c
+++ b/net/netfilter/nft_xfrm.c
@@ -16,9 +16,9 @@
#include <net/xfrm.h>
static const struct nla_policy nft_xfrm_policy[NFTA_XFRM_MAX + 1] = {
- [NFTA_XFRM_KEY] = { .type = NLA_U32 },
+ [NFTA_XFRM_KEY] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_XFRM_DIR] = { .type = NLA_U8 },
- [NFTA_XFRM_SPNUM] = { .type = NLA_U32 },
+ [NFTA_XFRM_SPNUM] = NLA_POLICY_MAX(NLA_BE32, 255),
[NFTA_XFRM_DREG] = { .type = NLA_U32 },
};
diff --git a/net/netfilter/xt_LED.c b/net/netfilter/xt_LED.c
index 66b0f941d8fb..36c9720ad8d6 100644
--- a/net/netfilter/xt_LED.c
+++ b/net/netfilter/xt_LED.c
@@ -43,7 +43,6 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
{
const struct xt_led_info *ledinfo = par->targinfo;
struct xt_led_info_internal *ledinternal = ledinfo->internal_data;
- unsigned long led_delay = XT_LED_BLINK_DELAY;
/*
* If "always blink" is enabled, and there's still some time until the
@@ -52,7 +51,7 @@ led_tg(struct sk_buff *skb, const struct xt_action_param *par)
if ((ledinfo->delay > 0) && ledinfo->always_blink &&
timer_pending(&ledinternal->timer))
led_trigger_blink_oneshot(&ledinternal->netfilter_led_trigger,
- &led_delay, &led_delay, 1);
+ XT_LED_BLINK_DELAY, XT_LED_BLINK_DELAY, 1);
else
led_trigger_event(&ledinternal->netfilter_led_trigger, LED_FULL);
diff --git a/net/netfilter/xt_osf.c b/net/netfilter/xt_osf.c
index e1990baf3a3b..dc9485854002 100644
--- a/net/netfilter/xt_osf.c
+++ b/net/netfilter/xt_osf.c
@@ -71,4 +71,3 @@ MODULE_AUTHOR("Evgeniy Polyakov <[email protected]>");
MODULE_DESCRIPTION("Passive OS fingerprint matching.");
MODULE_ALIAS("ipt_osf");
MODULE_ALIAS("ip6t_osf");
-MODULE_ALIAS_NFNL_SUBSYS(NFNL_SUBSYS_OSF);