diff options
author | David S. Miller <[email protected]> | 2022-12-12 10:14:03 +0000 |
---|---|---|
committer | David S. Miller <[email protected]> | 2022-12-12 10:14:03 +0000 |
commit | 21af0d557ea932e50094a48adb20e356e2f6ffef (patch) | |
tree | 229260979b87bd6cd585ebf94fbd7939fa8254cd | |
parent | a2d40ce7acdbf4641235ee8be4b4d338746c03b5 (diff) | |
parent | ebddb1404900657b7f03a56ee4c34a9d218c4030 (diff) |
Merge branch 'ovs-tc-dedup'
Xin Long says:
====================
net: eliminate the duplicate code in the ct nat functions of ovs and tc
The changes in the patchset:
"net: add helper support in tc act_ct for ovs offloading"
had moved some common ct code used by both OVS and TC into netfilter.
There are still some big functions pretty similar defined and used in
each of OVS and TC. It is not good to maintain such big function in 2
places. This patchset is to extract the functions for NAT processing
from OVS and TC to netfilter.
To make this change clear and safe, this patchset gets the common code
out of OVS and TC step by step: The patch 1-4 make some minor changes
in OVS and TC to make the NAT code of them completely the same, then
the patch 5 moves the common code to the netfilter and exports one
function called by each of OVS and TC.
v1->v2:
- Create nf_nat_ovs.c to include the nat functions, as Pablo suggested.
v2->v3:
- fix a typo in subject of patch 2/5, as Marcelo noticed.
- fix in openvswitch to keep OVS ct nat and TC ct nat consistent in
patch 3/5 instead of in tc, as Marcelo noticed.
- use BIT(var) macro instead of (1 << var) in patch 5/5, as Marcelo
suggested.
- use ifdef in netfilter/Makefile to build nf_nat_ovs only when OVS
or TC ct action is enabled in patch 5/5, as Marcelo suggested.
v3->v4:
- add NF_NAT_OVS in netfilter/Kconfig and add select NF_NAT_OVS in
OVS and TC Kconfig instead of using ifdef in netfilter/Makefile,
as Pablo suggested.
====================
Signed-off-by: David S. Miller <[email protected]>
-rw-r--r-- | include/net/netfilter/nf_nat.h | 4 | ||||
-rw-r--r-- | net/netfilter/Kconfig | 3 | ||||
-rw-r--r-- | net/netfilter/Makefile | 1 | ||||
-rw-r--r-- | net/netfilter/nf_nat_ovs.c | 135 | ||||
-rw-r--r-- | net/openvswitch/Kconfig | 1 | ||||
-rw-r--r-- | net/openvswitch/conntrack.c | 146 | ||||
-rw-r--r-- | net/sched/Kconfig | 1 | ||||
-rw-r--r-- | net/sched/act_ct.c | 136 |
8 files changed, 169 insertions, 258 deletions
diff --git a/include/net/netfilter/nf_nat.h b/include/net/netfilter/nf_nat.h index e9eb01e99d2f..9877f064548a 100644 --- a/include/net/netfilter/nf_nat.h +++ b/include/net/netfilter/nf_nat.h @@ -104,6 +104,10 @@ unsigned int nf_nat_inet_fn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); +int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int *action, + const struct nf_nat_range2 *range, bool commit); + static inline int nf_nat_initialized(const struct nf_conn *ct, enum nf_nat_manip_type manip) { diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig index 0846bd75b1da..f71b41c7ce2f 100644 --- a/net/netfilter/Kconfig +++ b/net/netfilter/Kconfig @@ -459,6 +459,9 @@ config NF_NAT_REDIRECT config NF_NAT_MASQUERADE bool +config NF_NAT_OVS + bool + config NETFILTER_SYNPROXY tristate diff --git a/net/netfilter/Makefile b/net/netfilter/Makefile index 1d4db1943936..3754eb06fb41 100644 --- a/net/netfilter/Makefile +++ b/net/netfilter/Makefile @@ -59,6 +59,7 @@ obj-$(CONFIG_NF_LOG_SYSLOG) += nf_log_syslog.o obj-$(CONFIG_NF_NAT) += nf_nat.o nf_nat-$(CONFIG_NF_NAT_REDIRECT) += nf_nat_redirect.o nf_nat-$(CONFIG_NF_NAT_MASQUERADE) += nf_nat_masquerade.o +nf_nat-$(CONFIG_NF_NAT_OVS) += nf_nat_ovs.o ifeq ($(CONFIG_NF_NAT),m) nf_nat-$(CONFIG_DEBUG_INFO_BTF_MODULES) += nf_nat_bpf.o diff --git a/net/netfilter/nf_nat_ovs.c b/net/netfilter/nf_nat_ovs.c new file mode 100644 index 000000000000..551abd2da614 --- /dev/null +++ b/net/netfilter/nf_nat_ovs.c @@ -0,0 +1,135 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Support nat functions for openvswitch and used by OVS and TC conntrack. */ + +#include <net/netfilter/nf_nat.h> + +/* Modelled after nf_nat_ipv[46]_fn(). + * range is only used for new, uninitialized NAT state. + * Returns either NF_ACCEPT or NF_DROP. + */ +static int nf_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int *action, + const struct nf_nat_range2 *range, + enum nf_nat_manip_type maniptype) +{ + __be16 proto = skb_protocol(skb, true); + int hooknum, err = NF_ACCEPT; + + /* See HOOK2MANIP(). */ + if (maniptype == NF_NAT_MANIP_SRC) + hooknum = NF_INET_LOCAL_IN; /* Source NAT */ + else + hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ + + switch (ctinfo) { + case IP_CT_RELATED: + case IP_CT_RELATED_REPLY: + if (proto == htons(ETH_P_IP) && + ip_hdr(skb)->protocol == IPPROTO_ICMP) { + if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, + hooknum)) + err = NF_DROP; + goto out; + } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { + __be16 frag_off; + u8 nexthdr = ipv6_hdr(skb)->nexthdr; + int hdrlen = ipv6_skip_exthdr(skb, + sizeof(struct ipv6hdr), + &nexthdr, &frag_off); + + if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { + if (!nf_nat_icmpv6_reply_translation(skb, ct, + ctinfo, + hooknum, + hdrlen)) + err = NF_DROP; + goto out; + } + } + /* Non-ICMP, fall thru to initialize if needed. */ + fallthrough; + case IP_CT_NEW: + /* Seen it before? This can happen for loopback, retrans, + * or local packets. + */ + if (!nf_nat_initialized(ct, maniptype)) { + /* Initialize according to the NAT action. */ + err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) + /* Action is set up to establish a new + * mapping. + */ + ? nf_nat_setup_info(ct, range, maniptype) + : nf_nat_alloc_null_binding(ct, hooknum); + if (err != NF_ACCEPT) + goto out; + } + break; + + case IP_CT_ESTABLISHED: + case IP_CT_ESTABLISHED_REPLY: + break; + + default: + err = NF_DROP; + goto out; + } + + err = nf_nat_packet(ct, ctinfo, hooknum, skb); + if (err == NF_ACCEPT) + *action |= BIT(maniptype); +out: + return err; +} + +int nf_ct_nat(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, int *action, + const struct nf_nat_range2 *range, bool commit) +{ + enum nf_nat_manip_type maniptype; + int err, ct_action = *action; + + *action = 0; + + /* Add NAT extension if not confirmed yet. */ + if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) + return NF_DROP; /* Can't NAT. */ + + if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && + (ctinfo != IP_CT_RELATED || commit)) { + /* NAT an established or related connection like before. */ + if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) + /* This is the REPLY direction for a connection + * for which NAT was applied in the forward + * direction. Do the reverse NAT. + */ + maniptype = ct->status & IPS_SRC_NAT + ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; + else + maniptype = ct->status & IPS_SRC_NAT + ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; + } else if (ct_action & BIT(NF_NAT_MANIP_SRC)) { + maniptype = NF_NAT_MANIP_SRC; + } else if (ct_action & BIT(NF_NAT_MANIP_DST)) { + maniptype = NF_NAT_MANIP_DST; + } else { + return NF_ACCEPT; + } + + err = nf_ct_nat_execute(skb, ct, ctinfo, action, range, maniptype); + if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { + if (ct->status & IPS_SRC_NAT) { + if (maniptype == NF_NAT_MANIP_SRC) + maniptype = NF_NAT_MANIP_DST; + else + maniptype = NF_NAT_MANIP_SRC; + + err = nf_ct_nat_execute(skb, ct, ctinfo, action, range, + maniptype); + } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { + err = nf_ct_nat_execute(skb, ct, ctinfo, action, NULL, + NF_NAT_MANIP_SRC); + } + } + return err; +} +EXPORT_SYMBOL_GPL(nf_ct_nat); diff --git a/net/openvswitch/Kconfig b/net/openvswitch/Kconfig index 15bd287f5cbd..747d537a3f06 100644 --- a/net/openvswitch/Kconfig +++ b/net/openvswitch/Kconfig @@ -15,6 +15,7 @@ config OPENVSWITCH select NET_MPLS_GSO select DST_CACHE select NET_NSH + select NF_NAT_OVS if NF_NAT help Open vSwitch is a multilayer Ethernet switch targeted at virtualized environments. In addition to supporting a variety of features diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c index d78f0fc4337d..c8b137649ca4 100644 --- a/net/openvswitch/conntrack.c +++ b/net/openvswitch/conntrack.c @@ -726,147 +726,27 @@ static void ovs_nat_update_key(struct sw_flow_key *key, } } -/* Modelled after nf_nat_ipv[46]_fn(). - * range is only used for new, uninitialized NAT state. - * Returns either NF_ACCEPT or NF_DROP. - */ -static int ovs_ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype, struct sw_flow_key *key) -{ - int hooknum, nh_off, err = NF_ACCEPT; - - nh_off = skb_network_offset(skb); - skb_pull_rcsum(skb, nh_off); - - /* See HOOK2MANIP(). */ - if (maniptype == NF_NAT_MANIP_SRC) - hooknum = NF_INET_LOCAL_IN; /* Source NAT */ - else - hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - if (IS_ENABLED(CONFIG_NF_NAT) && - skb->protocol == htons(ETH_P_IP) && - ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) - err = NF_DROP; - goto push; - } else if (IS_ENABLED(CONFIG_IPV6) && - skb->protocol == htons(ETH_P_IPV6)) { - __be16 frag_off; - u8 nexthdr = ipv6_hdr(skb)->nexthdr; - int hdrlen = ipv6_skip_exthdr(skb, - sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, - ctinfo, - hooknum, - hdrlen)) - err = NF_DROP; - goto push; - } - } - /* Non-ICMP, fall thru to initialize if needed. */ - fallthrough; - case IP_CT_NEW: - /* Seen it before? This can happen for loopback, retrans, - * or local packets. - */ - if (!nf_nat_initialized(ct, maniptype)) { - /* Initialize according to the NAT action. */ - err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) - /* Action is set up to establish a new - * mapping. - */ - ? nf_nat_setup_info(ct, range, maniptype) - : nf_nat_alloc_null_binding(ct, hooknum); - if (err != NF_ACCEPT) - goto push; - } - break; - - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED_REPLY: - break; - - default: - err = NF_DROP; - goto push; - } - - err = nf_nat_packet(ct, ctinfo, hooknum, skb); -push: - skb_push_rcsum(skb, nh_off); - - /* Update the flow key if NAT successful. */ - if (err == NF_ACCEPT) - ovs_nat_update_key(key, skb, maniptype); - - return err; -} - /* Returns NF_DROP if the packet should be dropped, NF_ACCEPT otherwise. */ static int ovs_ct_nat(struct net *net, struct sw_flow_key *key, const struct ovs_conntrack_info *info, struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo) { - enum nf_nat_manip_type maniptype; - int err; + int err, action = 0; - /* Add NAT extension if not confirmed yet. */ - if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) - return NF_ACCEPT; /* Can't NAT. */ + if (!(info->nat & OVS_CT_NAT)) + return NF_ACCEPT; + if (info->nat & OVS_CT_SRC_NAT) + action |= BIT(NF_NAT_MANIP_SRC); + if (info->nat & OVS_CT_DST_NAT) + action |= BIT(NF_NAT_MANIP_DST); - /* Determine NAT type. - * Check if the NAT type can be deduced from the tracked connection. - * Make sure new expected connections (IP_CT_RELATED) are NATted only - * when committing. - */ - if (info->nat & OVS_CT_NAT && ctinfo != IP_CT_NEW && - ct->status & IPS_NAT_MASK && - (ctinfo != IP_CT_RELATED || info->commit)) { - /* NAT an established or related connection like before. */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) - /* This is the REPLY direction for a connection - * for which NAT was applied in the forward - * direction. Do the reverse NAT. - */ - maniptype = ct->status & IPS_SRC_NAT - ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; - else - maniptype = ct->status & IPS_SRC_NAT - ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; - } else if (info->nat & OVS_CT_SRC_NAT) { - maniptype = NF_NAT_MANIP_SRC; - } else if (info->nat & OVS_CT_DST_NAT) { - maniptype = NF_NAT_MANIP_DST; - } else { - return NF_ACCEPT; /* Connection is not NATed. */ - } - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, maniptype, key); - - if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { - if (ct->status & IPS_SRC_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ovs_ct_nat_execute(skb, ct, ctinfo, &info->range, - maniptype, key); - } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { - err = ovs_ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC, key); - } - } + err = nf_ct_nat(skb, ct, ctinfo, &action, &info->range, info->commit); + + if (action & BIT(NF_NAT_MANIP_SRC)) + ovs_nat_update_key(key, skb, NF_NAT_MANIP_SRC); + if (action & BIT(NF_NAT_MANIP_DST)) + ovs_nat_update_key(key, skb, NF_NAT_MANIP_DST); return err; } diff --git a/net/sched/Kconfig b/net/sched/Kconfig index 4662a6ce8a7e..777d6b50505c 100644 --- a/net/sched/Kconfig +++ b/net/sched/Kconfig @@ -977,6 +977,7 @@ config NET_ACT_TUNNEL_KEY config NET_ACT_CT tristate "connection tracking tc action" depends on NET_CLS_ACT && NF_CONNTRACK && (!NF_NAT || NF_NAT) && NF_FLOW_TABLE + select NF_NAT_OVS if NF_NAT help Say Y here to allow sending the packets to conntrack module. diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c index f6df0168c91f..0ca2bb8ed026 100644 --- a/net/sched/act_ct.c +++ b/net/sched/act_ct.c @@ -864,90 +864,6 @@ static void tcf_ct_params_free_rcu(struct rcu_head *head) tcf_ct_params_free(params); } -#if IS_ENABLED(CONFIG_NF_NAT) -/* Modelled after nf_nat_ipv[46]_fn(). - * range is only used for new, uninitialized NAT state. - * Returns either NF_ACCEPT or NF_DROP. - */ -static int ct_nat_execute(struct sk_buff *skb, struct nf_conn *ct, - enum ip_conntrack_info ctinfo, - const struct nf_nat_range2 *range, - enum nf_nat_manip_type maniptype) -{ - __be16 proto = skb_protocol(skb, true); - int hooknum, err = NF_ACCEPT; - - /* See HOOK2MANIP(). */ - if (maniptype == NF_NAT_MANIP_SRC) - hooknum = NF_INET_LOCAL_IN; /* Source NAT */ - else - hooknum = NF_INET_LOCAL_OUT; /* Destination NAT */ - - switch (ctinfo) { - case IP_CT_RELATED: - case IP_CT_RELATED_REPLY: - if (proto == htons(ETH_P_IP) && - ip_hdr(skb)->protocol == IPPROTO_ICMP) { - if (!nf_nat_icmp_reply_translation(skb, ct, ctinfo, - hooknum)) - err = NF_DROP; - goto out; - } else if (IS_ENABLED(CONFIG_IPV6) && proto == htons(ETH_P_IPV6)) { - __be16 frag_off; - u8 nexthdr = ipv6_hdr(skb)->nexthdr; - int hdrlen = ipv6_skip_exthdr(skb, - sizeof(struct ipv6hdr), - &nexthdr, &frag_off); - - if (hdrlen >= 0 && nexthdr == IPPROTO_ICMPV6) { - if (!nf_nat_icmpv6_reply_translation(skb, ct, - ctinfo, - hooknum, - hdrlen)) - err = NF_DROP; - goto out; - } - } - /* Non-ICMP, fall thru to initialize if needed. */ - fallthrough; - case IP_CT_NEW: - /* Seen it before? This can happen for loopback, retrans, - * or local packets. - */ - if (!nf_nat_initialized(ct, maniptype)) { - /* Initialize according to the NAT action. */ - err = (range && range->flags & NF_NAT_RANGE_MAP_IPS) - /* Action is set up to establish a new - * mapping. - */ - ? nf_nat_setup_info(ct, range, maniptype) - : nf_nat_alloc_null_binding(ct, hooknum); - if (err != NF_ACCEPT) - goto out; - } - break; - - case IP_CT_ESTABLISHED: - case IP_CT_ESTABLISHED_REPLY: - break; - - default: - err = NF_DROP; - goto out; - } - - err = nf_nat_packet(ct, ctinfo, hooknum, skb); - if (err == NF_ACCEPT) { - if (maniptype == NF_NAT_MANIP_SRC) - tc_skb_cb(skb)->post_ct_snat = 1; - if (maniptype == NF_NAT_MANIP_DST) - tc_skb_cb(skb)->post_ct_dnat = 1; - } -out: - return err; -} -#endif /* CONFIG_NF_NAT */ - static void tcf_ct_act_set_mark(struct nf_conn *ct, u32 mark, u32 mask) { #if IS_ENABLED(CONFIG_NF_CONNTRACK_MARK) @@ -987,52 +903,22 @@ static int tcf_ct_act_nat(struct sk_buff *skb, bool commit) { #if IS_ENABLED(CONFIG_NF_NAT) - int err; - enum nf_nat_manip_type maniptype; + int err, action = 0; if (!(ct_action & TCA_CT_ACT_NAT)) return NF_ACCEPT; + if (ct_action & TCA_CT_ACT_NAT_SRC) + action |= BIT(NF_NAT_MANIP_SRC); + if (ct_action & TCA_CT_ACT_NAT_DST) + action |= BIT(NF_NAT_MANIP_DST); - /* Add NAT extension if not confirmed yet. */ - if (!nf_ct_is_confirmed(ct) && !nf_ct_nat_ext_add(ct)) - return NF_DROP; /* Can't NAT. */ - - if (ctinfo != IP_CT_NEW && (ct->status & IPS_NAT_MASK) && - (ctinfo != IP_CT_RELATED || commit)) { - /* NAT an established or related connection like before. */ - if (CTINFO2DIR(ctinfo) == IP_CT_DIR_REPLY) - /* This is the REPLY direction for a connection - * for which NAT was applied in the forward - * direction. Do the reverse NAT. - */ - maniptype = ct->status & IPS_SRC_NAT - ? NF_NAT_MANIP_DST : NF_NAT_MANIP_SRC; - else - maniptype = ct->status & IPS_SRC_NAT - ? NF_NAT_MANIP_SRC : NF_NAT_MANIP_DST; - } else if (ct_action & TCA_CT_ACT_NAT_SRC) { - maniptype = NF_NAT_MANIP_SRC; - } else if (ct_action & TCA_CT_ACT_NAT_DST) { - maniptype = NF_NAT_MANIP_DST; - } else { - return NF_ACCEPT; - } + err = nf_ct_nat(skb, ct, ctinfo, &action, range, commit); + + if (action & BIT(NF_NAT_MANIP_SRC)) + tc_skb_cb(skb)->post_ct_snat = 1; + if (action & BIT(NF_NAT_MANIP_DST)) + tc_skb_cb(skb)->post_ct_dnat = 1; - err = ct_nat_execute(skb, ct, ctinfo, range, maniptype); - if (err == NF_ACCEPT && ct->status & IPS_DST_NAT) { - if (ct->status & IPS_SRC_NAT) { - if (maniptype == NF_NAT_MANIP_SRC) - maniptype = NF_NAT_MANIP_DST; - else - maniptype = NF_NAT_MANIP_SRC; - - err = ct_nat_execute(skb, ct, ctinfo, range, - maniptype); - } else if (CTINFO2DIR(ctinfo) == IP_CT_DIR_ORIGINAL) { - err = ct_nat_execute(skb, ct, ctinfo, NULL, - NF_NAT_MANIP_SRC); - } - } return err; #else return NF_ACCEPT; |