aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Abeni <pabeni@redhat.com>2023-07-20 10:06:38 +0200
committerPaolo Abeni <pabeni@redhat.com>2023-07-20 10:06:38 +0200
commit2d6d7d6ce257e3ea00e1524fdb138a3201df46ed (patch)
treedc860b3eaedc3dc4851872eb5663e99fd0e3c459
parent03b123debcbc8db987bda17ed8412cc011064c22 (diff)
parent8c8b733208058702da451b7d60a12c0ff90b6879 (diff)
Merge branch 'net-handle-the-exp-removal-problem-with-ovs-upcall-properly'
Xin Long says: ==================== net: handle the exp removal problem with ovs upcall properly With the OVS upcall, the original ct in the skb will be dropped, and when the skb comes back from userspace it has to create a new ct again through nf_conntrack_in() in either OVS __ovs_ct_lookup() or TC tcf_ct_act(). However, the new ct will not be able to have the exp as the original ct has taken it away from the hash table in nf_ct_find_expectation(). This will cause some flow never to be matched, like: 'ip,ct_state=-trk,in_port=1 actions=ct(zone=1)' 'ip,ct_state=+trk+new+rel,in_port=1 actions=ct(commit,zone=1)' 'ip,ct_state=+trk+new+rel,in_port=1 actions=ct(commit,zone=2),normal' if the 2nd flow triggers the OVS upcall, the 3rd flow will never get matched. OVS conntrack works around this by adding its own exp lookup function to not remove the exp from the hash table and saving the exp and its master info to the flow keys instead of create a real ct. But this way doesn't work for TC act_ct. The patch 1/3 allows nf_ct_find_expectation() not to remove the exp from the hash table if tmpl is set with IPS_CONFIRMED when doing lookup. This allows both OVS conntrack and TC act_ct to have a simple and clear fix for this problem in the patch 2/3 and 3/3. ==================== Link: https://lore.kernel.org/r/cover.1689541664.git.lucien.xin@gmail.com Signed-off-by: Paolo Abeni <pabeni@redhat.com>
-rw-r--r--include/net/netfilter/nf_conntrack_expect.h2
-rw-r--r--net/netfilter/nf_conntrack_core.c2
-rw-r--r--net/netfilter/nf_conntrack_expect.c4
-rw-r--r--net/netfilter/nft_ct.c2
-rw-r--r--net/openvswitch/conntrack.c78
-rw-r--r--net/sched/act_ct.c3
6 files changed, 18 insertions, 73 deletions
diff --git a/include/net/netfilter/nf_conntrack_expect.h b/include/net/netfilter/nf_conntrack_expect.h
index cf0d81be5a96..165e7a03b8e9 100644
--- a/include/net/netfilter/nf_conntrack_expect.h
+++ b/include/net/netfilter/nf_conntrack_expect.h
@@ -100,7 +100,7 @@ nf_ct_expect_find_get(struct net *net,
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple);
+ const struct nf_conntrack_tuple *tuple, bool unlink);
void nf_ct_unlink_expect_report(struct nf_conntrack_expect *exp,
u32 portid, int report);
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 992393102d5f..9f6f2e643575 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1756,7 +1756,7 @@ init_conntrack(struct net *net, struct nf_conn *tmpl,
cnet = nf_ct_pernet(net);
if (cnet->expect_count) {
spin_lock_bh(&nf_conntrack_expect_lock);
- exp = nf_ct_find_expectation(net, zone, tuple);
+ exp = nf_ct_find_expectation(net, zone, tuple, !tmpl || nf_ct_is_confirmed(tmpl));
if (exp) {
/* Welcome, Mr. Bond. We've been expecting you... */
__set_bit(IPS_EXPECTED_BIT, &ct->status);
diff --git a/net/netfilter/nf_conntrack_expect.c b/net/netfilter/nf_conntrack_expect.c
index 96948e98ec53..81ca348915c9 100644
--- a/net/netfilter/nf_conntrack_expect.c
+++ b/net/netfilter/nf_conntrack_expect.c
@@ -171,7 +171,7 @@ EXPORT_SYMBOL_GPL(nf_ct_expect_find_get);
struct nf_conntrack_expect *
nf_ct_find_expectation(struct net *net,
const struct nf_conntrack_zone *zone,
- const struct nf_conntrack_tuple *tuple)
+ const struct nf_conntrack_tuple *tuple, bool unlink)
{
struct nf_conntrack_net *cnet = nf_ct_pernet(net);
struct nf_conntrack_expect *i, *exp = NULL;
@@ -211,7 +211,7 @@ nf_ct_find_expectation(struct net *net,
!refcount_inc_not_zero(&exp->master->ct_general.use)))
return NULL;
- if (exp->flags & NF_CT_EXPECT_PERMANENT) {
+ if (exp->flags & NF_CT_EXPECT_PERMANENT || !unlink) {
refcount_inc(&exp->use);
return exp;
} else if (del_timer(&exp->timeout)) {
diff --git a/net/netfilter/nft_ct.c b/net/netfilter/nft_ct.c
index 38958e067aa8..e87fd4314c68 100644
--- a/net/netfilter/nft_ct.c
+++ b/net/netfilter/nft_ct.c
@@ -262,6 +262,7 @@ static void nft_ct_set_zone_eval(const struct nft_expr *expr,
regs->verdict.code = NF_DROP;
return;
}
+ __set_bit(IPS_CONFIRMED_BIT, &ct->status);
}
nf_ct_set(skb, ct, IP_CT_NEW);
@@ -368,6 +369,7 @@ static bool nft_ct_tmpl_alloc_pcpu(void)
return false;
}
+ __set_bit(IPS_CONFIRMED_BIT, &tmp->status);
per_cpu(nft_ct_pcpu_template, cpu) = tmp;
}
diff --git a/net/openvswitch/conntrack.c b/net/openvswitch/conntrack.c
index 331730fd3580..fa955e892210 100644
--- a/net/openvswitch/conntrack.c
+++ b/net/openvswitch/conntrack.c
@@ -455,45 +455,6 @@ static int ovs_ct_handle_fragments(struct net *net, struct sw_flow_key *key,
return 0;
}
-static struct nf_conntrack_expect *
-ovs_ct_expect_find(struct net *net, const struct nf_conntrack_zone *zone,
- u16 proto, const struct sk_buff *skb)
-{
- struct nf_conntrack_tuple tuple;
- struct nf_conntrack_expect *exp;
-
- if (!nf_ct_get_tuplepr(skb, skb_network_offset(skb), proto, net, &tuple))
- return NULL;
-
- exp = __nf_ct_expect_find(net, zone, &tuple);
- if (exp) {
- struct nf_conntrack_tuple_hash *h;
-
- /* Delete existing conntrack entry, if it clashes with the
- * expectation. This can happen since conntrack ALGs do not
- * check for clashes between (new) expectations and existing
- * conntrack entries. nf_conntrack_in() will check the
- * expectations only if a conntrack entry can not be found,
- * which can lead to OVS finding the expectation (here) in the
- * init direction, but which will not be removed by the
- * nf_conntrack_in() call, if a matching conntrack entry is
- * found instead. In this case all init direction packets
- * would be reported as new related packets, while reply
- * direction packets would be reported as un-related
- * established packets.
- */
- h = nf_conntrack_find_get(net, zone, &tuple);
- if (h) {
- struct nf_conn *ct = nf_ct_tuplehash_to_ctrack(h);
-
- nf_ct_delete(ct, 0, 0);
- nf_ct_put(ct);
- }
- }
-
- return exp;
-}
-
/* This replicates logic from nf_conntrack_core.c that is not exported. */
static enum ip_conntrack_info
ovs_ct_get_info(const struct nf_conntrack_tuple_hash *h)
@@ -852,36 +813,16 @@ static int ovs_ct_lookup(struct net *net, struct sw_flow_key *key,
const struct ovs_conntrack_info *info,
struct sk_buff *skb)
{
- struct nf_conntrack_expect *exp;
-
- /* If we pass an expected packet through nf_conntrack_in() the
- * expectation is typically removed, but the packet could still be
- * lost in upcall processing. To prevent this from happening we
- * perform an explicit expectation lookup. Expected connections are
- * always new, and will be passed through conntrack only when they are
- * committed, as it is OK to remove the expectation at that time.
- */
- exp = ovs_ct_expect_find(net, &info->zone, info->family, skb);
- if (exp) {
- u8 state;
-
- /* NOTE: New connections are NATted and Helped only when
- * committed, so we are not calling into NAT here.
- */
- state = OVS_CS_F_TRACKED | OVS_CS_F_NEW | OVS_CS_F_RELATED;
- __ovs_ct_update_key(key, state, &info->zone, exp->master);
- } else {
- struct nf_conn *ct;
- int err;
+ struct nf_conn *ct;
+ int err;
- err = __ovs_ct_lookup(net, key, info, skb);
- if (err)
- return err;
+ err = __ovs_ct_lookup(net, key, info, skb);
+ if (err)
+ return err;
- ct = (struct nf_conn *)skb_nfct(skb);
- if (ct)
- nf_ct_deliver_cached_events(ct);
- }
+ ct = (struct nf_conn *)skb_nfct(skb);
+ if (ct)
+ nf_ct_deliver_cached_events(ct);
return 0;
}
@@ -1460,7 +1401,8 @@ int ovs_ct_copy_action(struct net *net, const struct nlattr *attr,
if (err)
goto err_free_ct;
- __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
+ if (ct_info.commit)
+ __set_bit(IPS_CONFIRMED_BIT, &ct_info.ct->status);
return 0;
err_free_ct:
__ovs_ct_free_action(&ct_info);
diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index abc71a06d634..7c652d14528b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -1238,7 +1238,8 @@ static int tcf_ct_fill_params(struct net *net,
}
}
- __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
+ if (p->ct_action & TCA_CT_ACT_COMMIT)
+ __set_bit(IPS_CONFIRMED_BIT, &tmpl->status);
return 0;
err:
nf_ct_put(p->tmpl);