aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorPaolo Abeni <[email protected]>2024-08-20 14:57:09 +0200
committerPaolo Abeni <[email protected]>2024-08-20 14:57:09 +0200
commit6b2efdc454406ffcaa93ca925cebad448ea3241e (patch)
treeeb50691db2228b4fc1840e8a2b1f4df7f4f2038c
parentccb445ae460e90b6864e5d77d72c42ae2b3cf377 (diff)
parent1fa3314c14c6a20d098991a0a6980f9b18b2f930 (diff)
Merge branch 'preparations-for-fib-rule-dscp-selector'
Ido Schimmel says: ==================== Preparations for FIB rule DSCP selector This patchset moves the masking of the upper DSCP bits in 'flowi4_tos' to the core instead of relying on callers of the FIB lookup API to do it. This will allow us to start changing users of the API to initialize the 'flowi4_tos' field with all six bits of the DSCP field. In turn, this will allow us to extend FIB rules with a new DSCP selector. By masking the upper DSCP bits in the core we are able to maintain the behavior of the TOS selector in FIB rules and routes to only match on the lower DSCP bits. While working on this I found two users of the API that do not mask the upper DSCP bits before performing the lookup. The first is an ancient netlink family that is unlikely to be used. It is adjusted in patch #1 to mask both the upper DSCP bits and the ECN bits before calling the API. The second user is a nftables module that differs in this regard from its equivalent iptables module. It is adjusted in patch #2 to invoke the API with the upper DSCP bits masked, like all other callers. The relevant selftest passed, but in the unlikely case that regressions are reported because of this change, we can restore the existing behavior using a new flow information flag as discussed here [1]. The last patch moves the masking of the upper DSCP bits to the core, making the first two patches redundant, but I wanted to post them separately to call attention to the behavior change for these two users of the FIB lookup API. Future patchsets (around 3) will start unmasking the upper DSCP bits throughout the networking stack before adding support for the new FIB rule DSCP selector. Changes from v1 [2]: Patch #3: Include <linux/ip.h> in <linux/in_route.h> instead of including it in net/ip_fib.h [1] https://lore.kernel.org/netdev/ZpqpB8vJU%2FQ6LSqa@debian/ [2] https://lore.kernel.org/netdev/[email protected]/ ==================== Link: https://patch.msgid.link/[email protected] Signed-off-by: Paolo Abeni <[email protected]>
-rw-r--r--include/net/ip_fib.h6
-rw-r--r--include/uapi/linux/in_route.h2
-rw-r--r--net/ipv4/fib_frontend.c2
-rw-r--r--net/ipv4/fib_rules.c2
-rw-r--r--net/ipv4/fib_semantics.c3
-rw-r--r--net/ipv4/fib_trie.c3
-rw-r--r--net/ipv4/netfilter/nft_fib_ipv4.c4
7 files changed, 13 insertions, 9 deletions
diff --git a/include/net/ip_fib.h b/include/net/ip_fib.h
index 72af2f223e59..269ec10f63e4 100644
--- a/include/net/ip_fib.h
+++ b/include/net/ip_fib.h
@@ -22,6 +22,7 @@
#include <linux/percpu.h>
#include <linux/notifier.h>
#include <linux/refcount.h>
+#include <linux/in_route.h>
struct fib_config {
u8 fc_dst_len;
@@ -434,6 +435,11 @@ static inline bool fib4_rules_early_flow_dissect(struct net *net,
#endif /* CONFIG_IP_MULTIPLE_TABLES */
+static inline bool fib_dscp_masked_match(dscp_t dscp, const struct flowi4 *fl4)
+{
+ return dscp == inet_dsfield_to_dscp(RT_TOS(fl4->flowi4_tos));
+}
+
/* Exported by fib_frontend.c */
extern const struct nla_policy rtm_ipv4_policy[];
void ip_fib_init(void);
diff --git a/include/uapi/linux/in_route.h b/include/uapi/linux/in_route.h
index 0cc2c23b47f8..10bdd7e7107f 100644
--- a/include/uapi/linux/in_route.h
+++ b/include/uapi/linux/in_route.h
@@ -2,6 +2,8 @@
#ifndef _LINUX_IN_ROUTE_H
#define _LINUX_IN_ROUTE_H
+#include <linux/ip.h>
+
/* IPv4 routing cache flags */
#define RTCF_DEAD RTNH_F_DEAD
diff --git a/net/ipv4/fib_frontend.c b/net/ipv4/fib_frontend.c
index 7ad2cafb9276..da540ddb7af6 100644
--- a/net/ipv4/fib_frontend.c
+++ b/net/ipv4/fib_frontend.c
@@ -1343,7 +1343,7 @@ static void nl_fib_lookup(struct net *net, struct fib_result_nl *frn)
struct flowi4 fl4 = {
.flowi4_mark = frn->fl_mark,
.daddr = frn->fl_addr,
- .flowi4_tos = frn->fl_tos,
+ .flowi4_tos = frn->fl_tos & IPTOS_RT_MASK,
.flowi4_scope = frn->fl_scope,
};
struct fib_table *tb;
diff --git a/net/ipv4/fib_rules.c b/net/ipv4/fib_rules.c
index 5bdd1c016009..c26776b71e97 100644
--- a/net/ipv4/fib_rules.c
+++ b/net/ipv4/fib_rules.c
@@ -186,7 +186,7 @@ INDIRECT_CALLABLE_SCOPE int fib4_rule_match(struct fib_rule *rule,
((daddr ^ r->dst) & r->dstmask))
return 0;
- if (r->dscp && r->dscp != inet_dsfield_to_dscp(fl4->flowi4_tos))
+ if (r->dscp && !fib_dscp_masked_match(r->dscp, fl4))
return 0;
if (rule->ip_proto && (rule->ip_proto != fl4->flowi4_proto))
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 2b57cd2b96e2..0f70341cb8b5 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -2066,8 +2066,7 @@ static void fib_select_default(const struct flowi4 *flp, struct fib_result *res)
if (fa->fa_slen != slen)
continue;
- if (fa->fa_dscp &&
- fa->fa_dscp != inet_dsfield_to_dscp(flp->flowi4_tos))
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
if (fa->tb_id != tb->tb_id)
continue;
diff --git a/net/ipv4/fib_trie.c b/net/ipv4/fib_trie.c
index 8f30e3f00b7f..09e31757e96c 100644
--- a/net/ipv4/fib_trie.c
+++ b/net/ipv4/fib_trie.c
@@ -1580,8 +1580,7 @@ found:
if (index >= (1ul << fa->fa_slen))
continue;
}
- if (fa->fa_dscp &&
- inet_dscp_to_dsfield(fa->fa_dscp) != flp->flowi4_tos)
+ if (fa->fa_dscp && !fib_dscp_masked_match(fa->fa_dscp, flp))
continue;
/* Paired with WRITE_ONCE() in fib_release_info() */
if (READ_ONCE(fi->fib_dead))
diff --git a/net/ipv4/netfilter/nft_fib_ipv4.c b/net/ipv4/netfilter/nft_fib_ipv4.c
index 9eee535c64dd..df94bc28c3d7 100644
--- a/net/ipv4/netfilter/nft_fib_ipv4.c
+++ b/net/ipv4/netfilter/nft_fib_ipv4.c
@@ -22,8 +22,6 @@ static __be32 get_saddr(__be32 addr)
return addr;
}
-#define DSCP_BITS 0xfc
-
void nft_fib4_eval_type(const struct nft_expr *expr, struct nft_regs *regs,
const struct nft_pktinfo *pkt)
{
@@ -110,7 +108,7 @@ void nft_fib4_eval(const struct nft_expr *expr, struct nft_regs *regs,
if (priv->flags & NFTA_FIB_F_MARK)
fl4.flowi4_mark = pkt->skb->mark;
- fl4.flowi4_tos = iph->tos & DSCP_BITS;
+ fl4.flowi4_tos = iph->tos & IPTOS_RT_MASK;
if (priv->flags & NFTA_FIB_F_DADDR) {
fl4.daddr = iph->daddr;