aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--include/linux/ipv6.h1
-rw-r--r--include/net/ip.h1
-rw-r--r--net/ipv4/ip_input.c3
-rw-r--r--net/ipv4/route.c1
-rw-r--r--net/ipv6/ip6_input.c3
-rw-r--r--net/ipv6/route.c3
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh155
7 files changed, 164 insertions, 3 deletions
diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h
index 5883551b1ee8..af8a771a053c 100644
--- a/include/linux/ipv6.h
+++ b/include/linux/ipv6.h
@@ -147,6 +147,7 @@ struct inet6_skb_parm {
#define IP6SKB_JUMBOGRAM 128
#define IP6SKB_SEG6 256
#define IP6SKB_FAKEJUMBO 512
+#define IP6SKB_MULTIPATH 1024
};
#if defined(CONFIG_NET_L3_MASTER_DEV)
diff --git a/include/net/ip.h b/include/net/ip.h
index 9276cea775cc..3489a1cca5e7 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -57,6 +57,7 @@ struct inet_skb_parm {
#define IPSKB_FRAG_PMTU BIT(6)
#define IPSKB_L3SLAVE BIT(7)
#define IPSKB_NOPOLICY BIT(8)
+#define IPSKB_MULTIPATH BIT(9)
u16 frag_max_size;
};
diff --git a/net/ipv4/ip_input.c b/net/ipv4/ip_input.c
index fe9ead9ee863..5e9c8156656a 100644
--- a/net/ipv4/ip_input.c
+++ b/net/ipv4/ip_input.c
@@ -584,7 +584,8 @@ static void ip_sublist_rcv_finish(struct list_head *head)
static struct sk_buff *ip_extract_route_hint(const struct net *net,
struct sk_buff *skb, int rt_type)
{
- if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST)
+ if (fib4_has_custom_rules(net) || rt_type == RTN_BROADCAST ||
+ IPCB(skb)->flags & IPSKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index a4e153dd615b..6a3f57a3fa41 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -2144,6 +2144,7 @@ static int ip_mkroute_input(struct sk_buff *skb,
int h = fib_multipath_hash(res->fi->fib_net, NULL, skb, hkeys);
fib_select_multipath(res, h);
+ IPCB(skb)->flags |= IPSKB_MULTIPATH;
}
#endif
diff --git a/net/ipv6/ip6_input.c b/net/ipv6/ip6_input.c
index d94041bb4287..b8378814532c 100644
--- a/net/ipv6/ip6_input.c
+++ b/net/ipv6/ip6_input.c
@@ -99,7 +99,8 @@ static bool ip6_can_use_hint(const struct sk_buff *skb,
static struct sk_buff *ip6_extract_route_hint(const struct net *net,
struct sk_buff *skb)
{
- if (fib6_routes_require_src(net) || fib6_has_custom_rules(net))
+ if (fib6_routes_require_src(net) || fib6_has_custom_rules(net) ||
+ IP6CB(skb)->flags & IP6SKB_MULTIPATH)
return NULL;
return skb;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 846aec8e0093..01d6d352850a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -423,6 +423,9 @@ void fib6_select_path(const struct net *net, struct fib6_result *res,
if (match->nh && have_oif_match && res->nh)
return;
+ if (skb)
+ IP6CB(skb)->flags |= IP6SKB_MULTIPATH;
+
/* We might have already computed the hash for ICMPv6 errors. In such
* case it will always be non-zero. Otherwise now is the time to do it.
*/
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index d328af4a149c..e7d2a530618a 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -12,7 +12,8 @@ ksft_skip=4
TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
ipv6_rt ipv4_rt ipv6_addr_metric ipv4_addr_metric ipv6_route_metrics \
ipv4_route_metrics ipv4_route_v6_gw rp_filter ipv4_del_addr \
- ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test"
+ ipv6_del_addr ipv4_mangle ipv6_mangle ipv4_bcast_neigh fib6_gc_test \
+ ipv4_mpath_list ipv6_mpath_list"
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -2352,6 +2353,156 @@ ipv4_bcast_neigh_test()
cleanup
}
+mpath_dep_check()
+{
+ if [ ! -x "$(command -v mausezahn)" ]; then
+ echo "mausezahn command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v jq)" ]; then
+ echo "jq command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v bc)" ]; then
+ echo "bc command not found. Skipping test"
+ return 1
+ fi
+
+ if [ ! -x "$(command -v perf)" ]; then
+ echo "perf command not found. Skipping test"
+ return 1
+ fi
+
+ perf list fib:* | grep -q fib_table_lookup
+ if [ $? -ne 0 ]; then
+ echo "IPv4 FIB tracepoint not found. Skipping test"
+ return 1
+ fi
+
+ perf list fib6:* | grep -q fib6_table_lookup
+ if [ $? -ne 0 ]; then
+ echo "IPv6 FIB tracepoint not found. Skipping test"
+ return 1
+ fi
+
+ return 0
+}
+
+link_stats_get()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local dir=$1; shift
+ local stat=$1; shift
+
+ ip -n $ns -j -s link show dev $dev \
+ | jq '.[]["stats64"]["'$dir'"]["'$stat'"]'
+}
+
+list_rcv_eval()
+{
+ local file=$1; shift
+ local expected=$1; shift
+
+ local count=$(tail -n 1 $file | jq '.["counter-value"] | tonumber | floor')
+ local ratio=$(echo "scale=2; $count / $expected" | bc -l)
+ local res=$(echo "$ratio >= 0.95" | bc)
+ [[ $res -eq 1 ]]
+ log_test $? 0 "Multipath route hit ratio ($ratio)"
+}
+
+ipv4_mpath_list_test()
+{
+ echo
+ echo "IPv4 multipath list receive tests"
+
+ mpath_dep_check || return 1
+
+ route_setup
+
+ set -e
+ run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
+ run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
+ run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n ns2 route add 203.0.113.0/24
+ nexthop via 172.16.201.2 nexthop via 172.16.202.2"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+ set +e
+
+ local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local tmp_file=$(mktemp)
+ local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac
+ -A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
+
+ # Packets forwarded in a list using a multipath route must not reuse a
+ # cached result so that a flow always hits the same nexthop. In other
+ # words, the FIB lookup tracepoint needs to be triggered for every
+ # packet.
+ local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ run_cmd "perf stat -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+ list_rcv_eval $tmp_file $diff
+
+ rm $tmp_file
+ route_cleanup
+}
+
+ipv6_mpath_list_test()
+{
+ echo
+ echo "IPv6 multipath list receive tests"
+
+ mpath_dep_check || return 1
+
+ route_setup
+
+ set -e
+ run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+ run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+ run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64
+ nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
+ run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+ set +e
+
+ local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local tmp_file=$(mktemp)
+ local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac
+ -A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
+
+ # Packets forwarded in a list using a multipath route must not reuse a
+ # cached result so that a flow always hits the same nexthop. In other
+ # words, the FIB lookup tracepoint needs to be triggered for every
+ # packet.
+ local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ run_cmd "perf stat -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
+ local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
+ list_rcv_eval $tmp_file $diff
+
+ rm $tmp_file
+ route_cleanup
+}
+
################################################################################
# usage
@@ -2433,6 +2584,8 @@ do
ipv6_mangle) ipv6_mangle_test;;
ipv4_bcast_neigh) ipv4_bcast_neigh_test;;
fib6_gc_test|ipv6_gc) fib6_gc_test;;
+ ipv4_mpath_list) ipv4_mpath_list_test;;
+ ipv6_mpath_list) ipv6_mpath_list_test;;
help) echo "Test names: $TESTS"; exit 0;;
esac