Diffstat (limited to 'tools/testing/selftests/net')
-rw-r--r--tools/testing/selftests/net/.gitignore3
-rw-r--r--tools/testing/selftests/net/Makefile65
-rw-r--r--tools/testing/selftests/net/af_unix/Makefile2
-rw-r--r--tools/testing/selftests/net/af_unix/diag_uid.c1
-rw-r--r--tools/testing/selftests/net/af_unix/scm_rights.c286
-rwxr-xr-xtools/testing/selftests/net/amt.sh12
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh46
-rwxr-xr-xtools/testing/selftests/net/arp_ndisc_untracked_subnets.sh20
-rwxr-xr-xtools/testing/selftests/net/big_tcp.sh4
-rw-r--r--tools/testing/selftests/net/bind_wildcard.c783
-rw-r--r--tools/testing/selftests/net/bpf.mk53
-rwxr-xr-xtools/testing/selftests/net/bpf_offload.py1341
-rwxr-xr-xtools/testing/selftests/net/cmsg_ipv6.sh14
-rw-r--r--tools/testing/selftests/net/cmsg_sender.c98
-rwxr-xr-xtools/testing/selftests/net/cmsg_so_mark.sh7
-rwxr-xr-xtools/testing/selftests/net/cmsg_time.sh14
-rw-r--r--tools/testing/selftests/net/config51
-rwxr-xr-xtools/testing/selftests/net/devlink_port_split.py309
-rwxr-xr-xtools/testing/selftests/net/drop_monitor_tests.sh21
-rw-r--r--tools/testing/selftests/net/epoll_busy_poll.c320
-rwxr-xr-xtools/testing/selftests/net/fcnal-test.sh62
-rwxr-xr-xtools/testing/selftests/net/fdb_flush.sh11
-rwxr-xr-xtools/testing/selftests/net/fib-onlink-tests.sh9
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_multiprefix.sh98
-rwxr-xr-xtools/testing/selftests/net/fib_nexthop_nongw.sh34
-rwxr-xr-xtools/testing/selftests/net/fib_nexthops.sh148
-rwxr-xr-xtools/testing/selftests/net/fib_rule_tests.sh82
-rwxr-xr-xtools/testing/selftests/net/fib_tests.sh332
-rw-r--r--tools/testing/selftests/net/forwarding/Makefile14
-rw-r--r--tools/testing/selftests/net/forwarding/README33
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_locked_port.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/bridge_mdb.sh205
-rw-r--r--tools/testing/selftests/net/forwarding/config35
-rwxr-xr-xtools/testing/selftests/net/forwarding/custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool.sh301
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_extended_state.sh117
-rw-r--r--tools/testing/selftests/net/forwarding/ethtool_lib.sh120
-rwxr-xr-xtools/testing/selftests/net/forwarding/ethtool_mm.sh296
-rw-r--r--tools/testing/selftests/net/forwarding/forwarding.config.sample51
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh6
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh.sh41
-rwxr-xr-xtools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh42
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3.sh340
-rwxr-xr-xtools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh111
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh6
-rw-r--r--tools/testing/selftests/net/forwarding/ip6gre_lib.sh4
-rw-r--r--tools/testing/selftests/net/forwarding/ipip_lib.sh1
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/forwarding/lib.sh466
-rwxr-xr-xtools/testing/selftests/net/forwarding/lib_sh_test.sh208
-rwxr-xr-xtools/testing/selftests/net/forwarding/loopback.sh102
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_lib.sh2
-rw-r--r--tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh.sh87
-rw-r--r--tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh119
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_mpath_nh_res.sh52
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_multipath.sh43
-rwxr-xr-xtools/testing/selftests/net/forwarding/router_nh.sh14
-rw-r--r--tools/testing/selftests/net/forwarding/sch_ets_tests.sh19
-rwxr-xr-xtools/testing/selftests/net/forwarding/sch_red.sh10
-rw-r--r--tools/testing/selftests/net/forwarding/sch_tbf_core.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_actions.sh3
-rw-r--r--tools/testing/selftests/net/forwarding/tc_common.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_police.sh16
-rwxr-xr-xtools/testing/selftests/net/forwarding/tc_tunnel_key.sh2
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh4
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh8
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh10
-rwxr-xr-xtools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh4
-rwxr-xr-xtools/testing/selftests/net/fq_band_pktlimit.sh59
-rwxr-xr-xtools/testing/selftests/net/gre_gso.sh18
-rw-r--r--tools/testing/selftests/net/gro.c234
-rwxr-xr-xtools/testing/selftests/net/gro.sh9
-rw-r--r--tools/testing/selftests/net/hsr/Makefile3
-rw-r--r--tools/testing/selftests/net/hsr/hsr_common.sh84
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_ping.sh106
-rwxr-xr-xtools/testing/selftests/net/hsr/hsr_redbox.sh121
-rwxr-xr-xtools/testing/selftests/net/icmp.sh10
-rwxr-xr-xtools/testing/selftests/net/icmp_redirect.sh182
-rwxr-xr-xtools/testing/selftests/net/io_uring_zerocopy_tx.sh9
-rwxr-xr-xtools/testing/selftests/net/ioam6.sh281
-rw-r--r--tools/testing/selftests/net/ioam6_parser.c95
-rw-r--r--tools/testing/selftests/net/ip_local_port_range.c24
-rw-r--r--tools/testing/selftests/net/ipsec.c4
-rwxr-xr-xtools/testing/selftests/net/l2tp.sh130
-rw-r--r--tools/testing/selftests/net/lib.sh150
-rw-r--r--tools/testing/selftests/net/lib/.gitignore2
-rw-r--r--tools/testing/selftests/net/lib/Makefile15
-rw-r--r--tools/testing/selftests/net/lib/csum.c (renamed from tools/testing/selftests/net/csum.c)18
-rw-r--r--tools/testing/selftests/net/lib/py/__init__.py8
-rw-r--r--tools/testing/selftests/net/lib/py/consts.py9
-rw-r--r--tools/testing/selftests/net/lib/py/ksft.py159
-rw-r--r--tools/testing/selftests/net/lib/py/netns.py31
-rw-r--r--tools/testing/selftests/net/lib/py/nsim.py134
-rw-r--r--tools/testing/selftests/net/lib/py/utils.py102
-rw-r--r--tools/testing/selftests/net/lib/py/ynl.py49
-rw-r--r--tools/testing/selftests/net/mptcp/config3
-rwxr-xr-xtools/testing/selftests/net/mptcp/diag.sh201
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_connect.c11
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_connect.sh333
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_inq.c11
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_join.sh876
-rw-r--r--tools/testing/selftests/net/mptcp/mptcp_lib.sh439
-rwxr-xr-xtools/testing/selftests/net/mptcp/mptcp_sockopt.sh169
-rwxr-xr-xtools/testing/selftests/net/mptcp/pm_netlink.sh352
-rw-r--r--tools/testing/selftests/net/mptcp/pm_nl_ctl.c41
-rw-r--r--tools/testing/selftests/net/mptcp/settings2
-rwxr-xr-xtools/testing/selftests/net/mptcp/simult_flows.sh106
-rwxr-xr-xtools/testing/selftests/net/mptcp/userspace_pm.sh302
-rwxr-xr-xtools/testing/selftests/net/msg_zerocopy.sh9
-rw-r--r--tools/testing/selftests/net/nat6to4.bpf.c (renamed from tools/testing/selftests/net/nat6to4.c)0
-rwxr-xr-xtools/testing/selftests/net/ndisc_unsolicited_na_test.sh19
-rw-r--r--tools/testing/selftests/net/net_helper.sh25
-rw-r--r--tools/testing/selftests/net/netfilter/.gitignore6
-rw-r--r--tools/testing/selftests/net/netfilter/Makefile52
-rw-r--r--tools/testing/selftests/net/netfilter/audit_logread.c165
-rwxr-xr-xtools/testing/selftests/net/netfilter/br_netfilter.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/bridge_brouter.sh122
-rw-r--r--tools/testing/selftests/net/netfilter/config89
-rw-r--r--tools/testing/selftests/net/netfilter/connect_close.c136
-rw-r--r--tools/testing/selftests/net/netfilter/conntrack_dump_flush.c469
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_icmp_related.sh278
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh191
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh87
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/conntrack_vrf.sh220
-rwxr-xr-xtools/testing/selftests/net/netfilter/ipvs.sh211
-rw-r--r--tools/testing/selftests/net/netfilter/lib.sh10
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh71
-rwxr-xr-xtools/testing/selftests/net/netfilter/nf_nat_edemux.sh97
-rw-r--r--tools/testing/selftests/net/netfilter/nf_queue.c395
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_audit.sh268
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range.sh1622
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_concat_range_perf.sh9
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_conntrack_helper.sh171
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_fib.sh234
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_flowtable.sh671
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_meta.sh142
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat.sh1156
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_nat_zones.sh267
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_queue.sh417
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_synproxy.sh96
-rwxr-xr-xtools/testing/selftests/net/netfilter/nft_zones_many.sh164
-rwxr-xr-xtools/testing/selftests/net/netfilter/packetdrill/common.sh33
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt118
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt62
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt59
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt44
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt51
-rw-r--r--tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt34
-rwxr-xr-xtools/testing/selftests/net/netfilter/rpath.sh175
-rw-r--r--tools/testing/selftests/net/netfilter/sctp_collision.c99
-rw-r--r--tools/testing/selftests/net/netfilter/settings1
-rwxr-xr-xtools/testing/selftests/net/netfilter/xt_string.sh133
-rwxr-xr-xtools/testing/selftests/net/netns-name.sh44
-rwxr-xr-xtools/testing/selftests/net/nl_netdev.py98
-rwxr-xr-xtools/testing/selftests/net/openvswitch/openvswitch.sh75
-rw-r--r--tools/testing/selftests/net/openvswitch/ovs-dpctl.py85
-rwxr-xr-xtools/testing/selftests/net/pmtu.sh74
-rw-r--r--tools/testing/selftests/net/reuseaddr_conflict.c2
-rwxr-xr-xtools/testing/selftests/net/rps_default_mask.sh6
-rwxr-xr-xtools/testing/selftests/net/rtnetlink.sh72
-rw-r--r--tools/testing/selftests/net/sample_map_ret0.bpf.c34
-rw-r--r--tools/testing/selftests/net/sample_ret0.bpf.c10
-rwxr-xr-xtools/testing/selftests/net/sctp_vrf.sh12
-rw-r--r--tools/testing/selftests/net/settings2
-rw-r--r--[-rwxr-xr-x]tools/testing/selftests/net/setup_loopback.sh8
-rw-r--r--tools/testing/selftests/net/setup_veth.sh11
-rw-r--r--tools/testing/selftests/net/so_incoming_cpu.c68
-rw-r--r--tools/testing/selftests/net/so_txtime.c7
-rwxr-xr-xtools/testing/selftests/net/so_txtime.sh29
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh51
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh48
-rwxr-xr-xtools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh46
-rwxr-xr-xtools/testing/selftests/net/stress_reuseport_listen.sh6
-rw-r--r--tools/testing/selftests/net/tcp_ao/.gitignore2
-rw-r--r--tools/testing/selftests/net/tcp_ao/Makefile56
-rw-r--r--tools/testing/selftests/net/tcp_ao/bench-lookups.c360
-rw-r--r--tools/testing/selftests/net/tcp_ao/config10
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect-deny.c264
-rw-r--r--tools/testing/selftests/net/tcp_ao/connect.c90
l---------tools/testing/selftests/net/tcp_ao/icmps-accept.c1
-rw-r--r--tools/testing/selftests/net/tcp_ao/icmps-discard.c449
-rw-r--r--tools/testing/selftests/net/tcp_ao/key-management.c1186
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/aolib.h605
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/kconfig.c148
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/netlink.c413
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/proc.c273
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/repair.c254
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/setup.c361
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/sock.c596
-rw-r--r--tools/testing/selftests/net/tcp_ao/lib/utils.c30
-rw-r--r--tools/testing/selftests/net/tcp_ao/restore.c236
-rw-r--r--tools/testing/selftests/net/tcp_ao/rst.c460
-rw-r--r--tools/testing/selftests/net/tcp_ao/self-connect.c197
-rw-r--r--tools/testing/selftests/net/tcp_ao/seq-ext.c245
-rw-r--r--tools/testing/selftests/net/tcp_ao/setsockopt-closed.c835
-rw-r--r--tools/testing/selftests/net/tcp_ao/settings1
-rw-r--r--tools/testing/selftests/net/tcp_ao/unsigned-md5.c741
-rwxr-xr-xtools/testing/selftests/net/test_bridge_backup_port.sh394
-rwxr-xr-xtools/testing/selftests/net/test_bridge_neigh_suppress.sh333
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_mdb.sh608
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_nolocalbypass.sh48
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_under_vrf.sh70
-rwxr-xr-xtools/testing/selftests/net/test_vxlan_vnifiltering.sh154
-rw-r--r--tools/testing/selftests/net/tls.c107
-rwxr-xr-xtools/testing/selftests/net/toeplitz.sh14
-rwxr-xr-xtools/testing/selftests/net/traceroute.sh82
-rw-r--r--tools/testing/selftests/net/txtimestamp.c3
-rwxr-xr-xtools/testing/selftests/net/txtimestamp.sh12
-rwxr-xr-xtools/testing/selftests/net/udpgro.sh17
-rwxr-xr-xtools/testing/selftests/net/udpgro_bench.sh9
-rwxr-xr-xtools/testing/selftests/net/udpgro_frglist.sh17
-rwxr-xr-xtools/testing/selftests/net/udpgro_fwd.sh36
-rw-r--r--tools/testing/selftests/net/udpgso.c136
-rwxr-xr-xtools/testing/selftests/net/udpgso.sh49
-rw-r--r--tools/testing/selftests/net/udpgso_bench_rx.c2
-rwxr-xr-xtools/testing/selftests/net/unicast_extensions.sh101
-rwxr-xr-xtools/testing/selftests/net/veth.sh38
-rwxr-xr-xtools/testing/selftests/net/vlan_hw_filter.sh29
-rwxr-xr-xtools/testing/selftests/net/vrf-xfrm-tests.sh77
-rwxr-xr-xtools/testing/selftests/net/vrf_route_leaking.sh201
-rwxr-xr-xtools/testing/selftests/net/vrf_strict_mode_test.sh47
-rw-r--r--tools/testing/selftests/net/xdp_dummy.bpf.c13
-rwxr-xr-xtools/testing/selftests/net/xfrm_policy.sh138
230 files changed, 26647 insertions, 5784 deletions
diff --git a/tools/testing/selftests/net/.gitignore b/tools/testing/selftests/net/.gitignore
index 2f9d378edec3..49a56eb5d036 100644
--- a/tools/testing/selftests/net/.gitignore
+++ b/tools/testing/selftests/net/.gitignore
@@ -2,9 +2,9 @@
bind_bhash
bind_timewait
bind_wildcard
-csum
cmsg_sender
diag_uid
+epoll_busy_poll
fin_ack_lat
gro
hwtstamp_config
@@ -31,6 +31,7 @@ reuseport_dualstack
rxtimestamp
sctp_hello
scm_pidfd
+scm_rights
sk_bind_sendto_listen
sk_connect_zero_addr
socket
diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 5b2aca4c5f10..bd01e4a0be2c 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -20,7 +20,6 @@ TEST_PROGS += reuseaddr_ports_exhausted.sh
TEST_PROGS += txtimestamp.sh
TEST_PROGS += vrf-xfrm-tests.sh
TEST_PROGS += rxtimestamp.sh
-TEST_PROGS += devlink_port_split.py
TEST_PROGS += drop_monitor_tests.sh
TEST_PROGS += vrf_route_leaking.sh
TEST_PROGS += bareudp.sh
@@ -35,6 +34,7 @@ TEST_PROGS += gre_gso.sh
TEST_PROGS += cmsg_so_mark.sh
TEST_PROGS += cmsg_time.sh cmsg_ipv6.sh
TEST_PROGS += netns-name.sh
+TEST_PROGS += nl_netdev.py
TEST_PROGS += srv6_end_dt46_l3vpn_test.sh
TEST_PROGS += srv6_end_dt4_l3vpn_test.sh
TEST_PROGS += srv6_end_dt6_l3vpn_test.sh
@@ -53,8 +53,7 @@ TEST_PROGS += bind_bhash.sh
TEST_PROGS += ip_local_port_range.sh
TEST_PROGS += rps_default_mask.sh
TEST_PROGS += big_tcp.sh
-TEST_PROGS_EXTENDED := in_netns.sh setup_loopback.sh setup_veth.sh
-TEST_PROGS_EXTENDED += toeplitz_client.sh toeplitz.sh
+TEST_PROGS_EXTENDED := toeplitz_client.sh toeplitz.sh
TEST_GEN_FILES = socket nettest
TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
TEST_GEN_FILES += tcp_mmap tcp_inq psock_snd txring_overwrite
@@ -68,7 +67,7 @@ TEST_GEN_FILES += ipsec
TEST_GEN_FILES += ioam6_parser
TEST_GEN_FILES += gro
TEST_GEN_PROGS = reuseport_bpf reuseport_bpf_cpu reuseport_bpf_numa
-TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap
+TEST_GEN_PROGS += reuseport_dualstack reuseaddr_conflict tls tun tap epoll_busy_poll
TEST_GEN_FILES += toeplitz
TEST_GEN_FILES += cmsg_sender
TEST_GEN_FILES += stress_reuseport_listen
@@ -82,8 +81,6 @@ TEST_PROGS += test_ingress_egress_chaining.sh
TEST_GEN_PROGS += so_incoming_cpu
TEST_PROGS += sctp_vrf.sh
TEST_GEN_FILES += sctp_hello
-TEST_GEN_FILES += csum
-TEST_GEN_FILES += nat6to4.o
TEST_GEN_FILES += ip_local_port_range
TEST_GEN_FILES += bind_wildcard
TEST_PROGS += test_vxlan_mdb.sh
@@ -91,60 +88,24 @@ TEST_PROGS += test_bridge_neigh_suppress.sh
TEST_PROGS += test_vxlan_nolocalbypass.sh
TEST_PROGS += test_bridge_backup_port.sh
TEST_PROGS += fdb_flush.sh
+TEST_PROGS += fq_band_pktlimit.sh
+TEST_PROGS += vlan_hw_filter.sh
+TEST_PROGS += bpf_offload.py
TEST_FILES := settings
+TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh
+
+TEST_GEN_FILES += $(patsubst %.c,%.o,$(wildcard *.bpf.c))
+
+TEST_INCLUDES := forwarding/lib.sh
include ../lib.mk
+$(OUTPUT)/epoll_busy_poll: LDLIBS += -lcap
$(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma
$(OUTPUT)/tcp_mmap: LDLIBS += -lpthread -lcrypto
$(OUTPUT)/tcp_inq: LDLIBS += -lpthread
$(OUTPUT)/bind_bhash: LDLIBS += -lpthread
$(OUTPUT)/io_uring_zerocopy_tx: CFLAGS += -I../../../include/
-# Rules to generate bpf obj nat6to4.o
-CLANG ?= clang
-SCRATCH_DIR := $(OUTPUT)/tools
-BUILD_DIR := $(SCRATCH_DIR)/build
-BPFDIR := $(abspath ../../../lib/bpf)
-APIDIR := $(abspath ../../../include/uapi)
-
-CCINCLUDE += -I../bpf
-CCINCLUDE += -I../../../../usr/include/
-CCINCLUDE += -I$(SCRATCH_DIR)/include
-
-BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
-
-MAKE_DIRS := $(BUILD_DIR)/libbpf
-$(MAKE_DIRS):
- mkdir -p $@
-
-# Get Clang's default includes on this system, as opposed to those seen by
-# '--target=bpf'. This fixes "missing" files on some architectures/distros,
-# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
-#
-# Use '-idirafter': Don't interfere with include mechanics except where the
-# build would have failed anyways.
-define get_sys_includes
-$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
- | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
-$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
-endef
-
-ifneq ($(CROSS_COMPILE),)
-CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
-endif
-
-CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
-
-$(OUTPUT)/nat6to4.o: nat6to4.c $(BPFOBJ) | $(MAKE_DIRS)
- $(CLANG) -O2 --target=bpf -c $< $(CCINCLUDE) $(CLANG_SYS_INCLUDES) -o $@
-
-$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
- $(APIDIR)/linux/bpf.h \
- | $(BUILD_DIR)/libbpf
- $(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
- EXTRA_CFLAGS='-g -O0' \
- DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
-
-EXTRA_CLEAN := $(SCRATCH_DIR)
+include bpf.mk
diff --git a/tools/testing/selftests/net/af_unix/Makefile b/tools/testing/selftests/net/af_unix/Makefile
index 221c387a7d7f..3b83c797650d 100644
--- a/tools/testing/selftests/net/af_unix/Makefile
+++ b/tools/testing/selftests/net/af_unix/Makefile
@@ -1,4 +1,4 @@
CFLAGS += $(KHDR_INCLUDES)
-TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd
+TEST_GEN_PROGS := diag_uid test_unix_oob unix_connect scm_pidfd scm_rights
include ../../lib.mk
diff --git a/tools/testing/selftests/net/af_unix/diag_uid.c b/tools/testing/selftests/net/af_unix/diag_uid.c
index 5b88f7129fea..79a3dd75590e 100644
--- a/tools/testing/selftests/net/af_unix/diag_uid.c
+++ b/tools/testing/selftests/net/af_unix/diag_uid.c
@@ -148,7 +148,6 @@ void receive_response(struct __test_metadata *_metadata,
.msg_iov = &iov,
.msg_iovlen = 1
};
- struct unix_diag_req *udr;
struct nlmsghdr *nlh;
int ret;
diff --git a/tools/testing/selftests/net/af_unix/scm_rights.c b/tools/testing/selftests/net/af_unix/scm_rights.c
new file mode 100644
index 000000000000..bab606c9f1eb
--- /dev/null
+++ b/tools/testing/selftests/net/af_unix/scm_rights.c
@@ -0,0 +1,286 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright Amazon.com Inc. or its affiliates. */
+#define _GNU_SOURCE
+#include <sched.h>
+
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <sys/types.h>
+#include <sys/socket.h>
+#include <sys/un.h>
+
+#include "../../kselftest_harness.h"
+
+FIXTURE(scm_rights)
+{
+ int fd[16];
+};
+
+FIXTURE_VARIANT(scm_rights)
+{
+ char name[16];
+ int type;
+ int flags;
+ bool test_listener;
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, dgram)
+{
+ .name = "UNIX ",
+ .type = SOCK_DGRAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = false,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = 0,
+ .test_listener = true,
+};
+
+FIXTURE_VARIANT_ADD(scm_rights, stream_listener_oob)
+{
+ .name = "UNIX-STREAM ",
+ .type = SOCK_STREAM,
+ .flags = MSG_OOB,
+ .test_listener = true,
+};
+
+static int count_sockets(struct __test_metadata *_metadata,
+ const FIXTURE_VARIANT(scm_rights) *variant)
+{
+ int sockets = -1, len, ret;
+ char *line = NULL;
+ size_t unused;
+ FILE *f;
+
+ f = fopen("/proc/net/protocols", "r");
+ ASSERT_NE(NULL, f);
+
+ len = strlen(variant->name);
+
+ while (getline(&line, &unused, f) != -1) {
+ int unused2;
+
+ if (strncmp(line, variant->name, len))
+ continue;
+
+ ret = sscanf(line + len, "%d %d", &unused2, &sockets);
+ ASSERT_EQ(2, ret);
+
+ break;
+ }
+
+ free(line);
+
+ ret = fclose(f);
+ ASSERT_EQ(0, ret);
+
+ return sockets;
+}
+
+FIXTURE_SETUP(scm_rights)
+{
+ int ret;
+
+ ret = unshare(CLONE_NEWNET);
+ ASSERT_EQ(0, ret);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+FIXTURE_TEARDOWN(scm_rights)
+{
+ int ret;
+
+ sleep(1);
+
+ ret = count_sockets(_metadata, variant);
+ ASSERT_EQ(0, ret);
+}
+
+static void create_listeners(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ struct sockaddr_un addr = {
+ .sun_family = AF_UNIX,
+ };
+ socklen_t addrlen;
+ int i, ret;
+
+ for (i = 0; i < n * 2; i += 2) {
+ self->fd[i] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i]);
+
+ addrlen = sizeof(addr.sun_family);
+ ret = bind(self->fd[i], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+
+ ret = listen(self->fd[i], -1);
+ ASSERT_EQ(0, ret);
+
+ addrlen = sizeof(addr);
+ ret = getsockname(self->fd[i], (struct sockaddr *)&addr, &addrlen);
+ ASSERT_EQ(0, ret);
+
+ self->fd[i + 1] = socket(AF_UNIX, SOCK_STREAM, 0);
+ ASSERT_LE(0, self->fd[i + 1]);
+
+ ret = connect(self->fd[i + 1], (struct sockaddr *)&addr, addrlen);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void create_socketpairs(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i += 2) {
+ ret = socketpair(AF_UNIX, variant->type, 0, self->fd + i);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+static void __create_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int n)
+{
+ if (variant->test_listener)
+ create_listeners(_metadata, self, n);
+ else
+ create_socketpairs(_metadata, self, variant, n);
+}
+
+static void __close_sockets(struct __test_metadata *_metadata,
+ FIXTURE_DATA(scm_rights) *self,
+ int n)
+{
+ int i, ret;
+
+ ASSERT_GE(sizeof(self->fd) / sizeof(int), n);
+
+ for (i = 0; i < n * 2; i++) {
+ ret = close(self->fd[i]);
+ ASSERT_EQ(0, ret);
+ }
+}
+
+void __send_fd(struct __test_metadata *_metadata,
+ const FIXTURE_DATA(scm_rights) *self,
+ const FIXTURE_VARIANT(scm_rights) *variant,
+ int inflight, int receiver)
+{
+#define MSG "nop"
+#define MSGLEN 3
+ struct {
+ struct cmsghdr cmsghdr;
+ int fd[2];
+ } cmsg = {
+ .cmsghdr = {
+ .cmsg_len = CMSG_LEN(sizeof(cmsg.fd)),
+ .cmsg_level = SOL_SOCKET,
+ .cmsg_type = SCM_RIGHTS,
+ },
+ .fd = {
+ self->fd[inflight * 2],
+ self->fd[inflight * 2],
+ },
+ };
+ struct iovec iov = {
+ .iov_base = MSG,
+ .iov_len = MSGLEN,
+ };
+ struct msghdr msg = {
+ .msg_name = NULL,
+ .msg_namelen = 0,
+ .msg_iov = &iov,
+ .msg_iovlen = 1,
+ .msg_control = &cmsg,
+ .msg_controllen = CMSG_SPACE(sizeof(cmsg.fd)),
+ };
+ int ret;
+
+ ret = sendmsg(self->fd[receiver * 2 + 1], &msg, variant->flags);
+ ASSERT_EQ(MSGLEN, ret);
+}
+
+#define create_sockets(n) \
+ __create_sockets(_metadata, self, variant, n)
+#define close_sockets(n) \
+ __close_sockets(_metadata, self, n)
+#define send_fd(inflight, receiver) \
+ __send_fd(_metadata, self, variant, inflight, receiver)
+
+TEST_F(scm_rights, self_ref)
+{
+ create_sockets(2);
+
+ send_fd(0, 0);
+
+ send_fd(1, 1);
+
+ close_sockets(2);
+}
+
+TEST_F(scm_rights, triangle)
+{
+ create_sockets(6);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+
+ send_fd(3, 4);
+ send_fd(4, 5);
+ send_fd(5, 3);
+
+ close_sockets(6);
+}
+
+TEST_F(scm_rights, cross_edge)
+{
+ create_sockets(8);
+
+ send_fd(0, 1);
+ send_fd(1, 2);
+ send_fd(2, 0);
+ send_fd(1, 3);
+ send_fd(3, 2);
+
+ send_fd(4, 5);
+ send_fd(5, 6);
+ send_fd(6, 4);
+ send_fd(5, 7);
+ send_fd(7, 6);
+
+ close_sockets(8);
+}
+
+TEST_HARNESS_MAIN
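Review bot: The bound check `ASSERT_GE(sizeof(self->fd) / sizeof(int), n)` in create_socketpairs() and __close_sockets() compares the array size with n, but both loops index up to `n * 2 - 1`, so any n from 9 to 16 would pass the assertion and still run past the 16-entry `fd` array; create_listeners() has the same `n * 2` loop and no check at all. The current callers stay in range (the largest is `create_sockets(8)`), so the problem is latent, but the guard should read `ASSERT_GE(sizeof(self->fd) / sizeof(int), n * 2);` and be repeated at the top of create_listeners(). Separately, `__send_fd` is the only helper not declared `static`, and `free()` is called without a visible `<stdlib.h>` include, which presumably arrives through kselftest_harness.h.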
diff --git a/tools/testing/selftests/net/amt.sh b/tools/testing/selftests/net/amt.sh
index 75528788cb95..5175a42cbe8a 100755
--- a/tools/testing/selftests/net/amt.sh
+++ b/tools/testing/selftests/net/amt.sh
@@ -210,8 +210,8 @@ check_features()
test_ipv4_forward()
{
- RESULT4=$(ip netns exec "${LISTENER}" nc -w 1 -l -u 239.0.0.1 4000)
- if [ "$RESULT4" == "172.17.0.2" ]; then
+ RESULT4=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP4-LISTEN:4000,readbytes=128 || true)
+ if echo "$RESULT4" | grep -q "172.17.0.2"; then
printf "TEST: %-60s [ OK ]\n" "IPv4 amt multicast forwarding"
exit 0
else
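Review bot: The match `grep -q "172.17.0.2"` at the top of test_ipv4_forward treats each dot as a regex wildcard and accepts the address anywhere in the received data, so the check is looser than the string comparison it replaces. A fixed-string match, `if echo "$RESULT4" | grep -qF -- "172.17.0.2"; then`, keeps the tolerance for padding without the wildcard; the same applies to the `RESULT6` check in the next hunk. The `|| true` after `timeout 15 socat …` also makes a missing socat binary look like an ordinary test failure, so a `command -v socat` check that skips the test would keep the two cases apart; whether check_features already does this is not visible here. The function body continues outside the hunk.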
@@ -222,8 +222,8 @@ test_ipv4_forward()
test_ipv6_forward()
{
- RESULT6=$(ip netns exec "${LISTENER}" nc -w 1 -l -u ff0e::5:6 6000)
- if [ "$RESULT6" == "2001:db8:3::2" ]; then
+ RESULT6=$(ip netns exec "${LISTENER}" timeout 15 socat - UDP6-LISTEN:6000,readbytes=128 || true)
+ if echo "$RESULT6" | grep -q "2001:db8:3::2"; then
printf "TEST: %-60s [ OK ]\n" "IPv6 amt multicast forwarding"
exit 0
else
@@ -236,14 +236,14 @@ send_mcast4()
{
sleep 2
ip netns exec "${SOURCE}" bash -c \
- 'echo 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
+ 'printf "%s %128s" 172.17.0.2 | nc -w 1 -u 239.0.0.1 4000' &
}
send_mcast6()
{
sleep 2
ip netns exec "${SOURCE}" bash -c \
- 'echo 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
+ 'printf "%s %128s" 2001:db8:3::2 | nc -w 1 -u ff0e::5:6 6000' &
}
check_features
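Review bot: The new `printf "%s %128s" 172.17.0.2` in send_mcast4 passes one argument for two conversions, so `%128s` silently expands to 128 spaces; that works, but nothing says why the padding is there. A comment above the command would help, for example `# pad the payload past 128 bytes; the listener reads with readbytes=128` (presumably so a single datagram completes the read), and the same for send_mcast6. Passing the padding explicitly, `printf "%s %128s" 172.17.0.2 ""`, would also make the intent visible.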
diff --git a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
index 4a110bb01e53..92eb880c52f2 100755
--- a/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
+++ b/tools/testing/selftests/net/arp_ndisc_evict_nocarrier.sh
@@ -12,7 +12,8 @@
# {arp,ndisc}_evict_nocarrer=0 should still contain the single ARP/ND entry
#
-readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
+source lib.sh
+
readonly V4_ADDR0=10.0.10.1
readonly V4_ADDR1=10.0.10.2
readonly V6_ADDR0=2001:db8:91::1
@@ -22,43 +23,29 @@ ret=0
cleanup_v6()
{
- ip netns del me
- ip netns del peer
+ cleanup_ns ${me} ${peer}
sysctl -w net.ipv6.conf.veth1.ndisc_evict_nocarrier=1 >/dev/null 2>&1
sysctl -w net.ipv6.conf.all.ndisc_evict_nocarrier=1 >/dev/null 2>&1
}
-create_ns()
-{
- local n=${1}
-
- ip netns del ${n} 2>/dev/null
-
- ip netns add ${n}
- ip netns set ${n} $((nsid++))
- ip -netns ${n} link set lo up
-}
-
-
setup_v6() {
- create_ns me
- create_ns peer
+ setup_ns me peer
- IP="ip -netns me"
+ IP="ip -netns ${me}"
$IP li add veth1 type veth peer name veth2
$IP li set veth1 up
$IP -6 addr add $V6_ADDR0/64 dev veth1 nodad
- $IP li set veth2 netns peer up
- ip -netns peer -6 addr add $V6_ADDR1/64 dev veth2 nodad
+ $IP li set veth2 netns ${peer} up
+ ip -netns ${peer} -6 addr add $V6_ADDR1/64 dev veth2 nodad
- ip netns exec me sysctl -w $1 >/dev/null 2>&1
+ ip netns exec ${me} sysctl -w $1 >/dev/null 2>&1
# Establish an ND cache entry
- ip netns exec me ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1
+ ip netns exec ${me} ping -6 -c1 -Iveth1 $V6_ADDR1 >/dev/null 2>&1
# Should have the veth1 entry in ND table
- ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
if [ $? -ne 0 ]; then
cleanup_v6
echo "failed"
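Review bot: The return status of `setup_ns me peer` in setup_v6 is not checked, and every later use expands `${me}` and `${peer}` unquoted, so if namespace creation fails the variables are presumably empty and commands such as `ip netns exec ${me} sysctl -w $1` shift their arguments instead of failing cleanly. I would write `setup_ns me peer || exit 1` and quote the expansions, e.g. `ip netns exec "${me}" sysctl -w "$1"` and `cleanup_ns "${me}" "${peer}"`. The same applies to `setup_ns PEER_NS` in setup_v4 and `cleanup_ns $PEER_NS` in cleanup_v4 in the following hunks, where an unchecked failure would leave the `ip link add` working in the caller's namespace. lib.sh is not visible here, so the exact failure behaviour of setup_ns is inferred.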
@@ -66,11 +53,11 @@ setup_v6() {
fi
# Set veth2 down, which will put veth1 in NOCARRIER state
- ip netns exec peer ip link set veth2 down
+ ip netns exec ${peer} ip link set veth2 down
}
setup_v4() {
- ip netns add "${PEER_NS}"
+ setup_ns PEER_NS
ip link add name veth0 type veth peer name veth1
ip link set dev veth0 up
ip link set dev veth1 netns "${PEER_NS}"
@@ -99,8 +86,7 @@ setup_v4() {
cleanup_v4() {
ip neigh flush dev veth0
ip link del veth0
- local -r ns="$(ip netns list|grep $PEER_NS)"
- [ -n "$ns" ] && ip netns del $ns 2>/dev/null
+ cleanup_ns $PEER_NS
sysctl -w net.ipv4.conf.veth0.arp_evict_nocarrier=1 >/dev/null 2>&1
sysctl -w net.ipv4.conf.all.arp_evict_nocarrier=1 >/dev/null 2>&1
@@ -163,7 +149,7 @@ run_ndisc_evict_nocarrier_enabled() {
setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=1"
- ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
if [ $? -eq 0 ];then
echo "failed"
@@ -180,7 +166,7 @@ run_ndisc_evict_nocarrier_disabled() {
setup_v6 "net.ipv6.conf.veth1.ndisc_evict_nocarrier=0"
- ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
if [ $? -eq 0 ];then
echo "ok"
@@ -197,7 +183,7 @@ run_ndisc_evict_nocarrier_disabled_all() {
setup_v6 "net.ipv6.conf.all.ndisc_evict_nocarrier=0"
- ip netns exec me ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
+ ip netns exec ${me} ip -6 neigh get $V6_ADDR1 dev veth1 >/dev/null 2>&1
if [ $? -eq 0 ];then
echo "ok"
diff --git a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
index c899b446acb6..a40c0e9bd023 100755
--- a/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
+++ b/tools/testing/selftests/net/arp_ndisc_untracked_subnets.sh
@@ -5,16 +5,14 @@
# garp to the router. Router accepts or ignores based on its arp_accept
# or accept_untracked_na configuration.
+source lib.sh
+
TESTS="arp ndisc"
-ROUTER_NS="ns-router"
-ROUTER_NS_V6="ns-router-v6"
ROUTER_INTF="veth-router"
ROUTER_ADDR="10.0.10.1"
ROUTER_ADDR_V6="2001:db8:abcd:0012::1"
-HOST_NS="ns-host"
-HOST_NS_V6="ns-host-v6"
HOST_INTF="veth-host"
HOST_ADDR="10.0.10.2"
HOST_ADDR_V6="2001:db8:abcd:0012::2"
@@ -23,13 +21,11 @@ SUBNET_WIDTH=24
PREFIX_WIDTH_V6=64
cleanup() {
- ip netns del ${HOST_NS}
- ip netns del ${ROUTER_NS}
+ cleanup_ns ${HOST_NS} ${ROUTER_NS}
}
cleanup_v6() {
- ip netns del ${HOST_NS_V6}
- ip netns del ${ROUTER_NS_V6}
+ cleanup_ns ${HOST_NS_V6} ${ROUTER_NS_V6}
}
setup() {
@@ -37,8 +33,7 @@ setup() {
local arp_accept=$1
# Set up two namespaces
- ip netns add ${ROUTER_NS}
- ip netns add ${HOST_NS}
+ setup_ns HOST_NS ROUTER_NS
# Set up interfaces veth0 and veth1, which are pairs in separate
# namespaces. veth0 is veth-router, veth1 is veth-host.
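Review bot: The result of `setup_ns HOST_NS ROUTER_NS` is not checked, and the same applies to `setup_ns HOST_NS_V6 ROUTER_NS_V6` in the setup_v6() hunk. `setup_ns` is defined in lib.sh and not visible here; presumably it creates the namespaces and assigns their generated names to the listed variables. If it fails, the interface setup that follows would run with empty or stale namespace names and report confusing errors instead of a clear setup failure. Something like `setup_ns HOST_NS ROUTER_NS || return 1` would stop early. The body of setup() continues outside this hunk.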
@@ -72,8 +67,7 @@ setup_v6() {
local accept_untracked_na=$1
# Set up two namespaces
- ip netns add ${ROUTER_NS_V6}
- ip netns add ${HOST_NS_V6}
+ setup_ns HOST_NS_V6 ROUTER_NS_V6
# Set up interfaces veth0 and veth1, which are pairs in separate
# namespaces. veth0 is veth-router, veth1 is veth-host.
@@ -150,7 +144,7 @@ arp_test_gratuitous() {
fi
# Supply arp_accept option to set up which sets it in sysctl
setup ${arp_accept}
- ip netns exec ${HOST_NS} arping -A -U ${HOST_ADDR} -c1 2>&1 >/dev/null
+ ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 2>&1 >/dev/null
if verify_arp $1 $2; then
printf " TEST: %-60s [ OK ]\n" "${test_msg[*]}"
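Review bot: On the changed arping line the redirections are ordered `2>&1 >/dev/null`, which points stderr at the original stdout and discards only stdout, so arping error messages still reach the test output. If the intent is to silence the command, as the `>/dev/null 2>&1` form in arp_ndisc_evict_nocarrier.sh does, the order should be swapped: `ip netns exec ${HOST_NS} arping -A -I ${HOST_INTF} -U ${HOST_ADDR} -c1 >/dev/null 2>&1`. The new netperf line in the big_tcp.sh hunk uses the same `2>&1 >/dev/null` order. arp_test_gratuitous starts before this hunk and continues after it.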
diff --git a/tools/testing/selftests/net/big_tcp.sh b/tools/testing/selftests/net/big_tcp.sh
index cde9a91c4797..2db9d15cd45f 100755
--- a/tools/testing/selftests/net/big_tcp.sh
+++ b/tools/testing/selftests/net/big_tcp.sh
@@ -122,7 +122,9 @@ do_netperf() {
local netns=$1
[ "$NF" = "6" ] && serip=$SERVER_IP6
- ip net exec $netns netperf -$NF -t TCP_STREAM -H $serip 2>&1 >/dev/null
+
+ # use large write to be sure to generate big tcp packets
+ ip net exec $netns netperf -$NF -t TCP_STREAM -l 1 -H $serip -- -m 262144 2>&1 >/dev/null
}
do_test() {
diff --git a/tools/testing/selftests/net/bind_wildcard.c b/tools/testing/selftests/net/bind_wildcard.c
index a2662348cdb1..b7b54d646b93 100644
--- a/tools/testing/selftests/net/bind_wildcard.c
+++ b/tools/testing/selftests/net/bind_wildcard.c
@@ -6,7 +6,9 @@
#include "../kselftest_harness.h"
-struct in6_addr in6addr_v4mapped_any = {
+static const __u32 in4addr_any = INADDR_ANY;
+static const __u32 in4addr_loopback = INADDR_LOOPBACK;
+static const struct in6_addr in6addr_v4mapped_any = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -14,8 +16,7 @@ struct in6_addr in6addr_v4mapped_any = {
0, 0, 0, 0
}
};
-
-struct in6_addr in6addr_v4mapped_loopback = {
+static const struct in6_addr in6addr_v4mapped_loopback = {
.s6_addr = {
0, 0, 0, 0,
0, 0, 0, 0,
@@ -24,137 +25,785 @@ struct in6_addr in6addr_v4mapped_loopback = {
}
};
+#define NR_SOCKETS 8
+
FIXTURE(bind_wildcard)
{
- struct sockaddr_in addr4;
- struct sockaddr_in6 addr6;
+ int fd[NR_SOCKETS];
+ socklen_t addrlen[NR_SOCKETS];
+ union {
+ struct sockaddr addr;
+ struct sockaddr_in addr4;
+ struct sockaddr_in6 addr6;
+ } addr[NR_SOCKETS];
};
FIXTURE_VARIANT(bind_wildcard)
{
- const __u32 addr4_const;
- const struct in6_addr *addr6_const;
- int expected_errno;
+ sa_family_t family[2];
+ const void *addr[2];
+ bool ipv6_only[2];
+
+ /* 6 bind() calls below follow two bind() for the defined 2 addresses:
+ *
+ * 0.0.0.0
+ * 127.0.0.1
+ * ::
+ * ::1
+ * ::ffff:0.0.0.0
+ * ::ffff:127.0.0.1
+ */
+ int expected_errno[NR_SOCKETS];
+ int expected_reuse_errno[NR_SOCKETS];
+};
+
+/* (IPv4, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v4_local)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v4_any)
+{
+ .family = {AF_INET, AF_INET},
+ .addr = {&in4addr_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
+/* (IPv4, IPv6) */
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_any)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_any_v6_v4mapped_local)
{
- .addr4_const = INADDR_ANY,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_any_only)
+{
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_loopback,
- .expected_errno = 0,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_any)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_any,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
};
FIXTURE_VARIANT_ADD(bind_wildcard, v4_local_v6_v4mapped_local)
{
- .addr4_const = INADDR_LOOPBACK,
- .addr6_const = &in6addr_v4mapped_loopback,
- .expected_errno = EADDRINUSE,
+ .family = {AF_INET, AF_INET6},
+ .addr = {&in4addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv4) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
};
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_any, &in4addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_loopback, &in4addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_any, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_any)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_local_v4_local)
+{
+ .family = {AF_INET6, AF_INET},
+ .addr = {&in6addr_v4mapped_loopback, &in4addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+/* (IPv6, IPv6) */
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_any},
+ .ipv6_only = {true, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_any},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_any_only_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_any, &in6addr_v4mapped_loopback},
+ .ipv6_only = {true, false},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, EADDRINUSE,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ 0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_local_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_loopback, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_any_v6_v4mapped_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_any, &in6addr_v4mapped_loopback},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_any_only)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_any},
+ .ipv6_only = {false, true},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_local)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_loopback},
+ .expected_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE},
+};
+
+FIXTURE_VARIANT_ADD(bind_wildcard, v6_v4mapped_loopback_v6_v4mapped_any)
+{
+ .family = {AF_INET6, AF_INET6},
+ .addr = {&in6addr_v4mapped_loopback, &in6addr_v4mapped_any},
+ .expected_errno = {0, EADDRINUSE,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+ .expected_reuse_errno = {0, 0,
+ EADDRINUSE, EADDRINUSE,
+ EADDRINUSE, 0,
+ EADDRINUSE, EADDRINUSE},
+};
+
+static void setup_addr(FIXTURE_DATA(bind_wildcard) *self, int i,
+ int family, const void *addr_const)
+{
+ if (family == AF_INET) {
+ struct sockaddr_in *addr4 = &self->addr[i].addr4;
+ const __u32 *addr4_const = addr_const;
+
+ addr4->sin_family = AF_INET;
+ addr4->sin_port = htons(0);
+ addr4->sin_addr.s_addr = htonl(*addr4_const);
+
+ self->addrlen[i] = sizeof(struct sockaddr_in);
+ } else {
+ struct sockaddr_in6 *addr6 = &self->addr[i].addr6;
+ const struct in6_addr *addr6_const = addr_const;
+
+ addr6->sin6_family = AF_INET6;
+ addr6->sin6_port = htons(0);
+ addr6->sin6_addr = *addr6_const;
+
+ self->addrlen[i] = sizeof(struct sockaddr_in6);
+ }
+}
+
FIXTURE_SETUP(bind_wildcard)
{
- self->addr4.sin_family = AF_INET;
- self->addr4.sin_port = htons(0);
- self->addr4.sin_addr.s_addr = htonl(variant->addr4_const);
+ setup_addr(self, 0, variant->family[0], variant->addr[0]);
+ setup_addr(self, 1, variant->family[1], variant->addr[1]);
+
+ setup_addr(self, 2, AF_INET, &in4addr_any);
+ setup_addr(self, 3, AF_INET, &in4addr_loopback);
- self->addr6.sin6_family = AF_INET6;
- self->addr6.sin6_port = htons(0);
- self->addr6.sin6_addr = *variant->addr6_const;
+ setup_addr(self, 4, AF_INET6, &in6addr_any);
+ setup_addr(self, 5, AF_INET6, &in6addr_loopback);
+ setup_addr(self, 6, AF_INET6, &in6addr_v4mapped_any);
+ setup_addr(self, 7, AF_INET6, &in6addr_v4mapped_loopback);
}
FIXTURE_TEARDOWN(bind_wildcard)
{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ close(self->fd[i]);
}
-void bind_sockets(struct __test_metadata *_metadata,
- FIXTURE_DATA(bind_wildcard) *self,
- int expected_errno,
- struct sockaddr *addr1, socklen_t addrlen1,
- struct sockaddr *addr2, socklen_t addrlen2)
+void bind_socket(struct __test_metadata *_metadata,
+ FIXTURE_DATA(bind_wildcard) *self,
+ const FIXTURE_VARIANT(bind_wildcard) *variant,
+ int i, int reuse)
{
- int fd[2];
int ret;
- fd[0] = socket(addr1->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[0], 0);
+ self->fd[i] = socket(self->addr[i].addr.sa_family, SOCK_STREAM, 0);
+ ASSERT_GT(self->fd[i], 0);
- ret = bind(fd[0], addr1, addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && variant->ipv6_only[i]) {
+ ret = setsockopt(self->fd[i], SOL_IPV6, IPV6_V6ONLY, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ret = getsockname(fd[0], addr1, &addrlen1);
- ASSERT_EQ(ret, 0);
+ if (i < 2 && reuse) {
+ ret = setsockopt(self->fd[i], SOL_SOCKET, reuse, &(int){1}, sizeof(int));
+ ASSERT_EQ(ret, 0);
+ }
- ((struct sockaddr_in *)addr2)->sin_port = ((struct sockaddr_in *)addr1)->sin_port;
+ self->addr[i].addr4.sin_port = self->addr[0].addr4.sin_port;
- fd[1] = socket(addr2->sa_family, SOCK_STREAM, 0);
- ASSERT_GT(fd[1], 0);
+ ret = bind(self->fd[i], &self->addr[i].addr, self->addrlen[i]);
- ret = bind(fd[1], addr2, addrlen2);
- if (expected_errno) {
- ASSERT_EQ(ret, -1);
- ASSERT_EQ(errno, expected_errno);
+ if (reuse) {
+ if (variant->expected_reuse_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_reuse_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
} else {
+ if (variant->expected_errno[i]) {
+ ASSERT_EQ(ret, -1);
+ ASSERT_EQ(errno, variant->expected_errno[i]);
+ } else {
+ ASSERT_EQ(ret, 0);
+ }
+ }
+
+ if (i == 0) {
+ ret = getsockname(self->fd[0], &self->addr[0].addr, &self->addrlen[0]);
ASSERT_EQ(ret, 0);
}
+}
- close(fd[1]);
- close(fd[0]);
+TEST_F(bind_wildcard, plain)
+{
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, 0);
}
-TEST_F(bind_wildcard, v4_v6)
+TEST_F(bind_wildcard, reuseaddr)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr4, sizeof(self->addr4),
- (struct sockaddr *)&self->addr6, sizeof(self->addr6));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEADDR);
}
-TEST_F(bind_wildcard, v6_v4)
+TEST_F(bind_wildcard, reuseport)
{
- bind_sockets(_metadata, self, variant->expected_errno,
- (struct sockaddr *)&self->addr6, sizeof(self->addr6),
- (struct sockaddr *)&self->addr4, sizeof(self->addr4));
+ int i;
+
+ for (i = 0; i < NR_SOCKETS; i++)
+ bind_socket(_metadata, self, variant, i, SO_REUSEPORT);
}
TEST_HARNESS_MAIN
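Review bot: FIXTURE_TEARDOWN closes all NR_SOCKETS entries of `self->fd`, but bind_socket() fills them one at a time, so when an ASSERT aborts a test early the remaining entries still hold their initial value. If the harness zero-fills the fixture data (worth confirming in kselftest_harness.h), those entries are 0 and the teardown closes the test process's stdin instead of a socket it opened. Setting `self->fd[i] = -1;` for every slot in FIXTURE_SETUP and guarding the teardown with `if (self->fd[i] >= 0) close(self->fd[i]);` limits cleanup to descriptors the test created. Relatedly, `ASSERT_GT(self->fd[i], 0)` treats descriptor 0 as a failure although socket() reports failure only with -1; `ASSERT_GE(self->fd[i], 0)` states the check precisely.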
diff --git a/tools/testing/selftests/net/bpf.mk b/tools/testing/selftests/net/bpf.mk
new file mode 100644
index 000000000000..a4f6755dd894
--- /dev/null
+++ b/tools/testing/selftests/net/bpf.mk
@@ -0,0 +1,53 @@
+# SPDX-License-Identifier: GPL-2.0
+# Rules to generate bpf objs
+CLANG ?= clang
+SCRATCH_DIR := $(OUTPUT)/tools
+BUILD_DIR := $(SCRATCH_DIR)/build
+BPFDIR := $(top_srcdir)/tools/lib/bpf
+APIDIR := $(top_srcdir)/tools/include/uapi
+
+CCINCLUDE += -I$(selfdir)/bpf
+CCINCLUDE += -I$(top_srcdir)/usr/include/
+CCINCLUDE += -I$(SCRATCH_DIR)/include
+
+BPFOBJ := $(BUILD_DIR)/libbpf/libbpf.a
+
+MAKE_DIRS := $(BUILD_DIR)/libbpf
+$(MAKE_DIRS):
+ $(call msg,MKDIR,,$@)
+ $(Q)mkdir -p $@
+
+# Get Clang's default includes on this system, as opposed to those seen by
+# '--target=bpf'. This fixes "missing" files on some architectures/distros,
+# such as asm/byteorder.h, asm/socket.h, asm/sockios.h, sys/cdefs.h etc.
+#
+# Use '-idirafter': Don't interfere with include mechanics except where the
+# build would have failed anyways.
+define get_sys_includes
+$(shell $(1) $(2) -v -E - </dev/null 2>&1 \
+ | sed -n '/<...> search starts here:/,/End of search list./{ s| \(/.*\)|-idirafter \1|p }') \
+$(shell $(1) $(2) -dM -E - </dev/null | grep '__riscv_xlen ' | awk '{printf("-D__riscv_xlen=%d -D__BITS_PER_LONG=%d", $$3, $$3)}')
+endef
+
+ifneq ($(CROSS_COMPILE),)
+CLANG_TARGET_ARCH = --target=$(notdir $(CROSS_COMPILE:%-=%))
+endif
+
+CLANG_SYS_INCLUDES = $(call get_sys_includes,$(CLANG),$(CLANG_TARGET_ARCH))
+
+BPF_PROG_OBJS := $(patsubst %.c,$(OUTPUT)/%.o,$(wildcard *.bpf.c))
+
+$(BPF_PROG_OBJS): $(OUTPUT)/%.o : %.c $(BPFOBJ) | $(MAKE_DIRS)
+ $(call msg,BPF_PROG,,$@)
+ $(Q)$(CLANG) -O2 -g --target=bpf $(CCINCLUDE) $(CLANG_SYS_INCLUDES) \
+ -c $< -o $@
+
+$(BPFOBJ): $(wildcard $(BPFDIR)/*.[ch] $(BPFDIR)/Makefile) \
+ $(APIDIR)/linux/bpf.h \
+ | $(BUILD_DIR)/libbpf
+ $(call msg,MAKE,,$@)
+ $(Q)$(MAKE) $(submake_extras) -C $(BPFDIR) OUTPUT=$(BUILD_DIR)/libbpf/ \
+ EXTRA_CFLAGS='-g -O0' \
+ DESTDIR=$(SCRATCH_DIR) prefix= all install_headers
+
+EXTRA_CLEAN += $(SCRATCH_DIR)
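Review bot: `SCRATCH_DIR := $(OUTPUT)/tools` is expanded immediately and the last line adds that directory to `EXTRA_CLEAN`, so if this file is ever included while `OUTPUT` is still empty, the clean list contains `/tools`. The including Makefile is outside this hunk, so the include order cannot be confirmed here, and `EXTRA_CLEAN` is presumably removed recursively by the clean target. A guard at the top of the file would make that misuse fail loudly instead of resolving to a root-level path: `ifeq ($(OUTPUT),)`, `$(error bpf.mk must be included after OUTPUT is set)`, `endif`. A short comment next to `SCRATCH_DIR` stating that dependency would also help the next person who includes this file.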
diff --git a/tools/testing/selftests/net/bpf_offload.py b/tools/testing/selftests/net/bpf_offload.py
new file mode 100755
index 000000000000..3efe44f6e92a
--- /dev/null
+++ b/tools/testing/selftests/net/bpf_offload.py
@@ -0,0 +1,1341 @@
+#!/usr/bin/env python3
+
+# Copyright (C) 2017 Netronome Systems, Inc.
+# Copyright (c) 2019 Mellanox Technologies. All rights reserved
+#
+# This software is licensed under the GNU General License Version 2,
+# June 1991 as shown in the file COPYING in the top-level directory of this
+# source tree.
+#
+# THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS"
+# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
+# BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS
+# FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE
+# OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME
+# THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+from datetime import datetime
+import argparse
+import errno
+import json
+import os
+import pprint
+import random
+import re
+import stat
+import string
+import struct
+import subprocess
+import time
+import traceback
+
+from lib.py import NetdevSim, NetdevSimDev
+
+
+logfile = None
+log_level = 1
+skip_extack = False
+bpf_test_dir = os.path.dirname(os.path.realpath(__file__))
+pp = pprint.PrettyPrinter()
+devs = [] # devices we created for clean up
+files = [] # files to be removed
+netns = [] # net namespaces to be removed
+
+def log_get_sec(level=0):
+ return "*" * (log_level + level)
+
+def log_level_inc(add=1):
+ global log_level
+ log_level += add
+
+def log_level_dec(sub=1):
+ global log_level
+ log_level -= sub
+
+def log_level_set(level):
+ global log_level
+ log_level = level
+
+def log(header, data, level=None):
+ """
+ Output to an optional log.
+ """
+ if logfile is None:
+ return
+ if level is not None:
+ log_level_set(level)
+
+ if not isinstance(data, str):
+ data = pp.pformat(data)
+
+ if len(header):
+ logfile.write("\n" + log_get_sec() + " ")
+ logfile.write(header)
+ if len(header) and len(data.strip()):
+ logfile.write("\n")
+ logfile.write(data)
+
+def skip(cond, msg):
+ if not cond:
+ return
+ print("SKIP: " + msg)
+ log("SKIP: " + msg, "", level=1)
+ os.sys.exit(0)
+
+def fail(cond, msg):
+ if not cond:
+ return
+ print("FAIL: " + msg)
+ tb = "".join(traceback.extract_stack().format())
+ print(tb)
+ log("FAIL: " + msg, tb, level=1)
+ os.sys.exit(1)
+
+def start_test(msg):
+ log(msg, "", level=1)
+ log_level_inc()
+ print(msg)
+
+def cmd(cmd, shell=True, include_stderr=False, background=False, fail=True):
+ """
+ Run a command in subprocess and return tuple of (retval, stdout);
+ optionally return stderr as well as third value.
+ """
+ proc = subprocess.Popen(cmd, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if background:
+ msg = "%s START: %s" % (log_get_sec(1),
+ datetime.now().strftime("%H:%M:%S.%f"))
+ log("BKG " + proc.args, msg)
+ return proc
+
+ return cmd_result(proc, include_stderr=include_stderr, fail=fail)
+
+def cmd_result(proc, include_stderr=False, fail=False):
+ stdout, stderr = proc.communicate()
+ stdout = stdout.decode("utf-8")
+ stderr = stderr.decode("utf-8")
+ proc.stdout.close()
+ proc.stderr.close()
+
+ stderr = "\n" + stderr
+ if stderr[-1] == "\n":
+ stderr = stderr[:-1]
+
+ sec = log_get_sec(1)
+ log("CMD " + proc.args,
+ "RETCODE: %d\n%s STDOUT:\n%s%s STDERR:%s\n%s END: %s" %
+ (proc.returncode, sec, stdout, sec, stderr,
+ sec, datetime.now().strftime("%H:%M:%S.%f")))
+
+ if proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\n%s" % (proc.args, stderr))
+
+ if include_stderr:
+ return proc.returncode, stdout, stderr
+ else:
+ return proc.returncode, stdout
+
+def rm(f):
+ cmd("rm -f %s" % (f))
+ if f in files:
+ files.remove(f)
+
+def tool(name, args, flags, JSON=True, ns="", fail=True, include_stderr=False):
+ params = ""
+ if JSON:
+ params += "%s " % (flags["json"])
+
+ if ns:
+ ns = "ip netns exec %s " % (ns)
+ elif ns is None:
+ ns = ""
+
+ if include_stderr:
+ ret, stdout, stderr = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=True)
+ else:
+ ret, stdout = cmd(ns + name + " " + params + args,
+ fail=fail, include_stderr=False)
+
+ if JSON and len(stdout.strip()) != 0:
+ out = json.loads(stdout)
+ else:
+ out = stdout
+
+ if include_stderr:
+ return ret, out, stderr
+ else:
+ return ret, out
+
+def bpftool(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("bpftool", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def bpftool_prog_list(expected=None, ns="", exclude_orphaned=True):
+ _, progs = bpftool("prog show", JSON=True, ns=ns, fail=True)
+ # Remove the base progs
+ for p in base_progs:
+ if p in progs:
+ progs.remove(p)
+ if exclude_orphaned:
+ progs = [ p for p in progs if not p['orphaned'] ]
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs loaded, expected %d" %
+ (len(progs), expected))
+ return progs
+
+def bpftool_map_list(expected=None, ns=""):
+ _, maps = bpftool("map show", JSON=True, ns=ns, fail=True)
+ # Remove the base maps
+ maps = [m for m in maps if m not in base_maps and m.get('name') and m.get('name') not in base_map_names]
+ if expected is not None:
+ if len(maps) != expected:
+ fail(True, "%d BPF maps loaded, expected %d" %
+ (len(maps), expected))
+ return maps
+
+def bpftool_prog_list_wait(expected=0, n_retry=20):
+ for i in range(n_retry):
+ nprogs = len(bpftool_prog_list())
+ if nprogs == expected:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d, have %d" % (expected, nprogs))
+
+def bpftool_map_list_wait(expected=0, n_retry=20, ns=""):
+ for i in range(n_retry):
+ maps = bpftool_map_list(ns=ns)
+ if len(maps) == expected:
+ return maps
+ time.sleep(0.05)
+ raise Exception("Time out waiting for map counts to stabilize want %d, have %d" % (expected, nmaps))
+
+def bpftool_prog_load(sample, file_name, maps=[], prog_type="xdp", dev=None,
+ fail=True, include_stderr=False):
+ args = "prog load %s %s" % (os.path.join(bpf_test_dir, sample), file_name)
+ if prog_type is not None:
+ args += " type " + prog_type
+ if dev is not None:
+ args += " dev " + dev
+ if len(maps):
+ args += " map " + " map ".join(maps)
+
+ res = bpftool(args, fail=fail, include_stderr=include_stderr)
+ if res[0] == 0:
+ files.append(file_name)
+ return res
+
+def ip(args, force=False, JSON=True, ns="", fail=True, include_stderr=False):
+ if force:
+ args = "-force " + args
+ return tool("ip", args, {"json":"-j"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def tc(args, JSON=True, ns="", fail=True, include_stderr=False):
+ return tool("tc", args, {"json":"-p"}, JSON=JSON, ns=ns,
+ fail=fail, include_stderr=include_stderr)
+
+def ethtool(dev, opt, args, fail=True):
+ return cmd("ethtool %s %s %s" % (opt, dev["ifname"], args), fail=fail)
+
+def bpf_obj(name, sec="xdp", path=bpf_test_dir,):
+ return "obj %s sec %s" % (os.path.join(path, name), sec)
+
+def bpf_pinned(name):
+ return "pinned %s" % (name)
+
+def bpf_bytecode(bytecode):
+ return "bytecode \"%s\"" % (bytecode)
+
+def mknetns(n_retry=10):
+ for i in range(n_retry):
+ name = ''.join([random.choice(string.ascii_letters) for i in range(8)])
+ ret, _ = ip("netns add %s" % (name), fail=False)
+ if ret == 0:
+ netns.append(name)
+ return name
+ return None
+
+def int2str(fmt, val):
+ ret = []
+ for b in struct.pack(fmt, val):
+ ret.append(int(b))
+ return " ".join(map(lambda x: str(x), ret))
+
+def str2int(strtab):
+ inttab = []
+ for i in strtab:
+ inttab.append(int(i, 16))
+ ba = bytearray(inttab)
+ if len(strtab) == 4:
+ fmt = "I"
+ elif len(strtab) == 8:
+ fmt = "Q"
+ else:
+ raise Exception("String array of len %d can't be unpacked to an int" %
+ (len(strtab)))
+ return struct.unpack(fmt, ba)[0]
+
+class DebugfsDir:
+ """
+ Class for accessing DebugFS directories as a dictionary.
+ """
+
+ def __init__(self, path):
+ self.path = path
+ self._dict = self._debugfs_dir_read(path)
+
+ def __len__(self):
+ return len(self._dict.keys())
+
+ def __getitem__(self, key):
+ if type(key) is int:
+ key = list(self._dict.keys())[key]
+ return self._dict[key]
+
+ def __setitem__(self, key, value):
+ log("DebugFS set %s = %s" % (key, value), "")
+ log_level_inc()
+
+ cmd("echo '%s' > %s/%s" % (value, self.path, key))
+ log_level_dec()
+
+ _, out = cmd('cat %s/%s' % (self.path, key))
+ self._dict[key] = out.strip()
+
+ def _debugfs_dir_read(self, path):
+ dfs = {}
+
+ log("DebugFS state for %s" % (path), "")
+ log_level_inc(add=2)
+
+ _, out = cmd('ls ' + path)
+ for f in out.split():
+ if f == "ports":
+ continue
+
+ p = os.path.join(path, f)
+ if not os.stat(p).st_mode & stat.S_IRUSR:
+ continue
+
+ if os.path.isfile(p):
+ # We need to init trap_flow_action_cookie before read it
+ if f == "trap_flow_action_cookie":
+ cmd('echo deadbeef > %s/%s' % (path, f))
+ _, out = cmd('cat %s/%s' % (path, f))
+ dfs[f] = out.strip()
+ elif os.path.isdir(p):
+ dfs[f] = DebugfsDir(p)
+ else:
+ raise Exception("%s is neither file nor directory" % (p))
+
+ log_level_dec()
+ log("DebugFS state", dfs)
+ log_level_dec()
+
+ return dfs
+
+class BpfNetdevSimDev(NetdevSimDev):
+ """
+ Class for netdevsim bus device and its attributes.
+ """
+ def __init__(self, port_count=1, ns=None):
+ super().__init__(port_count, ns=ns)
+ devs.append(self)
+
+ def _make_port(self, port_index, ifname):
+ return BpfNetdevSim(self, port_index, ifname, self.ns)
+
+ def dfs_num_bound_progs(self):
+ path = os.path.join(self.dfs_dir, "bpf_bound_progs")
+ _, progs = cmd('ls %s' % (path))
+ return len(progs.split())
+
+ def dfs_get_bound_progs(self, expected):
+ progs = DebugfsDir(os.path.join(self.dfs_dir, "bpf_bound_progs"))
+ if expected is not None:
+ if len(progs) != expected:
+ fail(True, "%d BPF programs bound, expected %d" %
+ (len(progs), expected))
+ return progs
+
+ def remove(self):
+ super().remove()
+ devs.remove(self)
+
+
+class BpfNetdevSim(NetdevSim):
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ super().__init__(nsimdev, port_index, ifname, ns=ns)
+
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ self.dfs_refresh()
+
+ def __getitem__(self, key):
+ return self.dev[key]
+
+ def remove(self):
+ self.nsimdev.remove_nsim(self)
+
+ def dfs_refresh(self):
+ self.dfs = DebugfsDir(self.dfs_dir)
+ return self.dfs
+
+ def dfs_read(self, f):
+ path = os.path.join(self.dfs_dir, f)
+ _, data = cmd('cat %s' % (path))
+ return data.strip()
+
+ def wait_for_flush(self, bound=0, total=0, n_retry=20):
+ for i in range(n_retry):
+ nbound = self.nsimdev.dfs_num_bound_progs()
+ nprogs = len(bpftool_prog_list())
+ if nbound == bound and nprogs == total:
+ return
+ time.sleep(0.05)
+ raise Exception("Time out waiting for program counts to stabilize want %d/%d, have %d bound, %d loaded" % (bound, total, nbound, nprogs))
+
+ def set_ns(self, ns):
+ name = ns if ns else "1"
+ ip("link set dev %s netns %s" % (self.dev["ifname"], name), ns=self.ns)
+ self.ns = ns
+
+ def set_mtu(self, mtu, fail=True):
+ return ip("link set dev %s mtu %d" % (self.dev["ifname"], mtu),
+ fail=fail)
+
+ def set_xdp(self, bpf, mode, force=False, JSON=True, verbose=False,
+ fail=True, include_stderr=False):
+ if verbose:
+ bpf += " verbose"
+ return ip("link set dev %s xdp%s %s" % (self.dev["ifname"], mode, bpf),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def unset_xdp(self, mode, force=False, JSON=True,
+ fail=True, include_stderr=False):
+ return ip("link set dev %s xdp%s off" % (self.dev["ifname"], mode),
+ force=force, JSON=JSON,
+ fail=fail, include_stderr=include_stderr)
+
+ def ip_link_show(self, xdp):
+ _, link = ip("link show dev %s" % (self['ifname']))
+ if len(link) > 1:
+ raise Exception("Multiple objects on ip link show")
+ if len(link) < 1:
+ return {}
+ fail(xdp != "xdp" in link,
+ "XDP program not reporting in iplink (reported %s, expected %s)" %
+ ("xdp" in link, xdp))
+ return link[0]
+
+ def tc_add_ingress(self):
+ tc("qdisc add dev %s ingress" % (self['ifname']))
+
+ def tc_del_ingress(self):
+ tc("qdisc del dev %s ingress" % (self['ifname']))
+
+ def tc_flush_filters(self, bound=0, total=0):
+ self.tc_del_ingress()
+ self.tc_add_ingress()
+ self.wait_for_flush(bound=bound, total=total)
+
+ def tc_show_ingress(self, expected=None):
+ # No JSON support, oh well...
+ flags = ["skip_sw", "skip_hw", "in_hw"]
+ named = ["protocol", "pref", "chain", "handle", "id", "tag"]
+
+ args = "-s filter show dev %s ingress" % (self['ifname'])
+ _, out = tc(args, JSON=False)
+
+ filters = []
+ lines = out.split('\n')
+ for line in lines:
+ words = line.split()
+ if "handle" not in words:
+ continue
+ fltr = {}
+ for flag in flags:
+ fltr[flag] = flag in words
+ for name in named:
+ try:
+ idx = words.index(name)
+ fltr[name] = words[idx + 1]
+ except ValueError:
+ pass
+ filters.append(fltr)
+
+ if expected is not None:
+ fail(len(filters) != expected,
+ "%d ingress filters loaded, expected %d" %
+ (len(filters), expected))
+ return filters
+
+ def cls_filter_op(self, op, qdisc="ingress", prio=None, handle=None,
+ chain=None, cls="", params="",
+ fail=True, include_stderr=False):
+ spec = ""
+ if prio is not None:
+ spec += " prio %d" % (prio)
+ if handle:
+ spec += " handle %s" % (handle)
+ if chain is not None:
+ spec += " chain %d" % (chain)
+
+ return tc("filter {op} dev {dev} {qdisc} {spec} {cls} {params}"\
+ .format(op=op, dev=self['ifname'], qdisc=qdisc, spec=spec,
+ cls=cls, params=params),
+ fail=fail, include_stderr=include_stderr)
+
+ def cls_bpf_add_filter(self, bpf, op="add", prio=None, handle=None,
+ chain=None, da=False, verbose=False,
+ skip_sw=False, skip_hw=False,
+ fail=True, include_stderr=False):
+ cls = "bpf " + bpf
+
+ params = ""
+ if da:
+ params += " da"
+ if verbose:
+ params += " verbose"
+ if skip_sw:
+ params += " skip_sw"
+ if skip_hw:
+ params += " skip_hw"
+
+ return self.cls_filter_op(op=op, prio=prio, handle=handle, cls=cls,
+ chain=chain, params=params,
+ fail=fail, include_stderr=include_stderr)
+
+ def set_ethtool_tc_offloads(self, enable, fail=True):
+ args = "hw-tc-offload %s" % ("on" if enable else "off")
+ return ethtool(self, "-K", args, fail=fail)
+
+################################################################################
+def clean_up():
+ global files, netns, devs
+
+ for dev in devs:
+ dev.remove()
+ for f in files:
+ cmd("rm -f %s" % (f))
+ for ns in netns:
+ cmd("ip netns delete %s" % (ns))
+ files = []
+ netns = []
+
+def pin_prog(file_name, idx=0):
+ progs = bpftool_prog_list(expected=(idx + 1))
+ prog = progs[idx]
+ bpftool("prog pin id %d %s" % (prog["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def pin_map(file_name, idx=0, expected=1):
+ maps = bpftool_map_list_wait(expected=expected)
+ m = maps[idx]
+ bpftool("map pin id %d %s" % (m["id"], file_name))
+ files.append(file_name)
+
+ return file_name, bpf_pinned(file_name)
+
+def check_dev_info_removed(prog_file=None, map_file=None):
+ bpftool_prog_list(expected=0)
+ bpftool_prog_list(expected=1, exclude_orphaned=False)
+ ret, err = bpftool("prog show pin %s" % (prog_file), fail=False)
+ fail(ret != 0, "failed to show prog with removed device")
+
+ bpftool_map_list_wait(expected=0)
+ ret, err = bpftool("map show pin %s" % (map_file), fail=False)
+ fail(ret == 0, "Showing map with removed device did not fail")
+ fail(err["error"].find("No such device") == -1,
+ "Showing map with removed device expected ENODEV, error is %s" %
+ (err["error"]))
+
+def check_dev_info(other_ns, ns, prog_file=None, map_file=None, removed=False):
+ progs = bpftool_prog_list(expected=1, ns=ns)
+ prog = progs[0]
+
+ fail("dev" not in prog.keys(), "Device parameters not reported")
+ dev = prog["dev"]
+ fail("ifindex" not in dev.keys(), "Device parameters not reported")
+ fail("ns_dev" not in dev.keys(), "Device parameters not reported")
+ fail("ns_inode" not in dev.keys(), "Device parameters not reported")
+
+ if not other_ns:
+ fail("ifname" not in dev.keys(), "Ifname not reported")
+ fail(dev["ifname"] != sim["ifname"],
+ "Ifname incorrect %s vs %s" % (dev["ifname"], sim["ifname"]))
+ else:
+ fail("ifname" in dev.keys(), "Ifname is reported for other ns")
+
+ maps = bpftool_map_list_wait(expected=2, ns=ns)
+ for m in maps:
+ fail("dev" not in m.keys(), "Device parameters not reported")
+ fail(dev != m["dev"], "Map's device different than program's")
+
+def check_extack(output, reference, args):
+ if skip_extack:
+ return
+ lines = output.split("\n")
+ comp = len(lines) >= 2 and lines[1] == 'Error: ' + reference
+ fail(not comp, "Missing or incorrect netlink extack message")
+
+def check_extack_nsim(output, reference, args):
+ check_extack(output, "netdevsim: " + reference, args)
+
+def check_no_extack(res, needle):
+ fail((res[1] + res[2]).count(needle) or (res[1] + res[2]).count("Warning:"),
+ "Found '%s' in command output, leaky extack?" % (needle))
+
+def check_verifier_log(output, reference):
+ lines = output.split("\n")
+ for l in reversed(lines):
+ if l == reference:
+ return
+ fail(True, "Missing or incorrect message from netdevsim in verifier log")
+
+def check_multi_basic(two_xdps):
+ fail(two_xdps["mode"] != 4, "Bad mode reported with multiple programs")
+ fail("prog" in two_xdps, "Base program reported in multi program mode")
+ fail(len(two_xdps["attached"]) != 2,
+ "Wrong attached program count with two programs")
+ fail(two_xdps["attached"][0]["prog"]["id"] ==
+ two_xdps["attached"][1]["prog"]["id"],
+ "Offloaded and other programs have the same id")
+
+def test_spurios_extack(sim, obj, skip_hw, needle):
+ res = sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=skip_hw,
+ include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_hw=skip_hw, include_stderr=True)
+ check_no_extack(res, needle)
+ res = sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf",
+ include_stderr=True)
+ check_no_extack(res, needle)
+
+def test_multi_prog(simdev, sim, obj, modename, modeid):
+ start_test("Test multi-attachment XDP - %s + offload..." %
+ (modename or "default", ))
+ sim.set_xdp(obj, "offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+ fail("prog" not in xdp, "Base program not reported in single program mode")
+ fail(len(xdp["attached"]) != 1,
+ "Wrong attached program count with one program")
+
+ sim.set_xdp(obj, modename)
+ two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after other activated")
+ check_multi_basic(two_xdps)
+
+ offloaded2 = sim.dfs_read("bpf_offloaded_id")
+ fail(offloaded != offloaded2,
+ "Offload ID changed after loading other program")
+
+ start_test("Test multi-attachment XDP - replace...")
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced one of programs without -force")
+ check_extack(err, "XDP program already attached.", args)
+
+ start_test("Test multi-attachment XDP - remove without mode...")
+ ret, _, err = sim.unset_xdp("", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Removed program without a mode flag")
+ check_extack(err, "More than one program loaded, unset mode is ambiguous.", args)
+
+ sim.unset_xdp("offload")
+ xdp = sim.ip_link_show(xdp=True)["xdp"]
+ offloaded = sim.dfs_read("bpf_offloaded_id")
+
+ fail(xdp["mode"] != modeid, "Bad mode reported after multiple programs")
+ fail("prog" not in xdp,
+ "Base program not reported after multi program mode")
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Offload program not reported after other activated")
+ fail(len(xdp["attached"]) != 1,
+ "Wrong attached program count with remaining programs")
+ fail(offloaded != "0", "Offload ID reported with only other program left")
+
+ start_test("Test multi-attachment XDP - reattach...")
+ sim.set_xdp(obj, "offload")
+ two_xdps = sim.ip_link_show(xdp=True)["xdp"]
+
+ fail(xdp["attached"][0] not in two_xdps["attached"],
+ "Other program not reported after offload activated")
+ check_multi_basic(two_xdps)
+
+ start_test("Test multi-attachment XDP - device remove...")
+ simdev.remove()
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_ethtool_tc_offloads(True)
+ return [simdev, sim]
+
+# Parse command line
+parser = argparse.ArgumentParser()
+parser.add_argument("--log", help="output verbose log to given file")
+args = parser.parse_args()
+if args.log:
+ logfile = open(args.log, 'w+')
+ logfile.write("# -*-Org-*-")
+
+log("Prepare...", "", level=1)
+log_level_inc()
+
+# Check permissions
+skip(os.getuid() != 0, "test must be run as root")
+
+# Check tools
+ret, progs = bpftool("prog", fail=False)
+skip(ret != 0, "bpftool not installed")
+base_progs = progs
+_, base_maps = bpftool("map")
+base_map_names = [
+ 'pid_iter.rodata', # created on each bpftool invocation
+ 'libbpf_det_bind', # created on each bpftool invocation
+]
+
+# Check netdevsim
+if not os.path.isdir("/sys/bus/netdevsim/"):
+ ret, out = cmd("modprobe netdevsim", fail=False)
+ skip(ret != 0, "netdevsim module could not be loaded")
+
+# Check debugfs
+_, out = cmd("mount")
+if out.find("/sys/kernel/debug type debugfs") == -1:
+ cmd("mount -t debugfs none /sys/kernel/debug")
+
+# Check samples are compiled
+samples = ["sample_ret0.bpf.o", "sample_map_ret0.bpf.o"]
+for s in samples:
+ ret, out = cmd("ls %s/%s" % (bpf_test_dir, s), fail=False)
+ skip(ret != 0, "sample %s/%s not found, please compile it" %
+ (bpf_test_dir, s))
+
+# Check if iproute2 is built with libmnl (needed by extack support)
+_, _, err = cmd("tc qdisc delete dev lo handle 0",
+ fail=False, include_stderr=True)
+if err.find("Error: Failed to find qdisc with specified handle.") == -1:
+ print("Warning: no extack message in iproute2 output, libmnl missing?")
+ log("Warning: no extack message in iproute2 output, libmnl missing?", "")
+ skip_extack = True
+
+# Check if net namespaces seem to work
+ns = mknetns()
+skip(ns is None, "Could not create a net namespace")
+cmd("ip netns delete %s" % (ns))
+netns = []
+
+try:
+ obj = bpf_obj("sample_ret0.bpf.o")
+ bytecode = bpf_bytecode("1,6 0 0 4294967295,")
+
+ start_test("Test destruction of generic XDP...")
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_xdp(obj, "generic")
+ simdev.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.tc_add_ingress()
+
+ start_test("Test TC non-offloaded...")
+ ret, _ = sim.cls_bpf_add_filter(obj, skip_hw=True, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+
+ start_test("Test TC non-offloaded isn't getting bound...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ simdev.dfs_get_bound_progs(expected=0)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads are off by default...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter loaded without enabling TC offloads")
+ check_extack(err, "TC offload is disabled on net device.", args)
+ sim.wait_for_flush()
+
+ sim.set_ethtool_tc_offloads(True)
+ sim.dfs["bpf_tc_non_bound_accept"] = "Y"
+
+ start_test("Test TC offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(obj, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ simdev.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Filter not offloaded by default")
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC cBPF bytcode tries offload by default...")
+ ret, _ = sim.cls_bpf_add_filter(bytecode, fail=False)
+ fail(ret != 0, "Software TC filter did not load")
+ simdev.dfs_get_bound_progs(expected=0)
+ ingress = sim.tc_show_ingress(expected=1)
+ fltr = ingress[0]
+ fail(not fltr["in_hw"], "Bytecode not offloaded by default")
+
+ sim.tc_flush_filters()
+ sim.dfs["bpf_tc_non_bound_accept"] = "N"
+
+ start_test("Test TC cBPF unbound bytecode doesn't offload...")
+ ret, _, err = sim.cls_bpf_add_filter(bytecode, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC bytecode loaded for offload")
+ check_extack_nsim(err, "netdevsim configured to reject unbound programs.",
+ args)
+ sim.wait_for_flush()
+
+ start_test("Test non-0 chain offload...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, chain=1, prio=1, handle=1,
+ skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Offloaded a filter to chain other than 0")
+ check_extack(err, "Driver supports only offload of chain 0.", args)
+ sim.tc_flush_filters()
+
+ start_test("Test TC replace...")
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_sw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_sw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ sim.cls_bpf_add_filter(obj, prio=1, handle=1, skip_hw=True)
+ sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1, skip_hw=True)
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test TC replace bad flags...")
+ for i in range(3):
+ for j in range(3):
+ ret, _ = sim.cls_bpf_add_filter(obj, op="replace", prio=1, handle=1,
+ skip_sw=(j == 1), skip_hw=(j == 2),
+ fail=False)
+ fail(bool(ret) != bool(j),
+ "Software TC incorrect load in replace test, iteration %d" %
+ (j))
+ sim.cls_filter_op(op="delete", prio=1, handle=1, cls="bpf")
+
+ start_test("Test spurious extack from the driver...")
+ test_spurios_extack(sim, obj, False, "netdevsim")
+ test_spurios_extack(sim, obj, True, "netdevsim")
+
+ sim.set_ethtool_tc_offloads(False)
+
+ test_spurios_extack(sim, obj, False, "TC offload is disabled")
+ test_spurios_extack(sim, obj, True, "TC offload is disabled")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ sim.tc_flush_filters()
+
+ start_test("Test TC offloads failure...")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "TC filter did not reject with TC offloads enabled")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+
+ start_test("Test TC offloads work...")
+ ret, _, err = sim.cls_bpf_add_filter(obj, verbose=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret != 0, "TC filter did not load with TC offloads enabled")
+
+ start_test("Test TC offload basics...")
+ dfs = simdev.dfs_get_bound_progs(expected=1)
+ progs = bpftool_prog_list(expected=1)
+ ingress = sim.tc_show_ingress(expected=1)
+
+ dprog = dfs[0]
+ prog = progs[0]
+ fltr = ingress[0]
+ fail(fltr["skip_hw"], "TC does reports 'skip_hw' on offloaded filter")
+ fail(not fltr["in_hw"], "TC does not report 'in_hw' for offloaded filter")
+ fail(not fltr["skip_sw"], "TC does not report 'skip_sw' back")
+
+ start_test("Test TC offload is device-bound...")
+ fail(str(prog["id"]) != fltr["id"], "Program IDs don't match")
+ fail(prog["tag"] != fltr["tag"], "Program tags don't match")
+ fail(fltr["id"] != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test disabling TC offloads is rejected while filters installed...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret == 0, "Driver should refuse to disable TC offloads with filters installed...")
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test qdisc removal frees things...")
+ sim.tc_flush_filters()
+ sim.tc_show_ingress(expected=0)
+
+ start_test("Test disabling TC offloads is OK without filters...")
+ ret, _ = sim.set_ethtool_tc_offloads(False, fail=False)
+ fail(ret != 0,
+ "Driver refused to disable TC offloads without filters installed...")
+
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of TC filters...")
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ simdev.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test destroying device gets rid of XDP...")
+ sim.set_xdp(obj, "offload")
+ simdev.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_ethtool_tc_offloads(True)
+
+ start_test("Test XDP prog reporting...")
+ sim.set_xdp(obj, "drv")
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+
+ start_test("Test XDP prog replace without force...")
+ ret, _ = sim.set_xdp(obj, "drv", fail=False)
+ fail(ret == 0, "Replaced XDP program without -force")
+ sim.wait_for_flush(total=1)
+
+ start_test("Test XDP prog replace with force...")
+ ret, _ = sim.set_xdp(obj, "drv", force=True, fail=False)
+ fail(ret != 0, "Could not replace XDP program with -force")
+ bpftool_prog_list_wait(expected=1)
+ ipl = sim.ip_link_show(xdp=True)
+ progs = bpftool_prog_list(expected=1)
+ fail(ipl["xdp"]["prog"]["id"] != progs[0]["id"],
+ "Loaded program has wrong ID")
+ fail("dev" in progs[0].keys(),
+ "Device parameters reported for non-offloaded program")
+
+ start_test("Test XDP prog replace with bad flags...")
+ ret, _, err = sim.set_xdp(obj, "generic", force=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Replaced XDP program with a program in different mode")
+ check_extack(err,
+ "Native and generic XDP can't be active at the same time.",
+ args)
+
+ start_test("Test MTU restrictions...")
+ ret, _ = sim.set_mtu(9000, fail=False)
+ fail(ret == 0,
+ "Driver should refuse to increase MTU to 9000 with XDP loaded...")
+ sim.unset_xdp("drv")
+ bpftool_prog_list_wait(expected=0)
+ sim.set_mtu(9000)
+ ret, _, err = sim.set_xdp(obj, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "Driver should refuse to load program with MTU of 9000...")
+ check_extack_nsim(err, "MTU too large w/ XDP enabled.", args)
+ sim.set_mtu(1500)
+
+ sim.wait_for_flush()
+ start_test("Test non-offload XDP attaching to HW...")
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/nooffload")
+ nooffload = bpf_pinned("/sys/fs/bpf/nooffload")
+ ret, _, err = sim.set_xdp(nooffload, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "attached non-offloaded XDP program to HW")
+ check_extack_nsim(err, "xdpoffload of non-bound program.", args)
+ rm("/sys/fs/bpf/nooffload")
+
+ start_test("Test offload XDP attaching to drv...")
+ bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'])
+ offload = bpf_pinned("/sys/fs/bpf/offload")
+ ret, _, err = sim.set_xdp(offload, "drv", fail=False, include_stderr=True)
+ fail(ret == 0, "attached offloaded XDP program to drv")
+ check_extack(err, "Using offloaded program without HW_MODE flag is not supported.", args)
+ rm("/sys/fs/bpf/offload")
+ sim.wait_for_flush()
+
+ start_test("Test XDP load failure...")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 0
+ ret, _, err = bpftool_prog_load("sample_ret0.bpf.o", "/sys/fs/bpf/offload",
+ dev=sim['ifname'], fail=False, include_stderr=True)
+ fail(ret == 0, "verifier should fail on load")
+ check_verifier_log(err, "[netdevsim] Hello from netdevsim!")
+ sim.dfs["dev/bpf_bind_verifier_accept"] = 1
+ sim.wait_for_flush()
+
+ start_test("Test XDP offload...")
+ _, _, err = sim.set_xdp(obj, "offload", verbose=True, include_stderr=True)
+ ipl = sim.ip_link_show(xdp=True)
+ link_xdp = ipl["xdp"]["prog"]
+ progs = bpftool_prog_list(expected=1)
+ prog = progs[0]
+ fail(link_xdp["id"] != prog["id"], "Loaded program has wrong ID")
+
+ start_test("Test XDP offload is device bound...")
+ dfs = simdev.dfs_get_bound_progs(expected=1)
+ dprog = dfs[0]
+
+ fail(prog["id"] != link_xdp["id"], "Program IDs don't match")
+ fail(prog["tag"] != link_xdp["tag"], "Program tags don't match")
+ fail(str(link_xdp["id"]) != dprog["id"], "Program IDs don't match")
+ fail(dprog["state"] != "xlated", "Offloaded program state not translated")
+ fail(dprog["loaded"] != "Y", "Offloaded program is not loaded")
+
+ start_test("Test removing XDP program many times...")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("offload")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("drv")
+ sim.unset_xdp("")
+ sim.unset_xdp("")
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test attempt to use a program for a wrong device...")
+ simdev2 = BpfNetdevSimDev()
+ sim2, = simdev2.nsims
+ sim2.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a different device accepted")
+ check_extack(err, "Program bound to different device.", args)
+ simdev2.remove()
+ ret, _, err = sim.set_xdp(pinned, "offload",
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Pinned program loaded for a removed device accepted")
+ check_extack(err, "Program bound to different device.", args)
+ rm(pin_file)
+ bpftool_prog_list_wait(expected=0)
+
+ simdev, sim = test_multi_prog(simdev, sim, obj, "", 1)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "drv", 1)
+ simdev, sim = test_multi_prog(simdev, sim, obj, "generic", 2)
+
+ start_test("Test mixing of TC and XDP...")
+ sim.tc_add_ingress()
+ sim.set_xdp(obj, "offload")
+ ret, _, err = sim.cls_bpf_add_filter(obj, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Loading TC when XDP active should fail")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+ sim.unset_xdp("offload")
+ sim.wait_for_flush()
+
+ sim.cls_bpf_add_filter(obj, skip_sw=True)
+ ret, _, err = sim.set_xdp(obj, "offload", fail=False, include_stderr=True)
+ fail(ret == 0, "Loading XDP when TC active should fail")
+ check_extack_nsim(err, "TC program is already loaded.", args)
+
+ start_test("Test binding TC from pinned...")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp")
+ sim.tc_flush_filters(bound=1, total=1)
+ sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True)
+ sim.tc_flush_filters(bound=1, total=1)
+
+ start_test("Test binding XDP from pinned...")
+ sim.set_xdp(obj, "offload")
+ pin_file, pinned = pin_prog("/sys/fs/bpf/tmp2", idx=1)
+
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+ sim.set_xdp(pinned, "offload", force=True)
+ sim.unset_xdp("offload")
+
+ start_test("Test offload of wrong type fails...")
+ ret, _ = sim.cls_bpf_add_filter(pinned, da=True, skip_sw=True, fail=False)
+ fail(ret == 0, "Managed to attach XDP program to TC")
+
+ start_test("Test asking for TC offload of two filters...")
+ sim.cls_bpf_add_filter(obj, da=True, skip_sw=True)
+ ret, _, err = sim.cls_bpf_add_filter(obj, da=True, skip_sw=True,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "Managed to offload two TC filters at the same time")
+ check_extack_nsim(err, "driver and netdev offload states mismatch.", args)
+
+ sim.tc_flush_filters(bound=2, total=2)
+
+ start_test("Test if netdev removal waits for translation...")
+ delay_msec = 500
+ sim.dfs["dev/bpf_bind_verifier_delay"] = delay_msec
+ start = time.time()
+ cmd_line = "tc filter add dev %s ingress bpf %s da skip_sw" % \
+ (sim['ifname'], obj)
+ tc_proc = cmd(cmd_line, background=True, fail=False)
+ # Wait for the verifier to start
+ while simdev.dfs_num_bound_progs() <= 2:
+ pass
+ simdev.remove()
+ end = time.time()
+ ret, _ = cmd_result(tc_proc, fail=False)
+ time_diff = end - start
+ log("Time", "start:\t%s\nend:\t%s\ndiff:\t%s" % (start, end, time_diff))
+
+ fail(ret == 0, "Managed to load TC filter on a unregistering device")
+ delay_sec = delay_msec * 0.001
+ fail(time_diff < delay_sec, "Removal process took %s, expected %s" %
+ (time_diff, delay_sec))
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ map_obj = bpf_obj("sample_map_ret0.bpf.o")
+ start_test("Test loading program with maps...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+
+ start_test("Test bpftool bound info reporting (own ns)...")
+ check_dev_info(False, "")
+
+ start_test("Test bpftool bound info reporting (other ns)...")
+ ns = mknetns()
+ sim.set_ns(ns)
+ check_dev_info(True, "")
+
+ start_test("Test bpftool bound info reporting (remote ns)...")
+ check_dev_info(False, ns)
+
+ start_test("Test bpftool bound info reporting (back to own ns)...")
+ sim.set_ns("")
+ check_dev_info(False, "")
+
+ prog_file, _ = pin_prog("/sys/fs/bpf/tmp_prog")
+ map_file, _ = pin_map("/sys/fs/bpf/tmp_map", idx=1, expected=2)
+ simdev.remove()
+
+ start_test("Test bpftool bound info reporting (removed dev)...")
+ check_dev_info_removed(prog_file=prog_file, map_file=map_file)
+
+ # Remove all pinned files and reinstantiate the netdev
+ clean_up()
+ bpftool_prog_list_wait(expected=0)
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+
+ start_test("Test map update (no flags)...")
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ maps = bpftool_map_list_wait(expected=2)
+ array = maps[0] if maps[0]["type"] == "array" else maps[1]
+ htab = maps[0] if maps[0]["type"] == "hash" else maps[1]
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, _ = bpftool("map update id %d key %s value %s" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "added too many entries")
+
+ start_test("Test map update (exists)...")
+ for m in maps:
+ for i in range(2):
+ bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)))
+
+ for m in maps:
+ ret, err = bpftool("map update id %d key %s value %s exist" %
+ (m["id"], int2str("I", 3), int2str("Q", 3 * 3)),
+ fail=False)
+ fail(ret == 0, "updated non-existing key")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map update (noexist)...")
+ for m in maps:
+ for i in range(2):
+ ret, err = bpftool("map update id %d key %s value %s noexist" %
+ (m["id"], int2str("I", i), int2str("Q", i * 3)),
+ fail=False)
+ fail(ret == 0, "updated existing key")
+ fail(err["error"].find("File exists") == -1,
+ "expected EEXIST, error is '%s'" % (err["error"]))
+
+ start_test("Test map dump...")
+ for m in maps:
+ _, entries = bpftool("map dump id %d" % (m["id"]))
+ for i in range(2):
+ key = str2int(entries[i]["key"])
+ fail(key != i, "expected key %d, got %d" % (key, i))
+ val = str2int(entries[i]["value"])
+ fail(val != i * 3, "expected value %d, got %d" % (val, i * 3))
+
+ start_test("Test map getnext...")
+ for m in maps:
+ _, entry = bpftool("map getnext id %d" % (m["id"]))
+ key = str2int(entry["next_key"])
+ fail(key != 0, "next key %d, expected %d" % (key, 0))
+ _, entry = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 0)))
+ key = str2int(entry["next_key"])
+ fail(key != 1, "next key %d, expected %d" % (key, 1))
+ ret, err = bpftool("map getnext id %d key %s" %
+ (m["id"], int2str("I", 1)), fail=False)
+ fail(ret == 0, "got next key past the end of map")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map delete (htab)...")
+ for i in range(2):
+ bpftool("map delete id %d key %s" % (htab["id"], int2str("I", i)))
+
+ start_test("Test map delete (array)...")
+ for i in range(2):
+ ret, err = bpftool("map delete id %d key %s" %
+ (htab["id"], int2str("I", i)), fail=False)
+ fail(ret == 0, "removed entry from an array")
+ fail(err["error"].find("No such file or directory") == -1,
+ "expected ENOENT, error is '%s'" % (err["error"]))
+
+ start_test("Test map remove...")
+ sim.unset_xdp("offload")
+ bpftool_map_list_wait(expected=0)
+ simdev.remove()
+
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.set_xdp(map_obj, "offload", JSON=False) # map fixup msg breaks JSON
+ simdev.remove()
+ bpftool_map_list_wait(expected=0)
+
+ start_test("Test map creation fail path...")
+ simdev = BpfNetdevSimDev()
+ sim, = simdev.nsims
+ sim.dfs["bpf_map_accept"] = "N"
+ ret, _ = sim.set_xdp(map_obj, "offload", JSON=False, fail=False)
+ fail(ret == 0,
+ "netdevsim didn't refuse to create a map with offload disabled")
+
+ simdev.remove()
+
+ start_test("Test multi-dev ASIC program reuse...")
+ simdevA = BpfNetdevSimDev()
+ simA, = simdevA.nsims
+ simdevB = BpfNetdevSimDev(3)
+ simB1, simB2, simB3 = simdevB.nsims
+ sims = (simA, simB1, simB2, simB3)
+ simB = (simB1, simB2, simB3)
+
+ bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA",
+ dev=simA['ifname'])
+ progA = bpf_pinned("/sys/fs/bpf/nsimA")
+ bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB",
+ dev=simB1['ifname'])
+ progB = bpf_pinned("/sys/fs/bpf/nsimB")
+
+ simA.set_xdp(progA, "offload", JSON=False)
+ for d in simdevB.nsims:
+ d.set_xdp(progB, "offload", JSON=False)
+
+ start_test("Test multi-dev ASIC cross-dev replace...")
+ ret, _ = simA.set_xdp(progB, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+ for d in simdevB.nsims:
+ ret, _ = d.set_xdp(progA, "offload", force=True, JSON=False, fail=False)
+ fail(ret == 0, "cross-ASIC program allowed")
+
+ start_test("Test multi-dev ASIC cross-dev install...")
+ for d in sims:
+ d.unset_xdp("offload")
+
+ ret, _, err = simA.set_xdp(progB, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack(err, "Program bound to different device.", args)
+ for d in simdevB.nsims:
+ ret, _, err = d.set_xdp(progA, "offload", force=True, JSON=False,
+ fail=False, include_stderr=True)
+ fail(ret == 0, "cross-ASIC program allowed")
+ check_extack(err, "Program bound to different device.", args)
+
+ start_test("Test multi-dev ASIC cross-dev map reuse...")
+
+ mapA = bpftool("prog show %s" % (progA))[1]["map_ids"][0]
+ mapB = bpftool("prog show %s" % (progB))[1]["map_ids"][0]
+
+ ret, _ = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
+ dev=simB3['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False)
+ fail(ret != 0, "couldn't reuse a map on the same ASIC")
+ rm("/sys/fs/bpf/nsimB_")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimA_",
+ dev=simA['ifname'],
+ maps=["idx 0 id %d" % (mapB)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ ret, _, err = bpftool_prog_load("sample_map_ret0.bpf.o", "/sys/fs/bpf/nsimB_",
+ dev=simB1['ifname'],
+ maps=["idx 0 id %d" % (mapA)],
+ fail=False, include_stderr=True)
+ fail(ret == 0, "could reuse a map on a different ASIC")
+ fail(err.count("offload device mismatch between prog and map") == 0,
+ "error message missing for cross-ASIC map")
+
+ start_test("Test multi-dev ASIC cross-dev destruction...")
+ bpftool_prog_list_wait(expected=2)
+
+ simdevA.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB1['ifname'], "program not bound to original device")
+ simB1.remove()
+ bpftool_prog_list_wait(expected=1)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - move...")
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB not in (simB2['ifname'], simB3['ifname']),
+ "program not bound to remaining devices")
+
+ simB2.remove()
+ ifnameB = bpftool("prog show %s" % (progB))[1]["dev"]["ifname"]
+ fail(ifnameB != simB3['ifname'], "program not bound to remaining device")
+
+ simB3.remove()
+ simdevB.remove()
+ bpftool_prog_list_wait(expected=0)
+
+ start_test("Test multi-dev ASIC cross-dev destruction - orphaned...")
+ ret, out = bpftool("prog show %s" % (progB), fail=False)
+ fail(ret != 0, "couldn't get information about orphaned program")
+
+ print("%s: OK" % (os.path.basename(__file__)))
+
+finally:
+ log("Clean up...", "", level=1)
+ log_level_inc()
+ clean_up()
diff --git a/tools/testing/selftests/net/cmsg_ipv6.sh b/tools/testing/selftests/net/cmsg_ipv6.sh
index 330d0b1ceced..8bc23fb4c82b 100755
--- a/tools/testing/selftests/net/cmsg_ipv6.sh
+++ b/tools/testing/selftests/net/cmsg_ipv6.sh
@@ -1,9 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ksft_skip=4
+source lib.sh
-NS=ns
IP6=2001:db8:1::1/64
TGT6=2001:db8:1::2
TMPF=$(mktemp --suffix ".pcap")
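Review bot: `source lib.sh` names the file without a slash, so bash looks it up along `$PATH` first and only falls back to the current directory when nothing is found there. A stray `lib.sh` earlier in the path would be sourced instead, and the script fails outright when started from another directory. Anchoring the lookup to the script, `source "$(dirname "$0")/lib.sh"`, removes both cases. The same line is added in cmsg_so_mark.sh, cmsg_time.sh and drop_monitor_tests.sh further down.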
@@ -11,13 +10,11 @@ TMPF=$(mktemp --suffix ".pcap")
cleanup()
{
rm -f $TMPF
- ip netns del $NS
+ cleanup_ns $NS
}
trap cleanup EXIT
-NSEXE="ip netns exec $NS"
-
tcpdump -h | grep immediate-mode >> /dev/null
if [ $? -ne 0 ]; then
echo "SKIP - tcpdump with --immediate-mode option required"
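Review bot: `cleanup` is trapped on EXIT before `setup_ns NS` has run, and with `NS=ns` removed the variable is empty on the tcpdump SKIP path that begins at the end of this hunk, so `cleanup_ns $NS` is called with no argument there. What lib.sh does in that case is not visible from here; guarding the call, `[ -n "$NS" ] && cleanup_ns "$NS"`, keeps the early-exit path independent of it. `rm -f $TMPF` in the same function would also be safer as `rm -f -- "$TMPF"`.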
@@ -25,7 +22,8 @@ if [ $? -ne 0 ]; then
fi
# Namespaces
-ip netns add $NS
+setup_ns NS
+NSEXE="ip netns exec $NS"
$NSEXE sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
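Review bot: The result of `setup_ns NS` is not checked before `NSEXE` is built from `$NS`, so if namespace creation fails every later `$NSEXE ...` command expands with an empty namespace name and the errors that follow point away from the real cause. Stopping at the source, `setup_ns NS || exit 1`, makes the failure obvious. cmsg_so_mark.sh and cmsg_time.sh add the same unchecked call.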
@@ -91,7 +89,7 @@ for ovr in setsock cmsg both diff; do
check_result $? 0 "TCLASS $prot $ovr - pass"
while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+ $NSEXE ./cmsg_sender -6 -p $p $m $((TOS2)) $TGT6 1234
done
tcpdump -r $TMPF -v 2>&1 | grep "class $TOS2" >> /dev/null
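Review bot: Nothing explains why the loop that runs while `/proc/$BG` exists now has to send packets carrying the value under test (`-p $p $m $((TOS2))`) instead of the fixed `-p u` send. Presumably it is so that whatever tcpdump records while shutting down still matches the grep below, but that is not visible in the hunk. I'd add a comment above the loop, e.g. `# keep sending packets with the expected class until tcpdump exits`. The HOPLIMIT loop in the next hunk needs the same comment; the enclosing `for` starts outside both hunks.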
@@ -128,7 +126,7 @@ for ovr in setsock cmsg both diff; do
check_result $? 0 "HOPLIMIT $prot $ovr - pass"
while [ -d /proc/$BG ]; do
- $NSEXE ./cmsg_sender -6 -p u $TGT6 1234
+ $NSEXE ./cmsg_sender -6 -p $p $m $LIM $TGT6 1234
done
tcpdump -r $TMPF -v 2>&1 | grep "hlim $LIM[^0-9]" >> /dev/null
diff --git a/tools/testing/selftests/net/cmsg_sender.c b/tools/testing/selftests/net/cmsg_sender.c
index 24b21b15ed3f..876c2db02a63 100644
--- a/tools/testing/selftests/net/cmsg_sender.c
+++ b/tools/testing/selftests/net/cmsg_sender.c
@@ -45,11 +45,13 @@ struct options {
const char *host;
const char *service;
unsigned int size;
+ unsigned int num_pkt;
struct {
unsigned int mark;
unsigned int dontfrag;
unsigned int tclass;
unsigned int hlimit;
+ unsigned int priority;
} sockopt;
struct {
unsigned int family;
@@ -72,6 +74,7 @@ struct options {
} v6;
} opt = {
.size = 13,
+ .num_pkt = 1,
.sock = {
.family = AF_UNSPEC,
.type = SOCK_DGRAM,
@@ -112,7 +115,7 @@ static void cs_parse_args(int argc, char *argv[])
{
int o;
- while ((o = getopt(argc, argv, "46sS:p:m:M:d:tf:F:c:C:l:L:H:")) != -1) {
+ while ((o = getopt(argc, argv, "46sS:p:P:m:M:n:d:tf:F:c:C:l:L:H:")) != -1) {
switch (o) {
case 's':
opt.silent_send = true;
@@ -138,7 +141,9 @@ static void cs_parse_args(int argc, char *argv[])
cs_usage(argv[0]);
}
break;
-
+ case 'P':
+ opt.sockopt.priority = atoi(optarg);
+ break;
case 'm':
opt.mark.ena = true;
opt.mark.val = atoi(optarg);
@@ -146,6 +151,9 @@ static void cs_parse_args(int argc, char *argv[])
case 'M':
opt.sockopt.mark = atoi(optarg);
break;
+ case 'n':
+ opt.num_pkt = atoi(optarg);
+ break;
case 'd':
opt.txtime.ena = true;
opt.txtime.delay = atoi(optarg);
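Review bot: `opt.num_pkt = atoi(optarg)` stores an unchecked value in an unsigned field: `-n -1` wraps to a huge count and a non-numeric argument becomes 0. The send loop this commit adds in main() compares that field with a signed `int i`, so a negative argument leaves the loop effectively unbounded. Parsing with strtoul and rejecting anything outside 1..INT_MAX keeps the count sane. `-P` in the previous hunk uses the same atoi-into-unsigned pattern, although a bad value there is only handed to setsockopt(). cs_parse_args() starts and ends outside this hunk.

```c
		case 'n': {
			unsigned long v;
			char *end;

			errno = 0;
			v = strtoul(optarg, &end, 0);
			/* strtoul accepts a leading '-', so the range check also rejects negatives */
			if (errno || end == optarg || *end || v == 0 || v > INT_MAX)
				cs_usage(argv[0]);
			opt.num_pkt = v;
			break;
		}
		/* … unchanged: case 'd' and the remaining options … */
```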
@@ -252,15 +260,8 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
SOL_IPV6, IPV6_HOPLIMIT, &opt.v6.hlimit);
if (opt.txtime.ena) {
- struct sock_txtime so_txtime = {
- .clockid = CLOCK_MONOTONIC,
- };
__u64 txtime;
- if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
- &so_txtime, sizeof(so_txtime)))
- error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
-
txtime = time_start_mono.tv_sec * (1000ULL * 1000 * 1000) +
time_start_mono.tv_nsec +
opt.txtime.delay * 1000;
@@ -276,13 +277,6 @@ cs_write_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
memcpy(CMSG_DATA(cmsg), &txtime, sizeof(txtime));
}
if (opt.ts.ena) {
- __u32 val = SOF_TIMESTAMPING_SOFTWARE |
- SOF_TIMESTAMPING_OPT_TSONLY;
-
- if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
- &val, sizeof(val)))
- error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
-
cmsg = (struct cmsghdr *)(cbuf + cmsg_len);
cmsg_len += CMSG_SPACE(sizeof(__u32));
if (cbuf_sz < cmsg_len)
@@ -325,16 +319,17 @@ static const char *cs_ts_info2str(unsigned int info)
return "unknown";
}
-static void
+static unsigned long
cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
{
struct sock_extended_err *see;
struct scm_timestamping *ts;
+ unsigned long ts_seen = 0;
struct cmsghdr *cmsg;
int i, err;
if (!opt.ts.ena)
- return;
+ return 0;
msg->msg_control = cbuf;
msg->msg_controllen = cbuf_sz;
@@ -388,8 +383,11 @@ cs_read_cmsg(int fd, struct msghdr *msg, char *cbuf, size_t cbuf_sz)
printf(" %5s ts%d %lluus\n",
cs_ts_info2str(see->ee_info),
i, rel_time);
+ ts_seen |= 1 << see->ee_info;
}
}
+
+ return ts_seen;
}
static void ca_set_sockopts(int fd)
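Review bot: `ts_seen |= 1 << see->ee_info;` shifts a plain `int` by a value read from the error-queue message, which is undefined once `ee_info` reaches the width of `int`. cs_ts_info2str() already has an "unknown" fallback, so unexpected values are anticipated elsewhere in the file. Since `ts_seen` is `unsigned long`, I'd shift in that type and bound the index: `if (see->ee_info < 8 * sizeof(ts_seen)) ts_seen |= 1UL << see->ee_info;`. The loop this line sits in begins outside the hunk.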
@@ -410,17 +408,40 @@ static void ca_set_sockopts(int fd)
setsockopt(fd, SOL_IPV6, IPV6_UNICAST_HOPS,
&opt.sockopt.hlimit, sizeof(opt.sockopt.hlimit)))
error(ERN_SOCKOPT, errno, "setsockopt IPV6_HOPLIMIT");
+ if (opt.sockopt.priority &&
+ setsockopt(fd, SOL_SOCKET, SO_PRIORITY,
+ &opt.sockopt.priority, sizeof(opt.sockopt.priority)))
+ error(ERN_SOCKOPT, errno, "setsockopt SO_PRIORITY");
+
+ if (opt.txtime.ena) {
+ struct sock_txtime so_txtime = {
+ .clockid = CLOCK_MONOTONIC,
+ };
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TXTIME,
+ &so_txtime, sizeof(so_txtime)))
+ error(ERN_SOCKOPT, errno, "setsockopt TXTIME");
+ }
+ if (opt.ts.ena) {
+ __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+ SOF_TIMESTAMPING_OPT_TSONLY;
+
+ if (setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING,
+ &val, sizeof(val)))
+ error(ERN_SOCKOPT, errno, "setsockopt TIMESTAMPING");
+ }
}
int main(int argc, char *argv[])
{
struct addrinfo hints, *ai;
struct iovec iov[1];
+ unsigned char *buf;
struct msghdr msg;
char cbuf[1024];
- char *buf;
int err;
int fd;
+ int i;
cs_parse_args(argc, argv);
@@ -480,24 +501,33 @@ int main(int argc, char *argv[])
cs_write_cmsg(fd, &msg, cbuf, sizeof(cbuf));
- err = sendmsg(fd, &msg, 0);
- if (err < 0) {
- if (!opt.silent_send)
- fprintf(stderr, "send failed: %s\n", strerror(errno));
- err = ERN_SEND;
- goto err_out;
- } else if (err != (int)opt.size) {
- fprintf(stderr, "short send\n");
- err = ERN_SEND_SHORT;
- goto err_out;
- } else {
- err = ERN_SUCCESS;
+ for (i = 0; i < opt.num_pkt; i++) {
+ err = sendmsg(fd, &msg, 0);
+ if (err < 0) {
+ if (!opt.silent_send)
+ fprintf(stderr, "send failed: %s\n", strerror(errno));
+ err = ERN_SEND;
+ goto err_out;
+ } else if (err != (int)opt.size) {
+ fprintf(stderr, "short send\n");
+ err = ERN_SEND_SHORT;
+ goto err_out;
+ }
}
+ err = ERN_SUCCESS;
- /* Make sure all timestamps have time to loop back */
- usleep(opt.txtime.delay);
+ if (opt.ts.ena) {
+ unsigned long seen;
+ int i;
- cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ /* Make sure all timestamps have time to loop back */
+ for (i = 0; i < 40; i++) {
+ seen = cs_read_cmsg(fd, &msg, cbuf, sizeof(cbuf));
+ if (seen & (1 << SCM_TSTAMP_SND))
+ break;
+ usleep(opt.txtime.delay / 20);
+ }
+ }
err_out:
close(fd);
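Review bot: The polling loop sleeps `opt.txtime.delay / 20` per round, which appears to be 0 unless `-d` was given (the initializer shown earlier in this diff does not set it). With `-t` alone that is 40 back-to-back cs_read_cmsg() calls with no wait, and if SCM_TSTAMP_SND never shows up the program still exits with ERN_SUCCESS and no diagnostic. A non-zero lower bound on the sleep plus a message on exhaustion, `if (i == 40) fprintf(stderr, "no SND timestamp seen\n");`, would make that case visible. The block also declares a second `int i` that shadows the one this commit adds at the top of main(); one of the two can go.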
diff --git a/tools/testing/selftests/net/cmsg_so_mark.sh b/tools/testing/selftests/net/cmsg_so_mark.sh
index 1650b8622f2f..772ad0cc2630 100755
--- a/tools/testing/selftests/net/cmsg_so_mark.sh
+++ b/tools/testing/selftests/net/cmsg_so_mark.sh
@@ -1,7 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-NS=ns
+source lib.sh
+
IP4=172.16.0.1/24
TGT4=172.16.0.2
IP6=2001:db8:1::1/64
@@ -10,13 +11,13 @@ MARK=1000
cleanup()
{
- ip netns del $NS
+ cleanup_ns $NS
}
trap cleanup EXIT
# Namespaces
-ip netns add $NS
+setup_ns NS
ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
diff --git a/tools/testing/selftests/net/cmsg_time.sh b/tools/testing/selftests/net/cmsg_time.sh
index 91161e1da734..1d7e756644bc 100755
--- a/tools/testing/selftests/net/cmsg_time.sh
+++ b/tools/testing/selftests/net/cmsg_time.sh
@@ -1,7 +1,8 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-NS=ns
+source lib.sh
+
IP4=172.16.0.1/24
TGT4=172.16.0.2
IP6=2001:db8:1::1/64
@@ -9,13 +10,13 @@ TGT6=2001:db8:1::2
cleanup()
{
- ip netns del $NS
+ cleanup_ns $NS
}
trap cleanup EXIT
# Namespaces
-ip netns add $NS
+setup_ns NS
ip netns exec $NS sysctl -w net.ipv4.ping_group_range='0 2147483647' > /dev/null
@@ -65,10 +66,13 @@ for i in "-4 $TGT4" "-6 $TGT6"; do
awk '/SND/ { if ($3 > 1000) print "OK"; }')
check_result $? "$ts" "OK" "$prot - TXTIME abs"
- ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d 1000 |
+ [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000
+
+ ts=$(ip netns exec $NS ./cmsg_sender -p $p $i 1234 -t -d $delay |
awk '/SND/ {snd=$3}
/SCHED/ {sch=$3}
- END { if (snd - sch > 500) print "OK"; }')
+ END { if (snd - sch > '$((delay/2))') print "OK";
+ else print snd, "-", sch, "<", '$((delay/2))'; }')
check_result $? "$ts" "OK" "$prot - TXTIME rel"
done
done
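Review bot: The threshold is spliced into the awk program by closing and reopening the single quotes (`'$((delay/2))'`, twice), which is easy to break on a later edit. Passing it in with `awk -v lim="$((delay / 2))"` and comparing `snd - sch > lim` keeps the program a single quoted string. `[ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000 || delay=1000` only works as an if/else because the assignment cannot fail; `delay=1000; [ "$KSFT_MACHINE_SLOW" = yes ] && delay=8000` says the same thing without relying on that. The enclosing `for` loops start outside the hunk.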
diff --git a/tools/testing/selftests/net/config b/tools/testing/selftests/net/config
index 8da562a9ae87..04de7a6ba6f3 100644
--- a/tools/testing/selftests/net/config
+++ b/tools/testing/selftests/net/config
@@ -1,5 +1,6 @@
CONFIG_USER_NS=y
CONFIG_NET_NS=y
+CONFIG_BONDING=m
CONFIG_BPF_SYSCALL=y
CONFIG_TEST_BPF=m
CONFIG_NUMA=y
@@ -14,30 +15,75 @@ CONFIG_VETH=y
CONFIG_NET_IPVTI=y
CONFIG_IPV6_VTI=y
CONFIG_DUMMY=y
+CONFIG_BRIDGE_VLAN_FILTERING=y
CONFIG_BRIDGE=y
+CONFIG_CRYPTO_CHACHA20POLY1305=m
CONFIG_VLAN_8021Q=y
+CONFIG_GENEVE=m
CONFIG_IFB=y
+CONFIG_INET_DIAG=y
+CONFIG_INET_ESP=y
+CONFIG_INET_ESP_OFFLOAD=y
+CONFIG_NET_FOU=y
+CONFIG_NET_FOU_IP_TUNNELS=y
+CONFIG_IP_GRE=m
CONFIG_NETFILTER=y
CONFIG_NETFILTER_ADVANCED=y
CONFIG_NF_CONNTRACK=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_SIT=y
+CONFIG_IP_DCCP=m
CONFIG_NF_NAT=m
CONFIG_IP6_NF_IPTABLES=m
CONFIG_IP_NF_IPTABLES=m
CONFIG_IP6_NF_NAT=m
+CONFIG_IP6_NF_RAW=m
CONFIG_IP_NF_NAT=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP_NF_TARGET_TTL=m
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_SEG6_LWTUNNEL=y
+CONFIG_L2TP_ETH=m
+CONFIG_L2TP_IP=m
+CONFIG_L2TP=m
+CONFIG_L2TP_V3=y
+CONFIG_MACSEC=m
+CONFIG_MACVLAN=y
+CONFIG_MACVTAP=y
+CONFIG_MPLS=y
+CONFIG_MPTCP=y
CONFIG_NF_TABLES=m
CONFIG_NF_TABLES_IPV6=y
CONFIG_NF_TABLES_IPV4=y
CONFIG_NFT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_LENGTH=m
+CONFIG_NET_ACT_CSUM=m
+CONFIG_NET_ACT_CT=m
+CONFIG_NET_ACT_GACT=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_CLS_BPF=m
+CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPIP=y
+CONFIG_NET_SCH_FQ_CODEL=m
+CONFIG_NET_SCH_HTB=m
CONFIG_NET_SCH_FQ=m
CONFIG_NET_SCH_ETF=m
CONFIG_NET_SCH_NETEM=y
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_PSAMPLE=m
+CONFIG_TCP_MD5SIG=y
CONFIG_TEST_BLACKHOLE_DEV=m
CONFIG_KALLSYMS=y
+CONFIG_TLS=m
CONFIG_TRACEPOINTS=y
CONFIG_NET_DROP_MONITOR=m
CONFIG_NETDEVSIM=m
-CONFIG_NET_FOU=m
CONFIG_MPLS_ROUTING=m
CONFIG_MPLS_IPTUNNEL=m
CONFIG_NET_SCH_INGRESS=m
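Review bot: `CONFIG_IP_GRE=m` may not match any Kconfig symbol; the IPv4 GRE options I know of are `NET_IPGRE` and `NET_IPGRE_DEMUX`, both of which this hunk also adds, and a fragment line for a non-existent symbol has no effect on the resulting .config. It is worth confirming and dropping the line if so. The additions are also not kept in sorted order (`CONFIG_BRIDGE_VLAN_FILTERING` before `CONFIG_BRIDGE`, `CONFIG_NET_IPGRE_DEMUX` before `CONFIG_NET_IPGRE`), which makes moved entries such as `CONFIG_NET_FOU` going from `m` to `y` harder to spot in review.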
@@ -48,7 +94,10 @@ CONFIG_BAREUDP=m
CONFIG_IPV6_IOAM6_LWTUNNEL=y
CONFIG_CRYPTO_SM4_GENERIC=y
CONFIG_AMT=m
+CONFIG_TUN=y
CONFIG_VXLAN=m
CONFIG_IP_SCTP=m
CONFIG_NETFILTER_XT_MATCH_POLICY=m
CONFIG_CRYPTO_ARIA=y
+CONFIG_XFRM_INTERFACE=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/devlink_port_split.py b/tools/testing/selftests/net/devlink_port_split.py
deleted file mode 100755
index 2d84c7a0be6b..000000000000
--- a/tools/testing/selftests/net/devlink_port_split.py
+++ /dev/null
@@ -1,309 +0,0 @@
-#!/usr/bin/env python3
-# SPDX-License-Identifier: GPL-2.0
-
-from subprocess import PIPE, Popen
-import json
-import time
-import argparse
-import collections
-import sys
-
-#
-# Test port split configuration using devlink-port lanes attribute.
-# The test is skipped in case the attribute is not available.
-#
-# First, check that all the ports with 1 lane fail to split.
-# Second, check that all the ports with more than 1 lane can be split
-# to all valid configurations (e.g., split to 2, split to 4 etc.)
-#
-
-
-# Kselftest framework requirement - SKIP code is 4
-KSFT_SKIP=4
-Port = collections.namedtuple('Port', 'bus_info name')
-
-
-def run_command(cmd, should_fail=False):
- """
- Run a command in subprocess.
- Return: Tuple of (stdout, stderr).
- """
-
- p = Popen(cmd, stdout=PIPE, stderr=PIPE, shell=True)
- stdout, stderr = p.communicate()
- stdout, stderr = stdout.decode(), stderr.decode()
-
- if stderr != "" and not should_fail:
- print("Error sending command: %s" % cmd)
- print(stdout)
- print(stderr)
- return stdout, stderr
-
-
-class devlink_ports(object):
- """
- Class that holds information on the devlink ports, required to the tests;
- if_names: A list of interfaces in the devlink ports.
- """
-
- def get_if_names(dev):
- """
- Get a list of physical devlink ports.
- Return: Array of tuples (bus_info/port, if_name).
- """
-
- arr = []
-
- cmd = "devlink -j port show"
- stdout, stderr = run_command(cmd)
- assert stderr == ""
- ports = json.loads(stdout)['port']
-
- validate_devlink_output(ports, 'flavour')
-
- for port in ports:
- if dev in port:
- if ports[port]['flavour'] == 'physical':
- arr.append(Port(bus_info=port, name=ports[port]['netdev']))
-
- return arr
-
- def __init__(self, dev):
- self.if_names = devlink_ports.get_if_names(dev)
-
-
-def get_max_lanes(port):
- """
- Get the $port's maximum number of lanes.
- Return: number of lanes, e.g. 1, 2, 4 and 8.
- """
-
- cmd = "devlink -j port show %s" % port
- stdout, stderr = run_command(cmd)
- assert stderr == ""
- values = list(json.loads(stdout)['port'].values())[0]
-
- if 'lanes' in values:
- lanes = values['lanes']
- else:
- lanes = 0
- return lanes
-
-
-def get_split_ability(port):
- """
- Get the $port split ability.
- Return: split ability, true or false.
- """
-
- cmd = "devlink -j port show %s" % port.name
- stdout, stderr = run_command(cmd)
- assert stderr == ""
- values = list(json.loads(stdout)['port'].values())[0]
-
- return values['splittable']
-
-
-def split(k, port, should_fail=False):
- """
- Split $port into $k ports.
- If should_fail == True, the split should fail. Otherwise, should pass.
- Return: Array of sub ports after splitting.
- If the $port wasn't split, the array will be empty.
- """
-
- cmd = "devlink port split %s count %s" % (port.bus_info, k)
- stdout, stderr = run_command(cmd, should_fail=should_fail)
-
- if should_fail:
- if not test(stderr != "", "%s is unsplittable" % port.name):
- print("split an unsplittable port %s" % port.name)
- return create_split_group(port, k)
- else:
- if stderr == "":
- return create_split_group(port, k)
- print("didn't split a splittable port %s" % port.name)
-
- return []
-
-
-def unsplit(port):
- """
- Unsplit $port.
- """
-
- cmd = "devlink port unsplit %s" % port
- stdout, stderr = run_command(cmd)
- test(stderr == "", "Unsplit port %s" % port)
-
-
-def exists(port, dev):
- """
- Check if $port exists in the devlink ports.
- Return: True is so, False otherwise.
- """
-
- return any(dev_port.name == port
- for dev_port in devlink_ports.get_if_names(dev))
-
-
-def exists_and_lanes(ports, lanes, dev):
- """
- Check if every port in the list $ports exists in the devlink ports and has
- $lanes number of lanes after splitting.
- Return: True if both are True, False otherwise.
- """
-
- for port in ports:
- max_lanes = get_max_lanes(port)
- if not exists(port, dev):
- print("port %s doesn't exist in devlink ports" % port)
- return False
- if max_lanes != lanes:
- print("port %s has %d lanes, but %s were expected"
- % (port, lanes, max_lanes))
- return False
- return True
-
-
-def test(cond, msg):
- """
- Check $cond and print a message accordingly.
- Return: True is pass, False otherwise.
- """
-
- if cond:
- print("TEST: %-60s [ OK ]" % msg)
- else:
- print("TEST: %-60s [FAIL]" % msg)
-
- return cond
-
-
-def create_split_group(port, k):
- """
- Create the split group for $port.
- Return: Array with $k elements, which are the split port group.
- """
-
- return list(port.name + "s" + str(i) for i in range(k))
-
-
-def split_unsplittable_port(port, k):
- """
- Test that splitting of unsplittable port fails.
- """
-
- # split to max
- new_split_group = split(k, port, should_fail=True)
-
- if new_split_group != []:
- unsplit(port.bus_info)
-
-
-def split_splittable_port(port, k, lanes, dev):
- """
- Test that splitting of splittable port passes correctly.
- """
-
- new_split_group = split(k, port)
-
- # Once the split command ends, it takes some time to the sub ifaces'
- # to get their names. Use udevadm to continue only when all current udev
- # events are handled.
- cmd = "udevadm settle"
- stdout, stderr = run_command(cmd)
- assert stderr == ""
-
- if new_split_group != []:
- test(exists_and_lanes(new_split_group, lanes/k, dev),
- "split port %s into %s" % (port.name, k))
-
- unsplit(port.bus_info)
-
-
-def validate_devlink_output(devlink_data, target_property=None):
- """
- Determine if test should be skipped by checking:
- 1. devlink_data contains values
- 2. The target_property exist in devlink_data
- """
- skip_reason = None
- if any(devlink_data.values()):
- if target_property:
- skip_reason = "{} not found in devlink output, test skipped".format(target_property)
- for key in devlink_data:
- if target_property in devlink_data[key]:
- skip_reason = None
- else:
- skip_reason = 'devlink output is empty, test skipped'
-
- if skip_reason:
- print(skip_reason)
- sys.exit(KSFT_SKIP)
-
-
-def make_parser():
- parser = argparse.ArgumentParser(description='A test for port splitting.')
- parser.add_argument('--dev',
- help='The devlink handle of the device under test. ' +
- 'The default is the first registered devlink ' +
- 'handle.')
-
- return parser
-
-
-def main(cmdline=None):
- parser = make_parser()
- args = parser.parse_args(cmdline)
-
- dev = args.dev
- if not dev:
- cmd = "devlink -j dev show"
- stdout, stderr = run_command(cmd)
- assert stderr == ""
-
- validate_devlink_output(json.loads(stdout))
- devs = json.loads(stdout)['dev']
- dev = list(devs.keys())[0]
-
- cmd = "devlink dev show %s" % dev
- stdout, stderr = run_command(cmd)
- if stderr != "":
- print("devlink device %s can not be found" % dev)
- sys.exit(1)
-
- ports = devlink_ports(dev)
-
- found_max_lanes = False
- for port in ports.if_names:
- max_lanes = get_max_lanes(port.name)
-
- # If max lanes is 0, do not test port splitting at all
- if max_lanes == 0:
- continue
-
- # If 1 lane, shouldn't be able to split
- elif max_lanes == 1:
- test(not get_split_ability(port),
- "%s should not be able to split" % port.name)
- split_unsplittable_port(port, max_lanes)
-
- # Else, splitting should pass and all the split ports should exist.
- else:
- lane = max_lanes
- test(get_split_ability(port),
- "%s should be able to split" % port.name)
- while lane > 1:
- split_splittable_port(port, lane, max_lanes, dev)
-
- lane //= 2
- found_max_lanes = True
-
- if not found_max_lanes:
- print(f"Test not started, no port of device {dev} reports max_lanes")
- sys.exit(KSFT_SKIP)
-
-
-if __name__ == "__main__":
- main()
diff --git a/tools/testing/selftests/net/drop_monitor_tests.sh b/tools/testing/selftests/net/drop_monitor_tests.sh
index b7650e30d18b..7c4818c971fc 100755
--- a/tools/testing/selftests/net/drop_monitor_tests.sh
+++ b/tools/testing/selftests/net/drop_monitor_tests.sh
@@ -2,10 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# This test is for checking drop monitor functionality.
-
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="
@@ -13,10 +11,6 @@ TESTS="
hw_drops
"
-IP="ip -netns ns1"
-TC="tc -netns ns1"
-DEVLINK="devlink -N ns1"
-NS_EXEC="ip netns exec ns1"
NETDEVSIM_PATH=/sys/bus/netdevsim/
DEV_ADDR=1337
DEV=netdevsim${DEV_ADDR}
@@ -43,7 +37,7 @@ setup()
modprobe netdevsim &> /dev/null
set -e
- ip netns add ns1
+ setup_ns NS1
$IP link add dummy10 up type dummy
$NS_EXEC echo "$DEV_ADDR 1" > ${NETDEVSIM_PATH}/new_device
@@ -57,7 +51,7 @@ setup()
cleanup()
{
$NS_EXEC echo "$DEV_ADDR" > ${NETDEVSIM_PATH}/del_device
- ip netns del ns1
+ cleanup_ns ${NS1}
}
sw_drops_test()
@@ -194,8 +188,15 @@ if [ $? -ne 0 ]; then
exit $ksft_skip
fi
-# start clean
+# create netns first so we can get the namespace name
+setup_ns NS1
cleanup &> /dev/null
+trap cleanup EXIT
+
+IP="ip -netns ${NS1}"
+TC="tc -netns ${NS1}"
+DEVLINK="devlink -N ${NS1}"
+NS_EXEC="ip netns exec ${NS1}"
for t in $TESTS
do
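Review bot: `IP`, `TC`, `DEVLINK` and `NS_EXEC` are built once from `${NS1}` here, but the `cleanup` two lines earlier already runs `cleanup_ns ${NS1}`, and setup() calls `setup_ns NS1` again for each test. That only works if setup_ns hands back the same name on every call, which lib.sh (not visible here) would have to guarantee. Otherwise the four prefixes point at a namespace that no longer exists. Assigning them inside setup(), right after `setup_ns NS1`, removes that dependency and matches what this patch does in fdb_flush.sh: `IP="ip -netns ${NS1}"`.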
diff --git a/tools/testing/selftests/net/epoll_busy_poll.c b/tools/testing/selftests/net/epoll_busy_poll.c
new file mode 100644
index 000000000000..16e457c2f877
--- /dev/null
+++ b/tools/testing/selftests/net/epoll_busy_poll.c
@@ -0,0 +1,320 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+/* Basic per-epoll context busy poll test.
+ *
+ * Only tests the ioctls, but should be expanded to test two connected hosts in
+ * the future
+ */
+
+#define _GNU_SOURCE
+
+#include <error.h>
+#include <errno.h>
+#include <inttypes.h>
+#include <limits.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+
+#include <sys/capability.h>
+
+#include <sys/epoll.h>
+#include <sys/ioctl.h>
+#include <sys/socket.h>
+
+#include "../kselftest_harness.h"
+
+/* if the headers haven't been updated, we need to define some things */
+#if !defined(EPOLL_IOC_TYPE)
+struct epoll_params {
+ uint32_t busy_poll_usecs;
+ uint16_t busy_poll_budget;
+ uint8_t prefer_busy_poll;
+
+ /* pad the struct to a multiple of 64bits */
+ uint8_t __pad;
+};
+
+#define EPOLL_IOC_TYPE 0x8A
+#define EPIOCSPARAMS _IOW(EPOLL_IOC_TYPE, 0x01, struct epoll_params)
+#define EPIOCGPARAMS _IOR(EPOLL_IOC_TYPE, 0x02, struct epoll_params)
+#endif
+
+FIXTURE(invalid_fd)
+{
+ int invalid_fd;
+ struct epoll_params params;
+};
+
+FIXTURE_SETUP(invalid_fd)
+{
+ int ret;
+
+ ret = socket(AF_UNIX, SOCK_DGRAM, 0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("error creating unix socket");
+
+ self->invalid_fd = ret;
+}
+
+FIXTURE_TEARDOWN(invalid_fd)
+{
+ int ret;
+
+ ret = close(self->invalid_fd);
+ EXPECT_EQ(0, ret);
+}
+
+TEST_F(invalid_fd, test_invalid_fd)
+{
+ int ret;
+
+ ret = ioctl(self->invalid_fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCGPARAMS on invalid epoll FD should set errno to ENOTTY");
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->invalid_fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should error");
+
+ EXPECT_EQ(ENOTTY, errno)
+ TH_LOG("EPIOCSPARAMS on invalid epoll FD should set errno to ENOTTY");
+}
+
+FIXTURE(epoll_busy_poll)
+{
+ int fd;
+ struct epoll_params params;
+ struct epoll_params *invalid_params;
+ cap_t caps;
+};
+
+FIXTURE_SETUP(epoll_busy_poll)
+{
+ int ret;
+
+ ret = epoll_create1(0);
+ EXPECT_NE(-1, ret)
+ TH_LOG("epoll_create1 failed?");
+
+ self->fd = ret;
+
+ self->caps = cap_get_proc();
+ EXPECT_NE(NULL, self->caps);
+}
+
+FIXTURE_TEARDOWN(epoll_busy_poll)
+{
+ int ret;
+
+ ret = close(self->fd);
+ EXPECT_EQ(0, ret);
+
+ ret = cap_free(self->caps);
+ EXPECT_NE(-1, ret)
+ TH_LOG("unable to free capabilities");
+}
+
+TEST_F(epoll_busy_poll, test_get_params)
+{
+ /* begin by getting the epoll params from the kernel
+ *
+ * the default should be default and all fields should be zero'd by the
+ * kernel, so set params fields to garbage to test this.
+ */
+ int ret = 0;
+
+ self->params.busy_poll_usecs = 0xff;
+ self->params.busy_poll_budget = 0xff;
+ self->params.prefer_busy_poll = 1;
+ self->params.__pad = 0xf;
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+ EXPECT_EQ(0, ret)
+ TH_LOG("ioctl EPIOCGPARAMS should succeed");
+
+ EXPECT_EQ(0, self->params.busy_poll_usecs)
+ TH_LOG("EPIOCGPARAMS busy_poll_usecs should have been 0");
+
+ EXPECT_EQ(0, self->params.busy_poll_budget)
+ TH_LOG("EPIOCGPARAMS busy_poll_budget should have been 0");
+
+ EXPECT_EQ(0, self->params.prefer_busy_poll)
+ TH_LOG("EPIOCGPARAMS prefer_busy_poll should have been 0");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("EPIOCGPARAMS __pad should have been 0");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCGPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCGPARAMS should error with invalid params");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCGPARAMS with invalid params should set errno to EFAULT");
+}
+
+TEST_F(epoll_busy_poll, test_set_invalid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.__pad = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS non-zero __pad should error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS non-zero __pad errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = (uint32_t)INT_MAX + 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_usecs > S32_MAX");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS busy_poll_usecs > S32_MAX errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 2;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error prefer_busy_poll > 1");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("EPIOCSPARAMS prefer_busy_poll > 1 errno should be EINVAL");
+
+ self->params.__pad = 0;
+ self->params.busy_poll_usecs = 32;
+ self->params.prefer_busy_poll = 1;
+
+ /* set budget well above kernel's NAPI_POLL_WEIGHT of 64 */
+ self->params.busy_poll_budget = UINT16_MAX;
+
+ /* test harness should run with CAP_NET_ADMIN, but let's make sure */
+ cap_flag_value_t tmp;
+
+ ret = cap_get_flag(self->caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &tmp);
+ EXPECT_EQ(0, ret)
+ TH_LOG("unable to get CAP_NET_ADMIN cap flag");
+
+ EXPECT_EQ(CAP_SET, tmp)
+ TH_LOG("expecting CAP_NET_ADMIN to be set for the test harness");
+
+ /* at this point we know CAP_NET_ADMIN is available, so setting the
+ * params with a busy_poll_budget > NAPI_POLL_WEIGHT should succeed
+ */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS should allow busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* remove CAP_NET_ADMIN from our effective set */
+ cap_value_t net_admin[] = { CAP_NET_ADMIN };
+
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_CLEAR);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't clear CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should drop CAP_NET_ADMIN");
+
+ /* this is now expected to fail */
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ EXPECT_EQ(EPERM, errno)
+ TH_LOG("EPIOCSPARAMS errno should be EPERM busy_poll_budget > NAPI_POLL_WEIGHT");
+
+ /* restore CAP_NET_ADMIN to our effective set */
+ ret = cap_set_flag(self->caps, CAP_EFFECTIVE, 1, net_admin, CAP_SET);
+ EXPECT_EQ(0, ret)
+ TH_LOG("couldn't restore CAP_NET_ADMIN");
+
+ ret = cap_set_proc(self->caps);
+ EXPECT_EQ(0, ret)
+ TH_LOG("cap_set_proc should set CAP_NET_ADMIN");
+
+ self->invalid_params = (struct epoll_params *)0xdeadbeef;
+ ret = ioctl(self->fd, EPIOCSPARAMS, self->invalid_params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("EPIOCSPARAMS should error when epoll_params is invalid");
+
+ EXPECT_EQ(EFAULT, errno)
+ TH_LOG("EPIOCSPARAMS should set errno to EFAULT when epoll_params is invalid");
+}
+
+TEST_F(epoll_busy_poll, test_set_and_get_valid)
+{
+ int ret;
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ self->params.busy_poll_usecs = 25;
+ self->params.busy_poll_budget = 16;
+ self->params.prefer_busy_poll = 1;
+
+ ret = ioctl(self->fd, EPIOCSPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCSPARAMS with valid params should not error");
+
+ /* check that the kernel returns the same values back */
+
+ memset(&self->params, 0, sizeof(struct epoll_params));
+
+ ret = ioctl(self->fd, EPIOCGPARAMS, &self->params);
+
+ EXPECT_EQ(0, ret)
+ TH_LOG("EPIOCGPARAMS should not error");
+
+ EXPECT_EQ(25, self->params.busy_poll_usecs)
+ TH_LOG("params.busy_poll_usecs incorrect");
+
+ EXPECT_EQ(16, self->params.busy_poll_budget)
+ TH_LOG("params.busy_poll_budget incorrect");
+
+ EXPECT_EQ(1, self->params.prefer_busy_poll)
+ TH_LOG("params.prefer_busy_poll incorrect");
+
+ EXPECT_EQ(0, self->params.__pad)
+ TH_LOG("params.__pad was not 0");
+}
+
+TEST_F(epoll_busy_poll, test_invalid_ioctl)
+{
+ int invalid_ioctl = EPIOCGPARAMS + 10;
+ int ret;
+
+ ret = ioctl(self->fd, invalid_ioctl, &self->params);
+
+ EXPECT_EQ(-1, ret)
+ TH_LOG("invalid ioctl should return error");
+
+ EXPECT_EQ(EINVAL, errno)
+ TH_LOG("invalid ioctl should set errno to EINVAL");
+}
+
+TEST_HARNESS_MAIN
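Review bot: In test_set_invalid the CAP_NET_ADMIN precondition and the capability drop are checked with non-fatal `EXPECT_*` macros. A run without the capability, or a failed `cap_set_proc()`, therefore carries on into the EPERM assertion and reports a misleading failure about busy_poll_budget instead of about the test environment. This block is the only coverage of the privilege check on `busy_poll_budget > NAPI_POLL_WEIGHT`, so I would skip when the capability is absent, `if (tmp != CAP_SET) SKIP(return, "CAP_NET_ADMIN required");`, and make the drop fatal with `ASSERT_EQ(0, ret)` after the first `cap_set_proc(self->caps)`. The fixture setups have the same weakness: `EXPECT_NE(-1, ret)` lets every test continue on fd -1, where `ASSERT_NE` would stop at the real cause.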
diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh
index d32a14ba069a..386ebd829df5 100755
--- a/tools/testing/selftests/net/fcnal-test.sh
+++ b/tools/testing/selftests/net/fcnal-test.sh
@@ -37,8 +37,9 @@
#
# server / client nomenclature relative to ns-A
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+
+PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH
VERBOSE=0
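Review bot: The added `PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH` puts the caller's current directory ahead of the system directories, so every later unqualified `ip`, `sysctl`, `ping` or `which` resolves first against wherever the test was started. This script configures namespaces and sysctls and is presumably run as root, so an unexpected executable of the same name in a writable working directory would run with those privileges. Appending instead keeps the helper lookup without shadowing system tools: `PATH=$PATH:$(dirname "$0")` (assuming the intent is to find helper binaries built next to the script).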
@@ -82,14 +83,6 @@ MCAST=ff02::1
NSA_LINKIP6=
NSB_LINKIP6=
-NSA=ns-A
-NSB=ns-B
-NSC=ns-C
-
-NSA_CMD="ip netns exec ${NSA}"
-NSB_CMD="ip netns exec ${NSB}"
-NSC_CMD="ip netns exec ${NSC}"
-
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
# Check if FIPS mode is enabled
@@ -107,6 +100,7 @@ log_test()
local rc=$1
local expected=$2
local msg="$3"
+ local ans
[ "${VERBOSE}" = "1" ] && echo
@@ -116,19 +110,20 @@ log_test()
else
nfail=$((nfail+1))
printf "TEST: %-70s [FAIL]\n" "${msg}"
+ echo " expected rc $expected; actual rc $rc"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
fi
if [ "${PAUSE}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
+ read ans
+ [ "$ans" = "q" ] && exit 1
fi
kill_procs
@@ -197,6 +192,15 @@ kill_procs()
sleep 1
}
+set_ping_group()
+{
+ if [ "$VERBOSE" = "1" ]; then
+ echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'"
+ fi
+
+ ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'
+}
+
do_run_cmd()
{
local cmd="$*"
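Review bot: set_ping_group() has no comment saying what the sysctl does or that its effect is confined to one namespace. I would add `# Let every GID open ICMP datagram ("ping") sockets, inside ${NSA} only` above the body. The command text is also written twice, once for the VERBOSE echo and once for execution, so the two can drift apart. Unlike the `set_sysctl ... 2>/dev/null` calls it replaces, stderr is no longer discarded, which I assume is intended.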
@@ -406,9 +410,6 @@ create_ns()
local addr=$2
local addr6=$3
- ip netns add ${ns}
-
- ip -netns ${ns} link set lo up
if [ "${addr}" != "-" ]; then
ip -netns ${ns} addr add dev lo ${addr}
fi
@@ -467,13 +468,12 @@ cleanup()
ip -netns ${NSA} link del dev ${NSA_DEV}
ip netns pids ${NSA} | xargs kill 2>/dev/null
- ip netns del ${NSA}
+ cleanup_ns ${NSA}
fi
ip netns pids ${NSB} | xargs kill 2>/dev/null
- ip netns del ${NSB}
ip netns pids ${NSC} | xargs kill 2>/dev/null
- ip netns del ${NSC} >/dev/null 2>&1
+ cleanup_ns ${NSB} ${NSC}
}
cleanup_vrf_dup()
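Review bot: `ip netns pids ${NSC} | xargs kill` and `cleanup_ns ${NSB} ${NSC}` now use variables this patch no longer initialises at file scope; going by the visible hunks, NSC is assigned only in setup_vrf_dup() and setup_lla_only(). If cleanup() runs while NSC is still unset, `${NSC}` expands to nothing and the first command becomes `ip netns pids` with no namespace. Its error is not silenced, because the `2>/dev/null` belongs to `xargs kill`. Guarding on the name avoids that: `[ -n "${NSC}" ] && ip netns pids "${NSC}" | xargs -r kill 2>/dev/null`. cleanup() starts above this hunk, so the condition around the NSA branch is not visible here.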
@@ -487,6 +487,8 @@ setup_vrf_dup()
{
# some VRF tests use ns-C which has the same config as
# ns-B but for a device NOT in the VRF
+ setup_ns NSC
+ NSC_CMD="ip netns exec ${NSC}"
create_ns ${NSC} "-" "-"
connect_ns ${NSA} ${NSA_DEV2} ${NSA_IP}/24 ${NSA_IP6}/64 \
${NSC} ${NSC_DEV} ${NSB_IP}/24 ${NSB_IP6}/64
@@ -503,6 +505,10 @@ setup()
log_debug "Configuring network namespaces"
set -e
+ setup_ns NSA NSB
+ NSA_CMD="ip netns exec ${NSA}"
+ NSB_CMD="ip netns exec ${NSB}"
+
create_ns ${NSA} ${NSA_LO_IP}/32 ${NSA_LO_IP6}/128
create_ns ${NSB} ${NSB_LO_IP}/32 ${NSB_LO_IP6}/128
connect_ns ${NSA} ${NSA_DEV} ${NSA_IP}/24 ${NSA_IP6}/64 \
@@ -545,6 +551,10 @@ setup_lla_only()
log_debug "Configuring network namespaces"
set -e
+ setup_ns NSA NSB NSC
+ NSA_CMD="ip netns exec ${NSA}"
+ NSB_CMD="ip netns exec ${NSB}"
+ NSC_CMD="ip netns exec ${NSC}"
create_ns ${NSA} "-" "-"
create_ns ${NSB} "-" "-"
create_ns ${NSC} "-" "-"
@@ -839,14 +849,14 @@ ipv4_ping()
set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null
ipv4_ping_novrf
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv4_ping_vrf
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_ping_vrf
}
@@ -2057,12 +2067,12 @@ ipv4_addr_bind()
log_subsection "No VRF"
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_addr_bind_novrf
log_subsection "With VRF"
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv4_addr_bind_vrf
}
@@ -2525,14 +2535,14 @@ ipv6_ping()
setup
ipv6_ping_novrf
setup
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv6_ping_novrf
log_subsection "With VRF"
setup "yes"
ipv6_ping_vrf
setup "yes"
- set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null
+ set_ping_group
ipv6_ping_vrf
}
diff --git a/tools/testing/selftests/net/fdb_flush.sh b/tools/testing/selftests/net/fdb_flush.sh
index 90e7a29e0476..d5e3abb8658c 100755
--- a/tools/testing/selftests/net/fdb_flush.sh
+++ b/tools/testing/selftests/net/fdb_flush.sh
@@ -5,6 +5,8 @@
# Check that flush works as expected with all the supported arguments and verify
# some combinations of arguments.
+source lib.sh
+
FLUSH_BY_STATE_TESTS="
vxlan_test_flush_by_permanent
vxlan_test_flush_by_nopermanent
@@ -739,10 +741,9 @@ bridge_vxlan_test_flush()
setup()
{
- IP="ip -netns ns1"
- BRIDGE="bridge -netns ns1"
-
- ip netns add ns1
+ setup_ns NS
+ IP="ip -netns ${NS}"
+ BRIDGE="bridge -netns ${NS}"
$IP link add name vx10 type vxlan id 1000 dstport "$VXPORT"
$IP link add name vx20 type vxlan id 2000 dstport "$VXPORT"
@@ -759,7 +760,7 @@ cleanup()
$IP link del dev vx20
$IP link del dev vx10
- ip netns del ns1
+ cleanup_ns ${NS}
}
################################################################################
diff --git a/tools/testing/selftests/net/fib-onlink-tests.sh b/tools/testing/selftests/net/fib-onlink-tests.sh
index c287b90b8af8..ec2d6ceb1f08 100755
--- a/tools/testing/selftests/net/fib-onlink-tests.sh
+++ b/tools/testing/selftests/net/fib-onlink-tests.sh
@@ -3,6 +3,7 @@
# IPv4 and IPv6 onlink tests
+source lib.sh
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
VERBOSE=0
@@ -74,9 +75,6 @@ TEST_NET4IN6[2]=10.2.1.254
# mcast address
MCAST6=ff02::1
-
-PEER_NS=bart
-PEER_CMD="ip netns exec ${PEER_NS}"
VRF=lisa
VRF_TABLE=1101
PBR_TABLE=101
@@ -176,8 +174,7 @@ setup()
set -e
# create namespace
- ip netns add ${PEER_NS}
- ip -netns ${PEER_NS} li set lo up
+ setup_ns PEER_NS
# add vrf table
ip li add ${VRF} type vrf table ${VRF_TABLE}
@@ -219,7 +216,7 @@ setup()
cleanup()
{
# make sure we start from a clean slate
- ip netns del ${PEER_NS} 2>/dev/null
+ cleanup_ns ${PEER_NS} 2>/dev/null
for n in 1 3 5 7; do
ip link del ${NETIFS[p${n}]} 2>/dev/null
done
diff --git a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
index 51df5e305855..e85248609af4 100755
--- a/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
+++ b/tools/testing/selftests/net/fib_nexthop_multiprefix.sh
@@ -12,6 +12,7 @@
#
# routing in h0 to hN is done with nexthop objects.
+source lib.sh
PAUSE_ON_FAIL=no
VERBOSE=0
@@ -72,12 +73,6 @@ create_ns()
{
local ns=${1}
- ip netns del ${ns} 2>/dev/null
-
- ip netns add ${ns}
- ip -netns ${ns} addr add 127.0.0.1/8 dev lo
- ip -netns ${ns} link set lo up
-
ip netns exec ${ns} sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
case ${ns} in
h*)
@@ -97,7 +92,13 @@ setup()
#set -e
- for ns in h0 r1 h1 h2 h3
+ setup_ns h0 r1 h1 h2 h3
+ h[0]=$h0
+ h[1]=$h1
+ h[2]=$h2
+ h[3]=$h3
+ r[1]=$r1
+ for ns in ${h[0]} ${r[1]} ${h[1]} ${h[2]} ${h[3]}
do
create_ns ${ns}
done
@@ -108,35 +109,35 @@ setup()
for i in 0 1 2 3
do
- ip -netns h${i} li add eth0 type veth peer name r1h${i}
- ip -netns h${i} li set eth0 up
- ip -netns h${i} li set r1h${i} netns r1 name eth${i} up
-
- ip -netns h${i} addr add dev eth0 172.16.10${i}.1/24
- ip -netns h${i} -6 addr add dev eth0 2001:db8:10${i}::1/64
- ip -netns r1 addr add dev eth${i} 172.16.10${i}.254/24
- ip -netns r1 -6 addr add dev eth${i} 2001:db8:10${i}::64/64
+ ip -netns ${h[$i]} li add eth0 type veth peer name r1h${i}
+ ip -netns ${h[$i]} li set eth0 up
+ ip -netns ${h[$i]} li set r1h${i} netns ${r[1]} name eth${i} up
+
+ ip -netns ${h[$i]} addr add dev eth0 172.16.10${i}.1/24
+ ip -netns ${h[$i]} -6 addr add dev eth0 2001:db8:10${i}::1/64
+ ip -netns ${r[1]} addr add dev eth${i} 172.16.10${i}.254/24
+ ip -netns ${r[1]} -6 addr add dev eth${i} 2001:db8:10${i}::64/64
done
- ip -netns h0 nexthop add id 4 via 172.16.100.254 dev eth0
- ip -netns h0 nexthop add id 6 via 2001:db8:100::64 dev eth0
+ ip -netns ${h[0]} nexthop add id 4 via 172.16.100.254 dev eth0
+ ip -netns ${h[0]} nexthop add id 6 via 2001:db8:100::64 dev eth0
- # routing from h0 to h1-h3 and back
+ # routing from ${h[0]} to h1-h3 and back
for i in 1 2 3
do
- ip -netns h0 ro add 172.16.10${i}.0/24 nhid 4
- ip -netns h${i} ro add 172.16.100.0/24 via 172.16.10${i}.254
+ ip -netns ${h[0]} ro add 172.16.10${i}.0/24 nhid 4
+ ip -netns ${h[$i]} ro add 172.16.100.0/24 via 172.16.10${i}.254
- ip -netns h0 -6 ro add 2001:db8:10${i}::/64 nhid 6
- ip -netns h${i} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
+ ip -netns ${h[0]} -6 ro add 2001:db8:10${i}::/64 nhid 6
+ ip -netns ${h[$i]} -6 ro add 2001:db8:100::/64 via 2001:db8:10${i}::64
done
if [ "$VERBOSE" = "1" ]; then
echo
echo "host 1 config"
- ip -netns h0 li sh
- ip -netns h0 ro sh
- ip -netns h0 -6 ro sh
+ ip -netns ${h[0]} li sh
+ ip -netns ${h[0]} ro sh
+ ip -netns ${h[0]} -6 ro sh
fi
#set +e
@@ -144,10 +145,7 @@ setup()
cleanup()
{
- for n in h0 r1 h1 h2 h3
- do
- ip netns del ${n} 2>/dev/null
- done
+ cleanup_all_ns
}
change_mtu()
@@ -156,7 +154,7 @@ change_mtu()
local mtu=$2
run_cmd ip -netns h${hostid} li set eth0 mtu ${mtu}
- run_cmd ip -netns r1 li set eth${hostid} mtu ${mtu}
+ run_cmd ip -netns ${r1} li set eth${hostid} mtu ${mtu}
}
################################################################################
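Review bot: The unchanged first command in change_mtu() still addresses the host namespace by the literal name `h${hostid}`, while the line below it was converted to `${r1}` and setup() now reaches the hosts through `${h[$i]}`. If setup_ns assigns generated names, as the conversion implies, no namespace called `h1`, `h2` or `h3` exists any more and the host-side MTU change fails while the router side succeeds. Assuming callers pass the numeric host id, the line should read `run_cmd ip -netns ${h[$hostid]} li set eth0 mtu ${mtu}`. The function's opening lines are outside this hunk.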
@@ -168,23 +166,23 @@ validate_v4_exception()
local mtu=$2
local ping_sz=$3
local dst="172.16.10${i}.1"
- local h0=172.16.100.1
- local r1=172.16.100.254
+ local h0_ip=172.16.100.1
+ local r1_ip=172.16.100.254
local rc
if [ ${ping_sz} != "0" ]; then
- run_cmd ip netns exec h0 ping -s ${ping_sz} -c5 -w5 ${dst}
+ run_cmd ip netns exec ${h0} ping -s ${ping_sz} -c5 -w5 ${dst}
fi
if [ "$VERBOSE" = "1" ]; then
echo "Route get"
- ip -netns h0 ro get ${dst}
+ ip -netns ${h0} ro get ${dst}
echo "Searching for:"
echo " cache .* mtu ${mtu}"
echo
fi
- ip -netns h0 ro get ${dst} | \
+ ip -netns ${h0} ro get ${dst} | \
grep -q "cache .* mtu ${mtu}"
rc=$?
@@ -197,24 +195,24 @@ validate_v6_exception()
local mtu=$2
local ping_sz=$3
local dst="2001:db8:10${i}::1"
- local h0=2001:db8:100::1
- local r1=2001:db8:100::64
+ local h0_ip=2001:db8:100::1
+ local r1_ip=2001:db8:100::64
local rc
if [ ${ping_sz} != "0" ]; then
- run_cmd ip netns exec h0 ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
+ run_cmd ip netns exec ${h0} ${ping6} -s ${ping_sz} -c5 -w5 ${dst}
fi
if [ "$VERBOSE" = "1" ]; then
echo "Route get"
- ip -netns h0 -6 ro get ${dst}
+ ip -netns ${h0} -6 ro get ${dst}
echo "Searching for:"
- echo " ${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+ echo " ${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}"
echo
fi
- ip -netns h0 -6 ro get ${dst} | \
- grep -q "${dst} from :: via ${r1} dev eth0 src ${h0} .* mtu ${mtu}"
+ ip -netns ${h0} -6 ro get ${dst} | \
+ grep -q "${dst}.* via ${r1_ip} dev eth0 src ${h0_ip} .* mtu ${mtu}"
rc=$?
log_test $rc 0 "IPv6: host 0 to host ${i}, mtu ${mtu}"
@@ -242,11 +240,11 @@ for i in 1 2 3
do
# generate a cached route per-cpu
for c in ${cpus}; do
- run_cmd taskset -c ${c} ip netns exec h0 ping -c1 -w1 172.16.10${i}.1
- [ $? -ne 0 ] && printf "\nERROR: ping to h${i} failed\n" && ret=1
+ run_cmd taskset -c ${c} ip netns exec ${h0} ping -c1 -w1 172.16.10${i}.1
+ [ $? -ne 0 ] && printf "\nERROR: ping to ${h[$i]} failed\n" && ret=1
- run_cmd taskset -c ${c} ip netns exec h0 ${ping6} -c1 -w1 2001:db8:10${i}::1
- [ $? -ne 0 ] && printf "\nERROR: ping6 to h${i} failed\n" && ret=1
+ run_cmd taskset -c ${c} ip netns exec ${h0} ${ping6} -c1 -w1 2001:db8:10${i}::1
+ [ $? -ne 0 ] && printf "\nERROR: ping6 to ${h[$i]} failed\n" && ret=1
[ $ret -ne 0 ] && break
done
@@ -282,11 +280,11 @@ if [ $ret -eq 0 ]; then
validate_v6_exception 3 1400 0
# targeted deletes to trigger cleanup paths in kernel
- ip -netns h0 ro del 172.16.102.0/24 nhid 4
- ip -netns h0 -6 ro del 2001:db8:102::/64 nhid 6
+ ip -netns ${h0} ro del 172.16.102.0/24 nhid 4
+ ip -netns ${h0} -6 ro del 2001:db8:102::/64 nhid 6
- ip -netns h0 nexthop del id 4
- ip -netns h0 nexthop del id 6
+ ip -netns ${h0} nexthop del id 4
+ ip -netns ${h0} nexthop del id 6
fi
cleanup
diff --git a/tools/testing/selftests/net/fib_nexthop_nongw.sh b/tools/testing/selftests/net/fib_nexthop_nongw.sh
index b7b928b38ce4..1ccf56f10171 100755
--- a/tools/testing/selftests/net/fib_nexthop_nongw.sh
+++ b/tools/testing/selftests/net/fib_nexthop_nongw.sh
@@ -8,6 +8,7 @@
# veth0 <---|---> veth1
# Validate source address selection for route without gateway
+source lib.sh
PAUSE_ON_FAIL=no
VERBOSE=0
ret=0
@@ -64,35 +65,31 @@ run_cmd()
# config
setup()
{
- ip netns add h1
- ip -n h1 link set lo up
- ip netns add h2
- ip -n h2 link set lo up
+ setup_ns h1 h2
# Add a fake eth0 to support an ip address
- ip -n h1 link add name eth0 type dummy
- ip -n h1 link set eth0 up
- ip -n h1 address add 192.168.0.1/24 dev eth0
+ ip -n $h1 link add name eth0 type dummy
+ ip -n $h1 link set eth0 up
+ ip -n $h1 address add 192.168.0.1/24 dev eth0
# Configure veths (same @mac, arp off)
- ip -n h1 link add name veth0 type veth peer name veth1 netns h2
- ip -n h1 link set veth0 up
+ ip -n $h1 link add name veth0 type veth peer name veth1 netns $h2
+ ip -n $h1 link set veth0 up
- ip -n h2 link set veth1 up
+ ip -n $h2 link set veth1 up
# Configure @IP in the peer netns
- ip -n h2 address add 192.168.1.1/32 dev veth1
- ip -n h2 route add default dev veth1
+ ip -n $h2 address add 192.168.1.1/32 dev veth1
+ ip -n $h2 route add default dev veth1
# Add a nexthop without @gw and use it in a route
- ip -n h1 nexthop add id 1 dev veth0
- ip -n h1 route add 192.168.1.1 nhid 1
+ ip -n $h1 nexthop add id 1 dev veth0
+ ip -n $h1 route add 192.168.1.1 nhid 1
}
cleanup()
{
- ip netns del h1 2>/dev/null
- ip netns del h2 2>/dev/null
+ cleanup_ns $h1 $h2
}
trap cleanup EXIT
@@ -108,12 +105,11 @@ do
esac
done
-cleanup
setup
-run_cmd ip -netns h1 route get 192.168.1.1
+run_cmd ip -netns $h1 route get 192.168.1.1
log_test $? 0 "nexthop: get route with nexthop without gw"
-run_cmd ip netns exec h1 ping -c1 192.168.1.1
+run_cmd ip netns exec $h1 ping -c1 192.168.1.1
log_test $? 0 "nexthop: ping through nexthop without gw"
exit $ret
diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index a6f2c0b9555d..ac0b2c6a5761 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -14,6 +14,7 @@
# objects. Device reference counts and network namespace cleanup tested
# by use of network namespace for peer.
+source lib.sh
ret=0
# Kselftest framework requirement - SKIP code is 4.
ksft_skip=4
@@ -148,13 +149,7 @@ create_ns()
{
local n=${1}
- ip netns del ${n} 2>/dev/null
-
set -e
- ip netns add ${n}
- ip netns set ${n} $((nsid++))
- ip -netns ${n} addr add 127.0.0.1/8 dev lo
- ip -netns ${n} link set lo up
ip netns exec ${n} sysctl -qw net.ipv4.ip_forward=1
ip netns exec ${n} sysctl -qw net.ipv4.fib_multipath_use_neigh=1
@@ -173,12 +168,13 @@ setup()
{
cleanup
- create_ns me
- create_ns peer
- create_ns remote
+ setup_ns me peer remote
+ create_ns $me
+ create_ns $peer
+ create_ns $remote
- IP="ip -netns me"
- BRIDGE="bridge -netns me"
+ IP="ip -netns $me"
+ BRIDGE="bridge -netns $me"
set -e
$IP li add veth1 type veth peer name veth2
$IP li set veth1 up
@@ -190,24 +186,24 @@ setup()
$IP addr add 172.16.2.1/24 dev veth3
$IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
- $IP li set veth2 netns peer up
- ip -netns peer addr add 172.16.1.2/24 dev veth2
- ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
+ $IP li set veth2 netns $peer up
+ ip -netns $peer addr add 172.16.1.2/24 dev veth2
+ ip -netns $peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
- $IP li set veth4 netns peer up
- ip -netns peer addr add 172.16.2.2/24 dev veth4
- ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
+ $IP li set veth4 netns $peer up
+ ip -netns $peer addr add 172.16.2.2/24 dev veth4
+ ip -netns $peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
- ip -netns remote li add veth5 type veth peer name veth6
- ip -netns remote li set veth5 up
- ip -netns remote addr add dev veth5 172.16.101.1/24
- ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
- ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
- ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
+ ip -netns $remote li add veth5 type veth peer name veth6
+ ip -netns $remote li set veth5 up
+ ip -netns $remote addr add dev veth5 172.16.101.1/24
+ ip -netns $remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
+ ip -netns $remote ro add 172.16.0.0/22 via 172.16.101.2
+ ip -netns $remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
- ip -netns remote li set veth6 netns peer up
- ip -netns peer addr add dev veth6 172.16.101.2/24
- ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
+ ip -netns $remote li set veth6 netns $peer up
+ ip -netns $peer addr add dev veth6 172.16.101.2/24
+ ip -netns $peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
set +e
}
@@ -215,7 +211,7 @@ cleanup()
{
local ns
- for ns in me peer remote; do
+ for ns in $me $peer $remote; do
ip netns del ${ns} 2>/dev/null
done
}
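Review bot: cleanup() still removes the namespaces with `ip netns del ${ns}` even though they are now created by `setup_ns me peer remote`. The other scripts converted in this patch pair setup_ns with `cleanup_ns` (for example fib_nexthop_nongw.sh). If lib.sh keeps its own record of the namespaces it created, which is not visible here, deleting them behind its back leaves that record stale. Replacing the loop with `cleanup_ns $me $peer $remote` would keep the pairing consistent.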
@@ -779,7 +775,7 @@ ipv6_grp_refs()
run_cmd "$IP route add 2001:db8:101::1/128 nhid 102"
# create per-cpu dsts through nh 100
- run_cmd "ip netns exec me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
+ run_cmd "ip netns exec $me mausezahn -6 veth1.10 -B 2001:db8:101::1 -A 2001:db8:91::1 -c 5 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1"
# remove nh 100 from the group to delete the route potentially leaving
# a stale per-cpu dst which holds a reference to the nexthop's net
@@ -805,7 +801,7 @@ ipv6_grp_refs()
# if a reference was lost this command will hang because the net device
# cannot be removed
- timeout -s KILL 5 ip netns exec me ip link del veth1.10 >/dev/null 2>&1
+ timeout -s KILL 5 ip netns exec $me ip link del veth1.10 >/dev/null 2>&1
# we can't cleanup if the command is hung trying to delete the netdev
if [ $? -eq 137 ]; then
@@ -1012,13 +1008,13 @@ ipv6_fcnal_runtime()
log_test $? 0 "Route delete"
run_cmd "$IP ro add 2001:db8:101::1/128 nhid 81"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping with nexthop"
run_cmd "$IP nexthop add id 82 via 2001:db8:92::2 dev veth3"
run_cmd "$IP nexthop add id 122 group 81/82"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping - multipath"
#
@@ -1026,26 +1022,26 @@ ipv6_fcnal_runtime()
#
run_cmd "$IP -6 nexthop add id 83 blackhole"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 83"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 2 "Ping - blackhole"
run_cmd "$IP nexthop replace id 83 via 2001:db8:91::2 dev veth1"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping - blackhole replaced with gateway"
run_cmd "$IP -6 nexthop replace id 83 blackhole"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 2 "Ping - gateway replaced by blackhole"
run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
if [ $? -eq 0 ]; then
run_cmd "$IP nexthop replace id 122 group 83"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 2 "Ping - group with blackhole"
run_cmd "$IP nexthop replace id 122 group 81/82"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Ping - group blackhole replaced with gateways"
else
log_test 2 0 "Ping - multipath failed"
@@ -1128,15 +1124,15 @@ ipv6_fcnal_runtime()
# rpfilter and default route
$IP nexthop flush >/dev/null 2>&1
- run_cmd "ip netns exec me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
+ run_cmd "ip netns exec $me ip6tables -t mangle -I PREROUTING 1 -m rpfilter --invert -j DROP"
run_cmd "$IP nexthop add id 91 via 2001:db8:91::2 dev veth1"
run_cmd "$IP nexthop add id 92 via 2001:db8:92::2 dev veth3"
run_cmd "$IP nexthop add id 93 group 91/92"
run_cmd "$IP -6 ro add default nhid 91"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Nexthop with default route and rpfilter"
run_cmd "$IP -6 ro replace default nhid 93"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 2001:db8:101::1"
log_test $? 0 "Nexthop with multipath default route and rpfilter"
# TO-DO:
@@ -1216,11 +1212,11 @@ ipv6_torture()
pid1=$!
ipv6_grp_replace_loop &
pid2=$!
- ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
pid3=$!
- ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ ip netns exec $me mausezahn -6 veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
sleep 300
@@ -1270,11 +1266,11 @@ ipv6_res_torture()
pid1=$!
ipv6_res_grp_replace_loop &
pid2=$!
- ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
pid3=$!
- ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn -6 veth1 \
+ ip netns exec $me mausezahn -6 veth1 \
-B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 \
-t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
@@ -1544,7 +1540,7 @@ ipv4_withv6_fcnal()
local lladdr
set -e
- lladdr=$(get_linklocal veth2 peer)
+ lladdr=$(get_linklocal veth2 $peer)
run_cmd "$IP nexthop add id 11 via ${lladdr} dev veth1"
set +e
run_cmd "$IP ro add 172.16.101.1/32 nhid 11"
@@ -1606,13 +1602,13 @@ ipv4_fcnal_runtime()
#
run_cmd "$IP nexthop replace id 21 via 172.16.1.2 dev veth1"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 21"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Basic ping"
run_cmd "$IP nexthop replace id 22 via 172.16.2.2 dev veth3"
run_cmd "$IP nexthop add id 122 group 21/22"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - multipath"
run_cmd "$IP ro delete 172.16.101.1/32 nhid 122"
@@ -1623,7 +1619,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP nexthop add id 501 via 172.16.1.2 dev veth1"
run_cmd "$IP ro add default nhid 501"
run_cmd "$IP ro add default via 172.16.1.3 dev veth1 metric 20"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - multiple default routes, nh first"
# flip the order
@@ -1632,7 +1628,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP ro add default via 172.16.1.2 dev veth1 metric 20"
run_cmd "$IP nexthop replace id 501 via 172.16.1.3 dev veth1"
run_cmd "$IP ro add default nhid 501 metric 20"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - multiple default routes, nh second"
run_cmd "$IP nexthop delete nhid 501"
@@ -1643,26 +1639,26 @@ ipv4_fcnal_runtime()
#
run_cmd "$IP nexthop add id 23 blackhole"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 23"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 2 "Ping - blackhole"
run_cmd "$IP nexthop replace id 23 via 172.16.1.2 dev veth1"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - blackhole replaced with gateway"
run_cmd "$IP nexthop replace id 23 blackhole"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 2 "Ping - gateway replaced by blackhole"
run_cmd "$IP ro replace 172.16.101.1/32 nhid 122"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
if [ $? -eq 0 ]; then
run_cmd "$IP nexthop replace id 122 group 23"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 2 "Ping - group with blackhole"
run_cmd "$IP nexthop replace id 122 group 21/22"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "Ping - group blackhole replaced with gateways"
else
log_test 2 0 "Ping - multipath failed"
@@ -1685,11 +1681,11 @@ ipv4_fcnal_runtime()
# IPv4 with IPv6
#
set -e
- lladdr=$(get_linklocal veth2 peer)
+ lladdr=$(get_linklocal veth2 $peer)
run_cmd "$IP nexthop add id 24 via ${lladdr} dev veth1"
set +e
run_cmd "$IP ro replace 172.16.101.1/32 nhid 24"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv6 nexthop with IPv4 route"
$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1713,11 +1709,11 @@ ipv4_fcnal_runtime()
check_route "172.16.101.1" "172.16.101.1 nhid 101 nexthop via inet6 ${lladdr} dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv6 nexthop with IPv4 route"
run_cmd "$IP ro replace 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv4 route with IPv6 gateway"
$IP neigh sh | grep -q "${lladdr} dev veth1"
@@ -1734,7 +1730,7 @@ ipv4_fcnal_runtime()
run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
- run_cmd "ip netns exec me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
+ run_cmd "ip netns exec $me ping -c1 -w$PING_TIMEOUT 172.16.101.1"
log_test $? 0 "IPv4 default route with IPv6 gateway"
#
@@ -1785,7 +1781,7 @@ sysctl_nexthop_compat_mode_check()
local sysctlname="net.ipv4.nexthop_compat_mode"
local lprefix=$1
- IPE="ip netns exec me"
+ IPE="ip netns exec $me"
$IPE sysctl -q $sysctlname 2>&1 >/dev/null
if [ $? -ne 0 ]; then
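Review bot: `IPE` is assigned without `local` in `sysctl_nexthop_compat_mode_check`, next to `local sysctlname` and `local lprefix`, so the namespace-qualified command prefix leaks into the script's global scope. The same applies to the `IPE=` line in the `sysctl_nexthop_compat_mode_set` hunk that follows. Since the value now depends on the generated `$me`, I would scope it to the function: `local IPE="ip netns exec $me"`. Both function bodies continue outside their hunks, so this assumes nothing else reads `IPE` as a global.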
@@ -1804,7 +1800,7 @@ sysctl_nexthop_compat_mode_set()
local mode=$1
local lprefix=$2
- IPE="ip netns exec me"
+ IPE="ip netns exec $me"
out=$($IPE sysctl -w $sysctlname=$mode)
log_test $? 0 "$lprefix set compat mode - $mode"
@@ -1988,11 +1984,11 @@ ipv4_torture()
pid1=$!
ipv4_grp_replace_loop &
pid2=$!
- ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 &
pid3=$!
- ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+ ip netns exec $me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
sleep 300
@@ -2042,11 +2038,11 @@ ipv4_res_torture()
pid1=$!
ipv4_res_grp_replace_loop &
pid2=$!
- ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 172.16.101.1 >/dev/null 2>&1 &
pid3=$!
- ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+ ip netns exec $me ping -f 172.16.101.2 >/dev/null 2>&1 &
pid4=$!
- ip netns exec me mausezahn veth1 \
+ ip netns exec $me mausezahn veth1 \
-B 172.16.101.2 -A 172.16.1.1 -c 0 \
-t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
pid5=$!
@@ -2070,6 +2066,12 @@ basic()
run_cmd "$IP nexthop get id 1"
log_test $? 2 "Nexthop get on non-existent id"
+ run_cmd "$IP nexthop del id 1"
+ log_test $? 2 "Nexthop del with non-existent id"
+
+ run_cmd "$IP nexthop del id 1 group 1/2/3/4/5/6/7/8"
+ log_test $? 2 "Nexthop del with non-existent id and extra attributes"
+
# attempt to create nh without a device or gw - fails
run_cmd "$IP nexthop add id 1"
log_test $? 2 "Nexthop with no device or gateway"
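Review bot: The two new `nexthop del` cases in `basic()` carry no comment, unlike the neighbouring case (`# attempt to create nh without a device or gw - fails`). A one-line header such as `# delete a non-existent nexthop, with and without extra attributes - fails` would record why the second command passes a `group` list to a delete request. The rest of `basic()` lies outside the hunk.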
@@ -2081,10 +2083,10 @@ basic()
# create nh with linkdown device - fails
$IP li set veth1 up
- ip -netns peer li set veth2 down
+ ip -netns $peer li set veth2 down
run_cmd "$IP nexthop add id 1 dev veth1"
log_test $? 2 "Nexthop with device that is linkdown"
- ip -netns peer li set veth2 up
+ ip -netns $peer li set veth2 up
# device only
run_cmd "$IP nexthop add id 1 dev veth1"
@@ -2465,7 +2467,7 @@ fi
for t in $TESTS
do
case $t in
- none) IP="ip -netns peer"; setup; exit 0;;
+ none) IP="ip -netns $peer"; setup; exit 0;;
*) setup; $t; cleanup;;
esac
done
diff --git a/tools/testing/selftests/net/fib_rule_tests.sh b/tools/testing/selftests/net/fib_rule_tests.sh
index 63c3eaec8d30..7c01f58a20de 100755
--- a/tools/testing/selftests/net/fib_rule_tests.sh
+++ b/tools/testing/selftests/net/fib_rule_tests.sh
@@ -3,17 +3,13 @@
# This test is for checking IPv4 and IPv6 FIB rules API
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
+source lib.sh
ret=0
-
PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-IP="ip -netns testns"
-IP_PEER="ip -netns peerns"
RTABLE=100
RTABLE_PEER=101
+RTABLE_VRF=102
GW_IP4=192.51.100.2
SRC_IP=192.51.100.3
GW_IP6=2001:db8:1::2
@@ -22,7 +18,14 @@ SRC_IP6=2001:db8:1::3
DEV_ADDR=192.51.100.1
DEV_ADDR6=2001:db8:1::1
DEV=dummy0
-TESTS="fib_rule6 fib_rule4 fib_rule6_connect fib_rule4_connect"
+TESTS="
+ fib_rule6
+ fib_rule4
+ fib_rule6_connect
+ fib_rule4_connect
+ fib_rule6_vrf
+ fib_rule4_vrf
+"
SELFTEST_PATH=""
@@ -32,13 +35,18 @@ log_test()
local expected=$2
local msg="$3"
+ $IP rule show | grep -q l3mdev
+ if [ $? -eq 0 ]; then
+ msg="$msg (VRF)"
+ fi
+
if [ ${rc} -eq ${expected} ]; then
nsuccess=$((nsuccess+1))
- printf "\n TEST: %-50s [ OK ]\n" "${msg}"
+ printf "\n TEST: %-60s [ OK ]\n" "${msg}"
else
ret=1
nfail=$((nfail+1))
- printf "\n TEST: %-50s [FAIL]\n" "${msg}"
+ printf "\n TEST: %-60s [FAIL]\n" "${msg}"
if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
echo
echo "hit enter to continue, 'q' to quit"
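Review bot: `log_test` now runs `$IP rule show | grep -q l3mdev` on every call and inspects `$?` on the following line; the idiomatic form is `if $IP rule show | grep -q l3mdev; then msg="$msg (VRF)"; fi`. The tag is derived from live namespace state at report time, so a `log_test` call made before `setup` assigns `$IP`, or after `cleanup_vrf`, would be labelled as non-VRF. A flag set in `setup_vrf` and cleared in `cleanup_vrf` would make the label independent of that timing. The function starts and ends outside the hunk.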
@@ -84,8 +92,8 @@ check_nettest()
setup()
{
set -e
- ip netns add testns
- $IP link set dev lo up
+ setup_ns testns
+ IP="ip -netns $testns"
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@@ -98,18 +106,19 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del testns
+ cleanup_ns $testns
}
setup_peer()
{
set -e
- ip netns add peerns
+ setup_ns peerns
+ IP_PEER="ip -netns $peerns"
$IP_PEER link set dev lo up
- ip link add name veth0 netns testns type veth \
- peer name veth1 netns peerns
+ ip link add name veth0 netns $testns type veth \
+ peer name veth1 netns $peerns
$IP link set dev veth0 up
$IP_PEER link set dev veth1 up
@@ -131,7 +140,18 @@ setup_peer()
cleanup_peer()
{
$IP link del dev veth0
- ip netns del peerns
+ ip netns del $peerns
+}
+
+setup_vrf()
+{
+ $IP link add name vrf0 up type vrf table $RTABLE_VRF
+ $IP link set dev $DEV master vrf0
+}
+
+cleanup_vrf()
+{
+ $IP link del dev vrf0
}
fib_check_iproute_support()
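Review bot: `cleanup_peer` still deletes the namespace with a bare `ip netns del $peerns`, while `cleanup` in this same file was switched to `cleanup_ns $testns`. Since `peerns` is now created by `setup_ns`, I would tear it down through the matching helper, `cleanup_ns $peerns`, so that whatever bookkeeping lib.sh keeps (not visible here) stays consistent. The new `setup_vrf` and `cleanup_vrf` would also benefit from a one-line comment, e.g. `# Enslave $DEV to vrf0 (table $RTABLE_VRF) so the rule tests run inside a VRF`.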
@@ -252,6 +272,13 @@ fib_rule6_test()
fi
}
+fib_rule6_vrf_test()
+{
+ setup_vrf
+ fib_rule6_test
+ cleanup_vrf
+}
+
# Verify that the IPV6_TCLASS option of UDPv6 and TCPv6 sockets is properly
# taken into account when connecting the socket and when sending packets.
fib_rule6_connect_test()
@@ -270,11 +297,11 @@ fib_rule6_connect_test()
# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
# The ECN bits shouldn't influence the result of the test.
for dsfield in 0x04 0x05 0x06 0x07; do
- nettest -q -6 -B -t 5 -N testns -O peerns -U -D \
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -U -D \
-Q "${dsfield}" -l 2001:db8::1:11 -r 2001:db8::1:11
log_test $? 0 "rule6 dsfield udp connect (dsfield ${dsfield})"
- nettest -q -6 -B -t 5 -N testns -O peerns -Q "${dsfield}" \
+ nettest -q -6 -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \
-l 2001:db8::1:11 -r 2001:db8::1:11
log_test $? 0 "rule6 dsfield tcp connect (dsfield ${dsfield})"
done
@@ -337,11 +364,11 @@ fib_rule4_test()
# need enable forwarding and disable rp_filter temporarily as all the
# addresses are in the same subnet and egress device == ingress device.
- ip netns exec testns sysctl -qw net.ipv4.ip_forward=1
- ip netns exec testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
+ ip netns exec $testns sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $testns sysctl -qw net.ipv4.conf.$DEV.rp_filter=0
match="from $SRC_IP iif $DEV"
fib_rule4_test_match_n_redirect "$match" "$match" "iif redirect to table"
- ip netns exec testns sysctl -qw net.ipv4.ip_forward=0
+ ip netns exec $testns sysctl -qw net.ipv4.ip_forward=0
# Reject dsfield (tos) options which have ECN bits set
for cnt in $(seq 1 3); do
@@ -389,6 +416,13 @@ fib_rule4_test()
fi
}
+fib_rule4_vrf_test()
+{
+ setup_vrf
+ fib_rule4_test
+ cleanup_vrf
+}
+
# Verify that the IP_TOS option of UDPv4 and TCPv4 sockets is properly taken
# into account when connecting the socket and when sending packets.
fib_rule4_connect_test()
@@ -407,11 +441,11 @@ fib_rule4_connect_test()
# (Not-ECT: 0, ECT(1): 1, ECT(0): 2, CE: 3).
# The ECN bits shouldn't influence the result of the test.
for dsfield in 0x04 0x05 0x06 0x07; do
- nettest -q -B -t 5 -N testns -O peerns -D -U -Q "${dsfield}" \
+ nettest -q -B -t 5 -N $testns -O $peerns -D -U -Q "${dsfield}" \
-l 198.51.100.11 -r 198.51.100.11
log_test $? 0 "rule4 dsfield udp connect (dsfield ${dsfield})"
- nettest -q -B -t 5 -N testns -O peerns -Q "${dsfield}" \
+ nettest -q -B -t 5 -N $testns -O $peerns -Q "${dsfield}" \
-l 198.51.100.11 -r 198.51.100.11
log_test $? 0 "rule4 dsfield tcp connect (dsfield ${dsfield})"
done
@@ -471,6 +505,8 @@ do
fib_rule4_test|fib_rule4) fib_rule4_test;;
fib_rule6_connect_test|fib_rule6_connect) fib_rule6_connect_test;;
fib_rule4_connect_test|fib_rule4_connect) fib_rule4_connect_test;;
+ fib_rule6_vrf_test|fib_rule6_vrf) fib_rule6_vrf_test;;
+ fib_rule4_vrf_test|fib_rule4_vrf) fib_rule4_vrf_test;;
help) echo "Test names: $TESTS"; exit 0;;
diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh
index 66d0db7a2614..73895711cdf4 100755
--- a/tools/testing/selftests/net/fib_tests.sh
+++ b/tools/testing/selftests/net/fib_tests.sh
@@ -3,10 +3,8 @@
# This test is for checking IPv4 and IPv6 FIB behavior in response to
# different events.
-
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
@@ -18,8 +16,6 @@ TESTS="unregister down carrier nexthop suppress ipv6_notify ipv4_notify \
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
-IP="$(which ip) -netns ns1"
-NS_EXEC="$(which ip) netns exec ns1"
which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
@@ -55,11 +51,11 @@ log_test()
setup()
{
set -e
- ip netns add ns1
- ip netns set ns1 auto
- $IP link set dev lo up
- ip netns exec ns1 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
+ setup_ns ns1
+ IP="$(which ip) -netns $ns1"
+ NS_EXEC="$(which ip) netns exec $ns1"
+ ip netns exec $ns1 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP link add dummy0 type dummy
$IP link set dev dummy0 up
@@ -72,8 +68,7 @@ setup()
cleanup()
{
$IP link del dev dummy0 &> /dev/null
- ip netns del ns1 &> /dev/null
- ip netns del ns2 &> /dev/null
+ cleanup_ns $ns1 $ns2
}
get_linklocal()
@@ -448,28 +443,25 @@ fib_rp_filter_test()
setup
set -e
- ip netns add ns2
- ip netns set ns2 auto
-
- ip -netns ns2 link set dev lo up
+ setup_ns ns2
$IP link add name veth1 type veth peer name veth2
- $IP link set dev veth2 netns ns2
+ $IP link set dev veth2 netns $ns2
$IP address add 192.0.2.1/24 dev veth1
- ip -netns ns2 address add 192.0.2.1/24 dev veth2
+ ip -netns $ns2 address add 192.0.2.1/24 dev veth2
$IP link set dev veth1 up
- ip -netns ns2 link set dev veth2 up
+ ip -netns $ns2 link set dev veth2 up
$IP link set dev lo address 52:54:00:6a:c7:5e
$IP link set dev veth1 address 52:54:00:6a:c7:5e
- ip -netns ns2 link set dev lo address 52:54:00:6a:c7:5e
- ip -netns ns2 link set dev veth2 address 52:54:00:6a:c7:5e
+ ip -netns $ns2 link set dev lo address 52:54:00:6a:c7:5e
+ ip -netns $ns2 link set dev veth2 address 52:54:00:6a:c7:5e
# 1. (ns2) redirect lo's egress to veth2's egress
- ip netns exec ns2 tc qdisc add dev lo parent root handle 1: fq_codel
- ip netns exec ns2 tc filter add dev lo parent 1: protocol arp basic \
+ ip netns exec $ns2 tc qdisc add dev lo parent root handle 1: fq_codel
+ ip netns exec $ns2 tc filter add dev lo parent 1: protocol arp basic \
action mirred egress redirect dev veth2
- ip netns exec ns2 tc filter add dev lo parent 1: protocol ip basic \
+ ip netns exec $ns2 tc filter add dev lo parent 1: protocol ip basic \
action mirred egress redirect dev veth2
# 2. (ns1) redirect veth1's ingress to lo's ingress
@@ -487,24 +479,24 @@ fib_rp_filter_test()
action mirred egress redirect dev veth1
# 4. (ns2) redirect veth2's ingress to lo's ingress
- ip netns exec ns2 tc qdisc add dev veth2 ingress
- ip netns exec ns2 tc filter add dev veth2 ingress protocol arp basic \
+ ip netns exec $ns2 tc qdisc add dev veth2 ingress
+ ip netns exec $ns2 tc filter add dev veth2 ingress protocol arp basic \
action mirred ingress redirect dev lo
- ip netns exec ns2 tc filter add dev veth2 ingress protocol ip basic \
+ ip netns exec $ns2 tc filter add dev veth2 ingress protocol ip basic \
action mirred ingress redirect dev lo
$NS_EXEC sysctl -qw net.ipv4.conf.all.rp_filter=1
$NS_EXEC sysctl -qw net.ipv4.conf.all.accept_local=1
$NS_EXEC sysctl -qw net.ipv4.conf.all.route_localnet=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.accept_local=1
- ip netns exec ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1
+ ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=1
+ ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.accept_local=1
+ ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.route_localnet=1
set +e
- run_cmd "ip netns exec ns2 ping -w1 -c1 192.0.2.1"
+ run_cmd "ip netns exec $ns2 ping -w1 -c1 192.0.2.1"
log_test $? 0 "rp_filter passes local packets"
- run_cmd "ip netns exec ns2 ping -w1 -c1 127.0.0.1"
+ run_cmd "ip netns exec $ns2 ping -w1 -c1 127.0.0.1"
log_test $? 0 "rp_filter passes loopback packets"
cleanup
@@ -751,6 +743,43 @@ fib_notify_test()
cleanup &> /dev/null
}
+# Create a new dummy_10 to remove all associated routes.
+reset_dummy_10()
+{
+ $IP link del dev dummy_10
+
+ $IP link add dummy_10 type dummy
+ $IP link set dev dummy_10 up
+ $IP -6 address add 2001:10::1/64 dev dummy_10
+}
+
+check_rt_num()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ echo "FAIL: Expected $expected routes, got $num"
+ ret=1
+ else
+ ret=0
+ fi
+}
+
+check_rt_num_clean()
+{
+ local expected=$1
+ local num=$2
+
+ if [ $num -ne $expected ]; then
+ log_test 1 0 "expected $expected routes, got $num"
+ set +e
+ cleanup &> /dev/null
+ return 1
+ fi
+ return 0
+}
+
fib6_gc_test()
{
setup
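Review bot: `check_rt_num` reports its verdict through the script-wide `ret` (initialised at the top of fib_tests.sh), and its `else` branch resets it to 0. If `ret` also carries the overall exit status, as it does in fib_rule_tests.sh's `log_test`, a passing route count here would erase a failure recorded by an earlier test. A dedicated variable avoids that: `rt_num_rc=1` / `rt_num_rc=0` in the helper and `log_test $rt_num_rc 0 "..."` at the call sites. Quoting the operands (`[ "$num" -ne "$expected" ]`) would also keep an empty count from turning into a `[` syntax error. `fib6_gc_test` continues outside this hunk.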
@@ -759,7 +788,8 @@ fib6_gc_test()
echo "Fib6 garbage collection test"
set -e
- EXPIRE=3
+ EXPIRE=5
+ GC_WAIT_TIME=$((EXPIRE * 2 + 2))
# Check expiration of routes every $EXPIRE seconds (GC)
$NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE
@@ -771,44 +801,110 @@ fib6_gc_test()
$NS_EXEC sysctl -wq net.ipv6.route.flush=1
# Temporary routes
- for i in $(seq 1 1000); do
+ for i in $(seq 1 5); do
# Expire route after $EXPIRE seconds
$IP -6 route add 2001:20::$i \
via 2001:10::2 dev dummy_10 expires $EXPIRE
done
- sleep $(($EXPIRE * 2))
- N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l)
- if [ $N_EXP_SLEEP -ne 0 ]; then
- echo "FAIL: expected 0 routes with expires, got $N_EXP_SLEEP"
- ret=1
- else
- ret=0
- fi
+ sleep $GC_WAIT_TIME
+ $NS_EXEC sysctl -wq net.ipv6.route.flush=1
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection"
+
+ reset_dummy_10
# Permanent routes
- for i in $(seq 1 5000); do
+ for i in $(seq 1 5); do
$IP -6 route add 2001:30::$i \
via 2001:10::2 dev dummy_10
done
# Temporary routes
- for i in $(seq 1 1000); do
+ for i in $(seq 1 5); do
# Expire route after $EXPIRE seconds
$IP -6 route add 2001:20::$i \
via 2001:10::2 dev dummy_10 expires $EXPIRE
done
- sleep $(($EXPIRE * 2))
- N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l)
- if [ $N_EXP_SLEEP -ne 0 ]; then
- echo "FAIL: expected 0 routes with expires," \
- "got $N_EXP_SLEEP (5000 permanent routes)"
- ret=1
- else
- ret=0
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (with permanent routes)"
+
+ reset_dummy_10
+
+ # Permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ # Replace with temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with expires)"
+
+ reset_dummy_10
+
+ # Temporary routes
+ for i in $(seq 1 5); do
+ # Expire route after $EXPIRE seconds
+ $IP -6 route add 2001:20::$i \
+ via 2001:10::2 dev dummy_10 expires $EXPIRE
+ done
+ # Replace with permanent routes
+ for i in $(seq 1 5); do
+ $IP -6 route replace 2001:20::$i \
+ via 2001:10::2 dev dummy_10
+ done
+ check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (replace with permanent)"
+
+ # ra6 is required for the next test. (ipv6toolkit)
+ if [ ! -x "$(command -v ra6)" ]; then
+ echo "SKIP: ra6 not found."
+ set +e
+ cleanup &> /dev/null
+ return
fi
- set +e
+ # Delete dummy_10 and remove all routes
+ $IP link del dev dummy_10
- log_test $ret 0 "ipv6 route garbage collection"
+ # Create a pair of veth devices to send a RA message from one
+ # device to another.
+ $IP link add veth1 type veth peer name veth2
+ $IP link set dev veth1 up
+ $IP link set dev veth2 up
+ $IP -6 address add 2001:10::1/64 dev veth1 nodad
+ $IP -6 address add 2001:10::2/64 dev veth2 nodad
+
+ # Make veth1 ready to receive RA messages.
+ $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2
+
+ # Send a RA message with a route from veth2 to veth1.
+ $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE
+
+ # Wait for the RA message.
+ sleep 1
+
+ # systemd may mess up the test. You syould make sure that
+ # systemd-networkd.service and systemd-networkd.socket are stopped.
+ check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return
+
+ # Wait for GC
+ sleep $GC_WAIT_TIME
+ check_rt_num 0 $($IP -6 route list |grep expires|wc -l)
+ log_test $ret 0 "ipv6 route garbage collection (RA message)"
+
+ set +e
cleanup &> /dev/null
}
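Review bot: When `ra6` is missing, the new branch prints `SKIP: ra6 not found.` and returns without recording anything. The RA sub-test then drops out of the pass/fail totals, and a partial run looks the same as a complete one in the summary. It would be worth routing that skip through whatever skip accounting the script already has. The comment before the RA route-count check has a typo and should read `# systemd may mess up the test. You should make sure that`. The counting pipeline repeated throughout the hunk can be shortened to `$IP -6 route list | grep -c expires`.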
@@ -959,34 +1055,32 @@ route_setup()
[ "${VERBOSE}" = "1" ] && set -x
set -e
- ip netns add ns2
- ip netns set ns2 auto
- ip -netns ns2 link set dev lo up
- ip netns exec ns2 sysctl -qw net.ipv4.ip_forward=1
- ip netns exec ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
+ setup_ns ns2
+ ip netns exec $ns2 sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec $ns2 sysctl -qw net.ipv6.conf.all.forwarding=1
$IP li add veth1 type veth peer name veth2
$IP li add veth3 type veth peer name veth4
$IP li set veth1 up
$IP li set veth3 up
- $IP li set veth2 netns ns2 up
- $IP li set veth4 netns ns2 up
- ip -netns ns2 li add dummy1 type dummy
- ip -netns ns2 li set dummy1 up
+ $IP li set veth2 netns $ns2 up
+ $IP li set veth4 netns $ns2 up
+ ip -netns $ns2 li add dummy1 type dummy
+ ip -netns $ns2 li set dummy1 up
$IP -6 addr add 2001:db8:101::1/64 dev veth1 nodad
$IP -6 addr add 2001:db8:103::1/64 dev veth3 nodad
$IP addr add 172.16.101.1/24 dev veth1
$IP addr add 172.16.103.1/24 dev veth3
- ip -netns ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
- ip -netns ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
- ip -netns ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
+ ip -netns $ns2 -6 addr add 2001:db8:101::2/64 dev veth2 nodad
+ ip -netns $ns2 -6 addr add 2001:db8:103::2/64 dev veth4 nodad
+ ip -netns $ns2 -6 addr add 2001:db8:104::1/64 dev dummy1 nodad
- ip -netns ns2 addr add 172.16.101.2/24 dev veth2
- ip -netns ns2 addr add 172.16.103.2/24 dev veth4
- ip -netns ns2 addr add 172.16.104.1/24 dev dummy1
+ ip -netns $ns2 addr add 172.16.101.2/24 dev veth2
+ ip -netns $ns2 addr add 172.16.103.2/24 dev veth4
+ ip -netns $ns2 addr add 172.16.104.1/24 dev dummy1
set +e
}
@@ -1238,7 +1332,7 @@ ipv6_addr_metric_test()
log_test $rc 0 "Modify metric of address"
# verify prefix route removed on down
- run_cmd "ip netns exec ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
+ run_cmd "ip netns exec $ns1 sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1"
run_cmd "$IP li set dev dummy2 down"
rc=$?
if [ $rc -eq 0 ]; then
@@ -1344,7 +1438,7 @@ ipv6_route_metrics_test()
log_test $rc 0 "Multipath route with mtu metric"
$IP -6 ro add 2001:db8:104::/64 via 2001:db8:101::2 mtu 1300
- run_cmd "ip netns exec ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
+ run_cmd "ip netns exec $ns1 ${ping6} -w1 -c1 -s 1500 2001:db8:104::1"
log_test $? 0 "Using route with mtu metric"
run_cmd "$IP -6 ro add 2001:db8:114::/64 via 2001:db8:101::2 congctl lock foo"
@@ -1599,19 +1693,19 @@ ipv4_rt_replace()
ipv4_local_rt_cache()
{
run_cmd "ip addr add 10.0.0.1/32 dev lo"
- run_cmd "ip netns add test-ns"
+ run_cmd "setup_ns test-ns"
run_cmd "ip link add veth-outside type veth peer name veth-inside"
run_cmd "ip link add vrf-100 type vrf table 1100"
run_cmd "ip link set veth-outside master vrf-100"
- run_cmd "ip link set veth-inside netns test-ns"
+ run_cmd "ip link set veth-inside netns $test-ns"
run_cmd "ip link set veth-outside up"
run_cmd "ip link set vrf-100 up"
run_cmd "ip route add 10.1.1.1/32 dev veth-outside table 1100"
- run_cmd "ip netns exec test-ns ip link set veth-inside up"
- run_cmd "ip netns exec test-ns ip addr add 10.1.1.1/32 dev veth-inside"
- run_cmd "ip netns exec test-ns ip route add 10.0.0.1/32 dev veth-inside"
- run_cmd "ip netns exec test-ns ip route add default via 10.0.0.1"
- run_cmd "ip netns exec test-ns ping 10.0.0.1 -c 1 -i 1"
+ run_cmd "ip netns exec $test-ns ip link set veth-inside up"
+ run_cmd "ip netns exec $test-ns ip addr add 10.1.1.1/32 dev veth-inside"
+ run_cmd "ip netns exec $test-ns ip route add 10.0.0.1/32 dev veth-inside"
+ run_cmd "ip netns exec $test-ns ip route add default via 10.0.0.1"
+ run_cmd "ip netns exec $test-ns ping 10.0.0.1 -c 1 -i 1"
run_cmd "ip link delete vrf-100"
# if we do not hang test is a success
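Review bot: `test-ns` is not a valid shell variable name, so in `ipv4_local_rt_cache` every `$test-ns` expands to the value of `$test` followed by the literal `-ns`, not to the namespace that `setup_ns test-ns` was meant to create. With `test` unset, the commands address a namespace called `-ns`, so the test no longer exercises the namespace it set up. An identifier-safe name fixes the expansion: `setup_ns test_ns` and `run_cmd "ip link set veth-inside netns $test_ns"`, with the same substitution in the five `ip netns exec` lines. `run_cmd` is not shown here; if it evaluates its argument in a subshell, the variable assigned by `setup_ns` would not reach the caller either, so calling `setup_ns` directly rather than through `run_cmd` is safer. The function body continues outside the hunk.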
@@ -1841,7 +1935,7 @@ ipv4_route_metrics_test()
log_test $rc 0 "Multipath route with mtu metric"
$IP ro add 172.16.104.0/24 via 172.16.101.2 mtu 1300
- run_cmd "ip netns exec ns1 ping -w1 -c1 -s 1500 172.16.104.1"
+ run_cmd "ip netns exec $ns1 ping -w1 -c1 -s 1500 172.16.104.1"
log_test $? 0 "Using route with mtu metric"
run_cmd "$IP ro add 172.16.111.0/24 via 172.16.101.2 congctl lock foo"
@@ -2105,7 +2199,7 @@ ipv4_route_v6_gw_test()
check_route "172.16.104.0/24 via inet6 2001:db8:101::2 dev veth1"
fi
- run_cmd "ip netns exec ns1 ping -w1 -c1 172.16.104.1"
+ run_cmd "ip netns exec $ns1 ping -w1 -c1 172.16.104.1"
log_test $rc 0 "Single path route with IPv6 gateway - ping"
run_cmd "$IP ro del 172.16.104.0/24 via inet6 2001:db8:101::2"
@@ -2196,7 +2290,7 @@ ipv4_mangle_test()
sleep 2
local tmp_file=$(mktemp)
- ip netns exec ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
+ ip netns exec $ns2 socat UDP4-LISTEN:54321,fork $tmp_file &
# Add a FIB rule and a route that will direct our connection to the
# listening server.
@@ -2254,7 +2348,7 @@ ipv6_mangle_test()
sleep 2
local tmp_file=$(mktemp)
- ip netns exec ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
+ ip netns exec $ns2 socat UDP6-LISTEN:54321,fork $tmp_file &
# Add a FIB rule and a route that will direct our connection to the
# listening server.
@@ -2423,37 +2517,37 @@ ipv4_mpath_list_test()
route_setup
set -e
- run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
-
- run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
- run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
- run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
- run_cmd "ip -n ns2 link add name nh1 up type dummy"
- run_cmd "ip -n ns2 link add name nh2 up type dummy"
- run_cmd "ip -n ns2 address add 172.16.201.1/24 dev nh1"
- run_cmd "ip -n ns2 address add 172.16.202.1/24 dev nh2"
- run_cmd "ip -n ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
- run_cmd "ip -n ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
- run_cmd "ip -n ns2 route add 203.0.113.0/24
+ run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n $ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n $ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n $ns2 address add 172.16.201.1/24 dev nh1"
+ run_cmd "ip -n $ns2 address add 172.16.202.1/24 dev nh2"
+ run_cmd "ip -n $ns2 neigh add 172.16.201.2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n $ns2 neigh add 172.16.202.2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n $ns2 route add 203.0.113.0/24
nexthop via 172.16.201.2 nexthop via 172.16.202.2"
- run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
- run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
- run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
- run_cmd "ip netns exec ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.fib_multipath_hash_policy=1"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.veth2.rp_filter=0"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.all.rp_filter=0"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv4.conf.default.rp_filter=0"
set +e
- local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
local tmp_file=$(mktemp)
- local cmd="ip netns exec ns1 mausezahn veth1 -a own -b $dmac
+ local cmd="ip netns exec $ns1 mausezahn veth1 -a own -b $dmac
-A 172.16.101.1 -B 203.0.113.1 -t udp 'sp=12345,dp=0-65535' -q"
# Packets forwarded in a list using a multipath route must not reuse a
# cached result so that a flow always hits the same nexthop. In other
# words, the FIB lookup tracepoint needs to be triggered for every
# packet.
- local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
run_cmd "perf stat -a -e fib:fib_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
- local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
list_rcv_eval $tmp_file $diff
@@ -2471,34 +2565,34 @@ ipv6_mpath_list_test()
route_setup
set -e
- run_cmd "ip netns exec ns1 ethtool -K veth1 tcp-segmentation-offload off"
-
- run_cmd "ip netns exec ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
- run_cmd "ip netns exec ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
- run_cmd "ip netns exec ns2 ethtool -K veth2 generic-receive-offload on"
- run_cmd "ip -n ns2 link add name nh1 up type dummy"
- run_cmd "ip -n ns2 link add name nh2 up type dummy"
- run_cmd "ip -n ns2 -6 address add 2001:db8:201::1/64 dev nh1"
- run_cmd "ip -n ns2 -6 address add 2001:db8:202::1/64 dev nh2"
- run_cmd "ip -n ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
- run_cmd "ip -n ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
- run_cmd "ip -n ns2 -6 route add 2001:db8:301::/64
+ run_cmd "ip netns exec $ns1 ethtool -K veth1 tcp-segmentation-offload off"
+
+ run_cmd "ip netns exec $ns2 bash -c \"echo 20000 > /sys/class/net/veth2/gro_flush_timeout\""
+ run_cmd "ip netns exec $ns2 bash -c \"echo 1 > /sys/class/net/veth2/napi_defer_hard_irqs\""
+ run_cmd "ip netns exec $ns2 ethtool -K veth2 generic-receive-offload on"
+ run_cmd "ip -n $ns2 link add name nh1 up type dummy"
+ run_cmd "ip -n $ns2 link add name nh2 up type dummy"
+ run_cmd "ip -n $ns2 -6 address add 2001:db8:201::1/64 dev nh1"
+ run_cmd "ip -n $ns2 -6 address add 2001:db8:202::1/64 dev nh2"
+ run_cmd "ip -n $ns2 -6 neigh add 2001:db8:201::2 lladdr 00:11:22:33:44:55 nud perm dev nh1"
+ run_cmd "ip -n $ns2 -6 neigh add 2001:db8:202::2 lladdr 00:aa:bb:cc:dd:ee nud perm dev nh2"
+ run_cmd "ip -n $ns2 -6 route add 2001:db8:301::/64
nexthop via 2001:db8:201::2 nexthop via 2001:db8:202::2"
- run_cmd "ip netns exec ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
+ run_cmd "ip netns exec $ns2 sysctl -qw net.ipv6.fib_multipath_hash_policy=1"
set +e
- local dmac=$(ip -n ns2 -j link show dev veth2 | jq -r '.[]["address"]')
+ local dmac=$(ip -n $ns2 -j link show dev veth2 | jq -r '.[]["address"]')
local tmp_file=$(mktemp)
- local cmd="ip netns exec ns1 mausezahn -6 veth1 -a own -b $dmac
+ local cmd="ip netns exec $ns1 mausezahn -6 veth1 -a own -b $dmac
-A 2001:db8:101::1 -B 2001:db8:301::1 -t udp 'sp=12345,dp=0-65535' -q"
# Packets forwarded in a list using a multipath route must not reuse a
# cached result so that a flow always hits the same nexthop. In other
# words, the FIB lookup tracepoint needs to be triggered for every
# packet.
- local t0_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local t0_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
run_cmd "perf stat -a -e fib6:fib6_table_lookup --filter 'err == 0' -j -o $tmp_file -- $cmd"
- local t1_rx_pkts=$(link_stats_get ns2 veth2 rx packets)
+ local t1_rx_pkts=$(link_stats_get $ns2 veth2 rx packets)
local diff=$(echo $t1_rx_pkts - $t0_rx_pkts | bc -l)
list_rcv_eval $tmp_file $diff
diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile
index df593b7b3e6b..fa7b59ff4029 100644
--- a/tools/testing/selftests/net/forwarding/Makefile
+++ b/tools/testing/selftests/net/forwarding/Makefile
@@ -15,17 +15,12 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
bridge_vlan_unaware.sh \
custom_multipath_hash.sh \
dual_vxlan_bridge.sh \
- ethtool_extended_state.sh \
- ethtool_mm.sh \
- ethtool.sh \
gre_custom_multipath_hash.sh \
gre_inner_v4_multipath.sh \
gre_inner_v6_multipath.sh \
gre_multipath_nh_res.sh \
gre_multipath_nh.sh \
gre_multipath.sh \
- hw_stats_l3.sh \
- hw_stats_l3_gre.sh \
ip6_forward_instats_vrf.sh \
ip6gre_custom_multipath_hash.sh \
ip6gre_flat_key.sh \
@@ -42,8 +37,8 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
ipip_hier_gre_key.sh \
ipip_hier_gre_keys.sh \
ipip_hier_gre.sh \
+ lib_sh_test.sh \
local_termination.sh \
- loopback.sh \
mirror_gre_bound.sh \
mirror_gre_bridge_1d.sh \
mirror_gre_bridge_1d_vlan.sh \
@@ -111,8 +106,7 @@ TEST_PROGS = bridge_fdb_learning_limit.sh \
vxlan_symmetric_ipv6.sh \
vxlan_symmetric.sh
-TEST_PROGS_EXTENDED := devlink_lib.sh \
- ethtool_lib.sh \
+TEST_FILES := devlink_lib.sh \
fib_offload_lib.sh \
forwarding.config.sample \
ip6gre_lib.sh \
@@ -122,10 +116,14 @@ TEST_PROGS_EXTENDED := devlink_lib.sh \
mirror_gre_topo_lib.sh \
mirror_lib.sh \
mirror_topo_lib.sh \
+ router_mpath_nh_lib.sh \
sch_ets_core.sh \
sch_ets_tests.sh \
sch_tbf_core.sh \
sch_tbf_etsprio.sh \
tc_common.sh
+TEST_INCLUDES := \
+ ../lib.sh
+
include ../../lib.mk
diff --git a/tools/testing/selftests/net/forwarding/README b/tools/testing/selftests/net/forwarding/README
index b8a2af8fcfb7..7fdb6a9ca543 100644
--- a/tools/testing/selftests/net/forwarding/README
+++ b/tools/testing/selftests/net/forwarding/README
@@ -56,3 +56,36 @@ o Checks shall be added to lib.sh for any external dependencies.
o Code shall be checked using ShellCheck [1] prior to submission.
1. https://www.shellcheck.net/
+
+Customization
+=============
+
+The forwarding selftests framework uses a number of variables that
+influence its behavior and tools it invokes, and how it invokes them, in
+various ways. A number of these variables can be overridden. The way these
+overridable variables are specified is typically one of the following two
+syntaxes:
+
+ : "${VARIABLE:=default_value}"
+ VARIABLE=${VARIABLE:=default_value}
+
+Any of these variables can be overridden. Notably net/forwarding/lib.sh and
+net/lib.sh contain a number of overridable variables.
+
+One way of overriding these variables is through the environment:
+
+ PAUSE_ON_FAIL=yes ./some_test.sh
+
+The variable NETIFS is special. Since it is an array variable, there is no
+way to pass it through the environment. Its value can instead be given as
+consecutive arguments to the selftest:
+
+ ./some_test.sh swp{1..8}
+
+A way to customize variables in a persistent fashion is to create a file
+named forwarding.config in this directory. lib.sh sources the file if
+present, so it can contain any shell code. Typically it will contain
+assignments of variables whose value should be overridden.
+
+forwarding.config.sample is available in the directory as an example of
+how forwarding.config might look.
diff --git a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
index 9af9f6964808..c62331b2e006 100755
--- a/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_locked_port.sh
@@ -327,10 +327,10 @@ locked_port_mab_redirect()
RET=0
check_port_mab_support || return 0
- bridge link set dev $swp1 learning on locked on mab on
tc qdisc add dev $swp1 clsact
tc filter add dev $swp1 ingress protocol all pref 1 handle 101 flower \
action mirred egress redirect dev $swp2
+ bridge link set dev $swp1 learning on locked on mab on
ping_do $h1 192.0.2.2
check_err $? "Ping did not work with redirection"
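Review bot: The new position of `bridge link set dev $swp1 learning on locked on mab on`, after the `tc filter add`, is order-sensitive, but nothing in the hunk says why, so a later cleanup could move it back. A one-line comment above it would help, for example `# Enable MAB only after the redirect filter is in place, so no locked entry is created before redirection.`; that reason is inferred from the test's messages and should be confirmed by the author. The same applies to the teardown in the next hunk, where `tc qdisc del dev $swp1 clsact` now follows the `bridge link set ... off` line. locked_port_mab_redirect starts before this hunk and continues past it.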
@@ -349,8 +349,8 @@ locked_port_mab_redirect()
check_err $? "Locked entry not created after deleting filter"
bridge fdb del `mac_get $h1` vlan 1 dev $swp1 master
- tc qdisc del dev $swp1 clsact
bridge link set dev $swp1 learning off locked off mab off
+ tc qdisc del dev $swp1 clsact
log_test "Locked port MAB redirect"
}
diff --git a/tools/testing/selftests/net/forwarding/bridge_mdb.sh b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
index e4e3e9405056..d9d587454d20 100755
--- a/tools/testing/selftests/net/forwarding/bridge_mdb.sh
+++ b/tools/testing/selftests/net/forwarding/bridge_mdb.sh
@@ -329,7 +329,7 @@ __cfg_test_port_ip_star_g()
bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) \"permanent\" entry has a pending group timer"
- bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_err $? "\"permanent\" source entry has a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -346,7 +346,7 @@ __cfg_test_port_ip_star_g()
bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_fail $? "(*, G) EXCLUDE entry does not have a pending group timer"
- bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_err $? "\"blocked\" source entry has a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -363,7 +363,7 @@ __cfg_test_port_ip_star_g()
bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q " 0.00"
check_err $? "(*, G) INCLUDE entry has a pending group timer"
- bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "\/0.00"
+ bridge -d -s mdb get dev br0 grp $grp vid 10 | grep -q "/0.00"
check_fail $? "Source entry does not have a pending source timer"
bridge mdb del dev br0 port $swp1 grp $grp vid 10
@@ -803,11 +803,198 @@ cfg_test_dump()
cfg_test_dump_common "L2" l2_grps_get
}
+# Check flush functionality with different parameters.
+cfg_test_flush()
+{
+ local num_entries
+
+ # Add entries with different attributes and check that they are all
+ # flushed when the flush command is given with no parameters.
+
+ # Different port.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.2 vid 10
+
+ # Different VLAN ID.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.3 vid 10
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.4 vid 20
+
+ # Different routing protocol.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.5 vid 10 proto bgp
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.6 vid 10 proto zebra
+
+ # Different state.
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.7 vid 10 permanent
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.8 vid 10 temp
+
+ bridge mdb flush dev br0
+ num_entries=$(bridge mdb show dev br0 | wc -l)
+ [[ $num_entries -eq 0 ]]
+ check_err $? 0 "Not all entries flushed after flush all"
+
+ # Check that when flushing by port only entries programmed with the
+ # specified port are flushed and the rest are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
+
+ bridge mdb flush dev br0 port $swp1
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_fail $? "Entry not flushed by specified port"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_err $? "Entry flushed by wrong port"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+ check_err $? "Host entry flushed by wrong port"
+
+ bridge mdb flush dev br0 port br0
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+ check_fail $? "Host entry not flushed by specified port"
+
+ bridge mdb flush dev br0
+
+ # Check that when flushing by VLAN ID only entries programmed with the
+ # specified VLAN ID are flushed and the rest are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 20
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 20
+
+ bridge mdb flush dev br0 vid 10
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+ check_fail $? "Entry not flushed by specified VLAN ID"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 20 &> /dev/null
+ check_err $? "Entry flushed by wrong VLAN ID"
+
+ bridge mdb flush dev br0
+
+ # Check that all permanent entries are flushed when "permanent" is
+ # specified and that temporary entries are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_fail $? "Entry not flushed by \"permanent\" state"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_err $? "Entry flushed by wrong state (\"permanent\")"
+
+ bridge mdb flush dev br0
+
+ # Check that all temporary entries are flushed when "nopermanent" is
+ # specified and that permanent entries are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 permanent vid 10
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 temp vid 10
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_err $? "Entry flushed by wrong state (\"nopermanent\")"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_fail $? "Entry not flushed by \"nopermanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that L2 host entries are not flushed when "nopermanent" is
+ # specified, but flushed when "permanent" is specified.
+
+ bridge mdb add dev br0 port br0 grp 01:02:03:04:05:06 permanent vid 10
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
+ check_err $? "L2 host entry flushed by wrong state (\"nopermanent\")"
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp 01:02:03:04:05:06 vid 10 &> /dev/null
+ check_fail $? "L2 host entry not flushed by \"permanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that IPv4 host entries are not flushed when "permanent" is
+ # specified, but flushed when "nopermanent" is specified.
+
+ bridge mdb add dev br0 port br0 grp 239.1.1.1 temp vid 10
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+ check_err $? "IPv4 host entry flushed by wrong state (\"permanent\")"
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 &> /dev/null
+ check_fail $? "IPv4 host entry not flushed by \"nopermanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that IPv6 host entries are not flushed when "permanent" is
+ # specified, but flushed when "nopermanent" is specified.
+
+ bridge mdb add dev br0 port br0 grp ff0e::1 temp vid 10
+
+ bridge mdb flush dev br0 permanent
+
+ bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
+ check_err $? "IPv6 host entry flushed by wrong state (\"permanent\")"
+
+ bridge mdb flush dev br0 nopermanent
+
+ bridge mdb get dev br0 grp ff0e::1 vid 10 &> /dev/null
+ check_fail $? "IPv6 host entry not flushed by \"nopermanent\" state"
+
+ bridge mdb flush dev br0
+
+ # Check that when flushing by routing protocol only entries programmed
+ # with the specified routing protocol are flushed and the rest are not.
+
+ bridge mdb add dev br0 port $swp1 grp 239.1.1.1 vid 10 proto bgp
+ bridge mdb add dev br0 port $swp2 grp 239.1.1.1 vid 10 proto zebra
+ bridge mdb add dev br0 port br0 grp 239.1.1.1 vid 10
+
+ bridge mdb flush dev br0 proto bgp
+
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp1"
+ check_fail $? "Entry not flushed by specified routing protocol"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port $swp2"
+ check_err $? "Entry flushed by wrong routing protocol"
+ bridge mdb get dev br0 grp 239.1.1.1 vid 10 | grep -q "port br0"
+ check_err $? "Host entry flushed by wrong routing protocol"
+
+ bridge mdb flush dev br0
+
+ # Test that an error is returned when trying to flush using unsupported
+ # parameters.
+
+ bridge mdb flush dev br0 src_vni 10 &> /dev/null
+ check_fail $? "Managed to flush by source VNI"
+
+ bridge mdb flush dev br0 dst 198.51.100.1 &> /dev/null
+ check_fail $? "Managed to flush by destination IP"
+
+ bridge mdb flush dev br0 dst_port 4789 &> /dev/null
+ check_fail $? "Managed to flush by UDP destination port"
+
+ bridge mdb flush dev br0 vni 10 &> /dev/null
+ check_fail $? "Managed to flush by destination VNI"
+
+ log_test "Flush tests"
+}
+
cfg_test()
{
cfg_test_host
cfg_test_port
cfg_test_dump
+ cfg_test_flush
}
__fwd_test_host_ip()
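Review bot: The first check in cfg_test_flush, `check_err $? 0 "Not all entries flushed after flush all"`, passes an extra `0` between the status and the message, unlike every other `check_err $? "..."` call in this function. check_err is defined outside the hunk; if it takes the status and the message as its first two arguments, as the other calls suggest, a failure here would be reported with the text "0" and the real message would be dropped. The line should read `check_err $? "Not all entries flushed after flush all"`.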
@@ -1065,14 +1252,17 @@ fwd_test()
echo
log_info "# Forwarding tests"
+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br0 type bridge mcast_query_response_interval 100
+
# Forwarding according to MDB entries only takes place when the bridge
# detects that there is a valid querier in the network. Set the bridge
# as the querier and assign it a valid IPv6 link-local address to be
# used as the source address for MLD queries.
ip -6 address add fe80::1/64 nodad dev br0
ip link set dev br0 type bridge mcast_querier 1
- # Wait the default Query Response Interval (10 seconds) for the bridge
- # to determine that there are no other queriers in the network.
sleep 10
fwd_test_host
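Review bot: `sleep 10` is kept, but the comment that justified it (waiting the default 10-second Query Response Interval) is removed and the interval is now set to 100 centiseconds a few lines above. Either shorten the wait to match the new interval or state why ten seconds are still needed, e.g. `# Wait for the bridge to determine that there are no other queriers in the network.`; which of the two applies cannot be decided from the hunk. In the next hunk, the restore line `ip link set dev br0 type bridge mcast_query_response_interval 1000` hard-codes the default rather than restoring the value br0 had before the test, which matters only if the bridge is created with a non-default interval elsewhere in the script. fwd_test starts before this hunk.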
@@ -1080,6 +1270,7 @@ fwd_test()
ip link set dev br0 type bridge mcast_querier 0
ip -6 address del fe80::1/64 dev br0
+ ip link set dev br0 type bridge mcast_query_response_interval 1000
}
ctrl_igmpv3_is_in_test()
@@ -1166,8 +1357,8 @@ ctrl_test()
ctrl_mldv2_is_in_test
}
-if ! bridge mdb help 2>&1 | grep -q "get"; then
- echo "SKIP: iproute2 too old, missing bridge mdb get support"
+if ! bridge mdb help 2>&1 | grep -q "flush"; then
+ echo "SKIP: iproute2 too old, missing bridge mdb flush support"
exit $ksft_skip
fi
diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config
index 697994a9278b..8d7a1a004b7c 100644
--- a/tools/testing/selftests/net/forwarding/config
+++ b/tools/testing/selftests/net/forwarding/config
@@ -6,14 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y
CONFIG_NET_VRF=m
CONFIG_BPF_SYSCALL=y
CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_IPV6=y
+CONFIG_IPV6_GRE=m
+CONFIG_IPV6_MROUTE=y
+CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IPV6_PIMSM_V2=y
+CONFIG_IP_MROUTE=y
+CONFIG_IP_MROUTE_MULTIPLE_TABLES=y
+CONFIG_IP_PIMSM_V1=y
+CONFIG_IP_PIMSM_V2=y
+CONFIG_MACVLAN=m
CONFIG_NET_ACT_CT=m
CONFIG_NET_ACT_MIRRED=m
CONFIG_NET_ACT_MPLS=m
+CONFIG_NET_ACT_PEDIT=m
+CONFIG_NET_ACT_POLICE=m
+CONFIG_NET_ACT_SAMPLE=m
+CONFIG_NET_ACT_SKBEDIT=m
+CONFIG_NET_ACT_TUNNEL_KEY=m
CONFIG_NET_ACT_VLAN=m
CONFIG_NET_CLS_FLOWER=m
CONFIG_NET_CLS_MATCHALL=m
+CONFIG_NET_CLS_BASIC=m
+CONFIG_NET_EMATCH=y
+CONFIG_NET_EMATCH_META=m
+CONFIG_NET_IPGRE=m
+CONFIG_NET_IPGRE_DEMUX=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_SCH_ETS=m
CONFIG_NET_SCH_INGRESS=m
CONFIG_NET_ACT_GACT=m
+CONFIG_NET_SCH_PRIO=m
+CONFIG_NET_SCH_RED=m
+CONFIG_NET_SCH_TBF=m
+CONFIG_NET_TC_SKB_EXT=y
+CONFIG_NET_TEAM=y
+CONFIG_NET_TEAM_MODE_LOADBALANCE=y
+CONFIG_NETFILTER=y
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_TABLES=m
CONFIG_VETH=m
CONFIG_NAMESPACES=y
CONFIG_NET_NS=y
+CONFIG_VXLAN=m
+CONFIG_XFRM_USER=m
diff --git a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
index 56eb83d1a3bd..1783c10215e5 100755
--- a/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/custom_multipath_hash.sh
@@ -183,42 +183,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:4::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:4::2-2001:db8:4::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
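Review bot: `-d $MZ_DELAY` is unquoted in all six senders of this hunk and in send_src_udp6 and send_dst_udp6 in the next one. MZ_DELAY is not defined in the visible lines (presumably it comes from lib.sh, where it could be overridden from the environment or forwarding.config). If it ends up empty, `-d` takes the following `-c` or `-t` as its argument, and the test would send different traffic than intended without an obvious error. Writing `-d "$MZ_DELAY"` makes an empty or multi-word value reach the tool as a single argument, so the mistake surfaces at the tool's own argument check.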
@@ -234,14 +234,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:4::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:4::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/ethtool.sh b/tools/testing/selftests/net/forwarding/ethtool.sh
deleted file mode 100755
index aa2eafb7b243..000000000000
--- a/tools/testing/selftests/net/forwarding/ethtool.sh
+++ /dev/null
@@ -1,301 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- same_speeds_autoneg_off
- different_speeds_autoneg_off
- combination_of_neg_on_and_off
- advertise_subset_of_speeds
- check_highest_speed_is_chosen
- different_speeds_autoneg_on
-"
-NUM_NETIFS=2
-source lib.sh
-source ethtool_lib.sh
-
-h1_create()
-{
- simple_if_init $h1 192.0.2.1/24
-}
-
-h1_destroy()
-{
- simple_if_fini $h1 192.0.2.1/24
-}
-
-h2_create()
-{
- simple_if_init $h2 192.0.2.2/24
-}
-
-h2_destroy()
-{
- simple_if_fini $h2 192.0.2.2/24
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- h1_create
- h2_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- h2_destroy
- h1_destroy
-}
-
-same_speeds_autoneg_off()
-{
- # Check that when each of the reported speeds is forced, the links come
- # up and are operational.
- local -a speeds_arr=($(common_speeds_get $h1 $h2 0 0))
-
- for speed in "${speeds_arr[@]}"; do
- RET=0
- ethtool_set $h1 speed $speed autoneg off
- ethtool_set $h2 speed $speed autoneg off
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_err $? "speed $speed autoneg off"
- log_test "force of same speed autoneg off"
- log_info "speed = $speed"
- done
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-different_speeds_autoneg_off()
-{
- # Test that when we force different speeds, links are not up and ping
- # fails.
- RET=0
-
- local -a speeds_arr=($(different_speeds_get $h1 $h2 0 0))
- local speed1=${speeds_arr[0]}
- local speed2=${speeds_arr[1]}
-
- ethtool_set $h1 speed $speed1 autoneg off
- ethtool_set $h2 speed $speed2 autoneg off
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_fail $? "ping with different speeds"
-
- log_test "force of different speeds autoneg off"
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-combination_of_neg_on_and_off()
-{
- # Test that when one device is forced to a speed supported by both
- # endpoints and the other device is configured to autoneg on, the links
- # are up and ping passes.
- local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
-
- for speed in "${speeds_arr[@]}"; do
- RET=0
- ethtool_set $h1 speed $speed autoneg off
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_err $? "h1-speed=$speed autoneg off, h2 autoneg on"
- log_test "one side with autoneg off and another with autoneg on"
- log_info "force speed = $speed"
- done
-
- ethtool -s $h1 autoneg on
-}
-
-hex_speed_value_get()
-{
- local speed=$1; shift
-
- local shift_size=${speed_values[$speed]}
- speed=$((0x1 << $"shift_size"))
- printf "%#x" "$speed"
-}
-
-subset_of_common_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr=($(common_speeds_get $dev1 $dev2 0 $adver))
- local speed_to_advertise=0
- local speed_to_remove=${speeds_arr[0]}
- speed_to_remove+='base'
-
- local -a speeds_mode_arr=($(common_speeds_get $dev1 $dev2 1 $adver))
-
- for speed in ${speeds_mode_arr[@]}; do
- if [[ $speed != $speed_to_remove* ]]; then
- speed=$(hex_speed_value_get $speed)
- speed_to_advertise=$(($speed_to_advertise | \
- $speed))
- fi
-
- done
-
- # Convert to hex.
- printf "%#x" "$speed_to_advertise"
-}
-
-speed_to_advertise_get()
-{
- # The function returns the hex number that is composed by OR-ing all
- # the modes corresponding to the provided speed.
- local speed_without_mode=$1; shift
- local supported_speeds=("$@"); shift
- local speed_to_advertise=0
-
- speed_without_mode+='base'
-
- for speed in ${supported_speeds[@]}; do
- if [[ $speed == $speed_without_mode* ]]; then
- speed=$(hex_speed_value_get $speed)
- speed_to_advertise=$(($speed_to_advertise | \
- $speed))
- fi
-
- done
-
- # Convert to hex.
- printf "%#x" "$speed_to_advertise"
-}
-
-advertise_subset_of_speeds()
-{
- # Test that when one device advertises a subset of speeds and another
- # advertises a specific speed (but all modes of this speed), the links
- # are up and ping passes.
- RET=0
-
- local speed_1_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
- ethtool_set $h1 advertise $speed_1_to_advertise
-
- if [ $RET != 0 ]; then
- log_test "advertise subset of speeds"
- return
- fi
-
- local -a speeds_arr_without_mode=($(common_speeds_get $h1 $h2 0 1))
- # Check only speeds that h1 advertised. Remove the first speed.
- unset speeds_arr_without_mode[0]
- local -a speeds_arr_with_mode=($(common_speeds_get $h1 $h2 1 1))
-
- for speed_value in ${speeds_arr_without_mode[@]}; do
- RET=0
- local speed_2_to_advertise=$(speed_to_advertise_get $speed_value \
- "${speeds_arr_with_mode[@]}")
- ethtool_set $h2 advertise $speed_2_to_advertise
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_err $? "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise ($speed_value)"
-
- log_test "advertise subset of speeds"
- log_info "h1=$speed_1_to_advertise, h2=$speed_2_to_advertise"
- done
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-check_highest_speed_is_chosen()
-{
- # Test that when one device advertises a subset of speeds, the other
- # chooses the highest speed. This test checks configuration without
- # traffic.
- RET=0
-
- local max_speed
- local chosen_speed
- local speed_to_advertise=$(subset_of_common_speeds_get $h1 $h2 1)
-
- ethtool_set $h1 advertise $speed_to_advertise
-
- if [ $RET != 0 ]; then
- log_test "check highest speed"
- return
- fi
-
- local -a speeds_arr=($(common_speeds_get $h1 $h2 0 1))
-
- max_speed=${speeds_arr[0]}
- for current in ${speeds_arr[@]}; do
- if [[ $current -gt $max_speed ]]; then
- max_speed=$current
- fi
- done
-
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- chosen_speed=$(ethtool $h1 | grep 'Speed:')
- chosen_speed=${chosen_speed%"Mb/s"*}
- chosen_speed=${chosen_speed#*"Speed: "}
- ((chosen_speed == max_speed))
- check_err $? "h1 advertise $speed_to_advertise, h2 sync to speed $chosen_speed"
-
- log_test "check highest speed"
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-different_speeds_autoneg_on()
-{
- # Test that when we configure links to advertise different speeds,
- # links are not up and ping fails.
- RET=0
-
- local -a speeds=($(different_speeds_get $h1 $h2 1 1))
- local speed1=${speeds[0]}
- local speed2=${speeds[1]}
-
- speed1=$(hex_speed_value_get $speed1)
- speed2=$(hex_speed_value_get $speed2)
-
- ethtool_set $h1 advertise $speed1
- ethtool_set $h2 advertise $speed2
-
- if (($RET)); then
- setup_wait_dev_with_timeout $h1
- setup_wait_dev_with_timeout $h2
- ping_do $h1 192.0.2.2
- check_fail $? "ping with different speeds autoneg on"
- fi
-
- log_test "advertise different speeds autoneg on"
-
- ethtool -s $h2 autoneg on
- ethtool -s $h1 autoneg on
-}
-
-skip_on_veth
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-declare -gA speed_values
-eval "speed_values=($(speeds_arr_get))"
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh b/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
deleted file mode 100755
index 17f89c3b7c02..000000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_extended_state.sh
+++ /dev/null
@@ -1,117 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- autoneg
- autoneg_force_mode
- no_cable
-"
-
-NUM_NETIFS=2
-source lib.sh
-source ethtool_lib.sh
-
-TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
-
-setup_prepare()
-{
- swp1=${NETIFS[p1]}
- swp2=${NETIFS[p2]}
- swp3=$NETIF_NO_CABLE
-}
-
-ethtool_ext_state()
-{
- local dev=$1; shift
- local expected_ext_state=$1; shift
- local expected_ext_substate=${1:-""}; shift
-
- local ext_state=$(ethtool $dev | grep "Link detected" \
- | cut -d "(" -f2 | cut -d ")" -f1)
- local ext_substate=$(echo $ext_state | cut -sd "," -f2 \
- | sed -e 's/^[[:space:]]*//')
- ext_state=$(echo $ext_state | cut -d "," -f1)
-
- if [[ $ext_state != $expected_ext_state ]]; then
- echo "Expected \"$expected_ext_state\", got \"$ext_state\""
- return 1
- fi
- if [[ $ext_substate != $expected_ext_substate ]]; then
- echo "Expected \"$expected_ext_substate\", got \"$ext_substate\""
- return 1
- fi
-}
-
-autoneg()
-{
- local msg
-
- RET=0
-
- ip link set dev $swp1 up
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
- "Autoneg" "No partner detected")
- check_err $? "$msg"
-
- log_test "Autoneg, No partner detected"
-
- ip link set dev $swp1 down
-}
-
-autoneg_force_mode()
-{
- local msg
-
- RET=0
-
- ip link set dev $swp1 up
- ip link set dev $swp2 up
-
- local -a speeds_arr=($(different_speeds_get $swp1 $swp2 0 0))
- local speed1=${speeds_arr[0]}
- local speed2=${speeds_arr[1]}
-
- ethtool_set $swp1 speed $speed1 autoneg off
- ethtool_set $swp2 speed $speed2 autoneg off
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp1 \
- "Autoneg" "No partner detected during force mode")
- check_err $? "$msg"
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp2 \
- "Autoneg" "No partner detected during force mode")
- check_err $? "$msg"
-
- log_test "Autoneg, No partner detected during force mode"
-
- ethtool -s $swp2 autoneg on
- ethtool -s $swp1 autoneg on
-
- ip link set dev $swp2 down
- ip link set dev $swp1 down
-}
-
-no_cable()
-{
- local msg
-
- RET=0
-
- ip link set dev $swp3 up
-
- msg=$(busywait $TIMEOUT ethtool_ext_state $swp3 "No cable")
- check_err $? "$msg"
-
- log_test "No cable"
-
- ip link set dev $swp3 down
-}
-
-skip_on_veth
-
-setup_prepare
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ethtool_lib.sh b/tools/testing/selftests/net/forwarding/ethtool_lib.sh
deleted file mode 100644
index b9bfb45085af..000000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_lib.sh
+++ /dev/null
@@ -1,120 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-speeds_arr_get()
-{
- cmd='/ETHTOOL_LINK_MODE_[^[:space:]]*_BIT[[:space:]]+=[[:space:]]+/ \
- {sub(/,$/, "") \
- sub(/ETHTOOL_LINK_MODE_/,"") \
- sub(/_BIT/,"") \
- sub(/_Full/,"/Full") \
- sub(/_Half/,"/Half");\
- print "["$1"]="$3}'
-
- awk "${cmd}" /usr/include/linux/ethtool.h
-}
-
-ethtool_set()
-{
- local cmd="$@"
- local out=$(ethtool -s $cmd 2>&1 | wc -l)
-
- check_err $out "error in configuration. $cmd"
-}
-
-dev_linkmodes_params_get()
-{
- local dev=$1; shift
- local adver=$1; shift
- local -a linkmodes_params
- local param_count
- local arr
-
- if (($adver)); then
- mode="Advertised link modes"
- else
- mode="Supported link modes"
- fi
-
- local -a dev_linkmodes=($(dev_speeds_get $dev 1 $adver))
- for ((i=0; i<${#dev_linkmodes[@]}; i++)); do
- linkmodes_params[$i]=$(echo -e "${dev_linkmodes[$i]}" | \
- # Replaces all non numbers with spaces
- sed -e 's/[^0-9]/ /g' | \
- # Squeeze spaces in sequence to 1 space
- tr -s ' ')
- # Count how many numbers were found in the linkmode
- param_count=$(echo "${linkmodes_params[$i]}" | wc -w)
- if [[ $param_count -eq 1 ]]; then
- linkmodes_params[$i]="${linkmodes_params[$i]} 1"
- elif [[ $param_count -ge 3 ]]; then
- arr=(${linkmodes_params[$i]})
- # Take only first two params
- linkmodes_params[$i]=$(echo "${arr[@]:0:2}")
- fi
- done
- echo ${linkmodes_params[@]}
-}
-
-dev_speeds_get()
-{
- local dev=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
- local speeds_str
-
- if (($adver)); then
- mode="Advertised link modes"
- else
- mode="Supported link modes"
- fi
-
- speeds_str=$(ethtool "$dev" | \
- # Snip everything before the link modes section.
- sed -n '/'"$mode"':/,$p' | \
- # Quit processing the rest at the start of the next section.
- # When checking, skip the header of this section (hence the 2,).
- sed -n '2,${/^[\t][^ \t]/q};p' | \
- # Drop the section header of the current section.
- cut -d':' -f2)
-
- local -a speeds_arr=($speeds_str)
- if [[ $with_mode -eq 0 ]]; then
- for ((i=0; i<${#speeds_arr[@]}; i++)); do
- speeds_arr[$i]=${speeds_arr[$i]%base*}
- done
- fi
- echo ${speeds_arr[@]}
-}
-
-common_speeds_get()
-{
- dev1=$1; shift
- dev2=$1; shift
- with_mode=$1; shift
- adver=$1; shift
-
- local -a dev1_speeds=($(dev_speeds_get $dev1 $with_mode $adver))
- local -a dev2_speeds=($(dev_speeds_get $dev2 $with_mode $adver))
-
- comm -12 \
- <(printf '%s\n' "${dev1_speeds[@]}" | sort -u) \
- <(printf '%s\n' "${dev2_speeds[@]}" | sort -u)
-}
-
-different_speeds_get()
-{
- local dev1=$1; shift
- local dev2=$1; shift
- local with_mode=$1; shift
- local adver=$1; shift
-
- local -a speeds_arr
-
- speeds_arr=($(common_speeds_get $dev1 $dev2 $with_mode $adver))
- if [[ ${#speeds_arr[@]} < 2 ]]; then
- check_err 1 "cannot check different speeds. There are not enough speeds"
- fi
-
- echo ${speeds_arr[0]} ${speeds_arr[1]}
-}
diff --git a/tools/testing/selftests/net/forwarding/ethtool_mm.sh b/tools/testing/selftests/net/forwarding/ethtool_mm.sh
deleted file mode 100755
index 39e736f30322..000000000000
--- a/tools/testing/selftests/net/forwarding/ethtool_mm.sh
+++ /dev/null
@@ -1,296 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-ALL_TESTS="
- manual_with_verification_h1_to_h2
- manual_with_verification_h2_to_h1
- manual_without_verification_h1_to_h2
- manual_without_verification_h2_to_h1
- manual_failed_verification_h1_to_h2
- manual_failed_verification_h2_to_h1
- lldp
-"
-
-NUM_NETIFS=2
-REQUIRE_MZ=no
-PREEMPTIBLE_PRIO=0
-source lib.sh
-
-traffic_test()
-{
- local if=$1; shift
- local src=$1; shift
- local num_pkts=10000
- local before=
- local after=
- local delta=
-
- before=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
-
- $MZ $if -q -c $num_pkts -p 64 -b bcast -t ip -R $PREEMPTIBLE_PRIO
-
- after=$(ethtool_std_stats_get $if "eth-mac" "FramesTransmittedOK" $src)
-
- delta=$((after - before))
-
- # Allow an extra 1% tolerance for random packets sent by the stack
- [ $delta -ge $num_pkts ] && [ $delta -le $((num_pkts + 100)) ]
-}
-
-manual_with_verification()
-{
- local tx=$1; shift
- local rx=$1; shift
-
- RET=0
-
- # It isn't completely clear from IEEE 802.3-2018 Figure 99-5: Transmit
- # Processing state diagram whether the "send_r" variable (send response
- # to verification frame) should be taken into consideration while the
- # MAC Merge TX direction is disabled. That being said, at least the
- # NXP ENETC does not, and requires tx-enabled on in order to respond to
- # the link partner's verification frames.
- ethtool --set-mm $rx tx-enabled on
- ethtool --set-mm $tx verify-enabled on tx-enabled on
-
- # Wait for verification to finish
- sleep 1
-
- ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
- grep -q 'SUCCEEDED'
- check_err "$?" "Verification did not succeed"
-
- ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
- check_err "$?" "pMAC TX is not active"
-
- traffic_test $tx "pmac"
- check_err "$?" "Traffic did not get sent through $tx's pMAC"
-
- ethtool --set-mm $tx verify-enabled off tx-enabled off
- ethtool --set-mm $rx tx-enabled off
-
- log_test "Manual configuration with verification: $tx to $rx"
-}
-
-manual_with_verification_h1_to_h2()
-{
- manual_with_verification $h1 $h2
-}
-
-manual_with_verification_h2_to_h1()
-{
- manual_with_verification $h2 $h1
-}
-
-manual_without_verification()
-{
- local tx=$1; shift
- local rx=$1; shift
-
- RET=0
-
- ethtool --set-mm $tx verify-enabled off tx-enabled on
-
- ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
- grep -q 'DISABLED'
- check_err "$?" "Verification is not disabled"
-
- ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
- check_err "$?" "pMAC TX is not active"
-
- traffic_test $tx "pmac"
- check_err "$?" "Traffic did not get sent through $tx's pMAC"
-
- ethtool --set-mm $tx verify-enabled off tx-enabled off
-
- log_test "Manual configuration without verification: $tx to $rx"
-}
-
-manual_without_verification_h1_to_h2()
-{
- manual_without_verification $h1 $h2
-}
-
-manual_without_verification_h2_to_h1()
-{
- manual_without_verification $h2 $h1
-}
-
-manual_failed_verification()
-{
- local tx=$1; shift
- local rx=$1; shift
-
- RET=0
-
- ethtool --set-mm $rx pmac-enabled off
- ethtool --set-mm $tx verify-enabled on tx-enabled on
-
- # Wait for verification to time out
- sleep 1
-
- ethtool --json --show-mm $tx | jq -r '.[]."verify-status"' | \
- grep -q 'SUCCEEDED'
- check_fail "$?" "Verification succeeded when it shouldn't have"
-
- ethtool --json --show-mm $tx | jq -r '.[]."tx-active"' | grep -q 'true'
- check_fail "$?" "pMAC TX is active when it shouldn't have"
-
- traffic_test $tx "emac"
- check_err "$?" "Traffic did not get sent through $tx's eMAC"
-
- ethtool --set-mm $tx verify-enabled off tx-enabled off
- ethtool --set-mm $rx pmac-enabled on
-
- log_test "Manual configuration with failed verification: $tx to $rx"
-}
-
-manual_failed_verification_h1_to_h2()
-{
- manual_failed_verification $h1 $h2
-}
-
-manual_failed_verification_h2_to_h1()
-{
- manual_failed_verification $h2 $h1
-}
-
-lldp_change_add_frag_size()
-{
- local add_frag_size=$1
-
- lldptool -T -i $h1 -V addEthCaps addFragSize=$add_frag_size >/dev/null
- # Wait for TLVs to be received
- sleep 2
- lldptool -i $h2 -t -n -V addEthCaps | \
- grep -q "Additional fragment size: $add_frag_size"
-}
-
-lldp()
-{
- RET=0
-
- systemctl start lldpad
-
- # Configure the interfaces to receive and transmit LLDPDUs
- lldptool -L -i $h1 adminStatus=rxtx >/dev/null
- lldptool -L -i $h2 adminStatus=rxtx >/dev/null
-
- # Enable the transmission of Additional Ethernet Capabilities TLV
- lldptool -T -i $h1 -V addEthCaps enableTx=yes >/dev/null
- lldptool -T -i $h2 -V addEthCaps enableTx=yes >/dev/null
-
- # Wait for TLVs to be received
- sleep 2
-
- lldptool -i $h1 -t -n -V addEthCaps | \
- grep -q "Preemption capability active"
- check_err "$?" "$h1 pMAC TX is not active"
-
- lldptool -i $h2 -t -n -V addEthCaps | \
- grep -q "Preemption capability active"
- check_err "$?" "$h2 pMAC TX is not active"
-
- lldp_change_add_frag_size 3
- check_err "$?" "addFragSize 3"
-
- lldp_change_add_frag_size 2
- check_err "$?" "addFragSize 2"
-
- lldp_change_add_frag_size 1
- check_err "$?" "addFragSize 1"
-
- lldp_change_add_frag_size 0
- check_err "$?" "addFragSize 0"
-
- traffic_test $h1 "pmac"
- check_err "$?" "Traffic did not get sent through $h1's pMAC"
-
- traffic_test $h2 "pmac"
- check_err "$?" "Traffic did not get sent through $h2's pMAC"
-
- systemctl stop lldpad
-
- log_test "LLDP"
-}
-
-h1_create()
-{
- ip link set dev $h1 up
-
- tc qdisc add dev $h1 root mqprio num_tc 4 map 0 1 2 3 \
- queues 1@0 1@1 1@2 1@3 \
- fp P E E E \
- hw 1
-
- ethtool --set-mm $h1 pmac-enabled on tx-enabled off verify-enabled off
-}
-
-h2_create()
-{
- ip link set dev $h2 up
-
- ethtool --set-mm $h2 pmac-enabled on tx-enabled off verify-enabled off
-
- tc qdisc add dev $h2 root mqprio num_tc 4 map 0 1 2 3 \
- queues 1@0 1@1 1@2 1@3 \
- fp P E E E \
- hw 1
-}
-
-h1_destroy()
-{
- ethtool --set-mm $h1 pmac-enabled off tx-enabled off verify-enabled off
-
- tc qdisc del dev $h1 root
-
- ip link set dev $h1 down
-}
-
-h2_destroy()
-{
- tc qdisc del dev $h2 root
-
- ethtool --set-mm $h2 pmac-enabled off tx-enabled off verify-enabled off
-
- ip link set dev $h2 down
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- h1_create
- h2_create
-}
-
-cleanup()
-{
- pre_cleanup
-
- h2_destroy
- h1_destroy
-}
-
-check_ethtool_mm_support
-check_tc_fp_support
-require_command lldptool
-bail_on_lldpad "autoconfigure the MAC Merge layer" "configure it manually"
-
-for netif in ${NETIFS[@]}; do
- ethtool --show-mm $netif 2>&1 &> /dev/null
- if [[ $? -ne 0 ]]; then
- echo "SKIP: $netif does not support MAC Merge"
- exit $ksft_skip
- fi
-done
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/forwarding.config.sample b/tools/testing/selftests/net/forwarding/forwarding.config.sample
index 4a546509de90..f1ca95e79a65 100644
--- a/tools/testing/selftests/net/forwarding/forwarding.config.sample
+++ b/tools/testing/selftests/net/forwarding/forwarding.config.sample
@@ -3,49 +3,28 @@
##############################################################################
# Topology description. p1 looped back to p2, p3 to p4 and so on.
-declare -A NETIFS
-NETIFS[p1]=veth0
-NETIFS[p2]=veth1
-NETIFS[p3]=veth2
-NETIFS[p4]=veth3
-NETIFS[p5]=veth4
-NETIFS[p6]=veth5
-NETIFS[p7]=veth6
-NETIFS[p8]=veth7
-NETIFS[p9]=veth8
-NETIFS[p10]=veth9
+NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
# Port that does not have a cable connected.
NETIF_NO_CABLE=eth8
##############################################################################
-# Defines
+# In addition to the topology-related variables, it is also possible to override
+# in this file other variables that net/lib.sh, net/forwarding/lib.sh or other
+# libraries or selftests use. E.g.:
-# IPv4 ping utility name
-PING=ping
-# IPv6 ping utility name. Some distributions use 'ping' for IPv6.
PING6=ping6
-# Packet generator. Some distributions use 'mz'.
MZ=mausezahn
-# Time to wait after interfaces participating in the test are all UP
WAIT_TIME=5
-# Whether to pause on failure or not.
-PAUSE_ON_FAIL=no
-# Whether to pause on cleanup or not.
-PAUSE_ON_CLEANUP=no
-# Type of network interface to create
-NETIF_TYPE=veth
-# Whether to create virtual interfaces (veth) or not
-NETIF_CREATE=yes
-# Timeout (in seconds) before ping exits regardless of how many packets have
-# been sent or received
-PING_TIMEOUT=5
-# Minimum ageing_time (in centiseconds) supported by hardware
-LOW_AGEING_TIME=1000
-# Flag for tc match, supposed to be skip_sw/skip_hw which means do not process
-# filter by software/hardware
-TC_FLAG=skip_hw
-# IPv6 traceroute utility name.
-TROUTE6=traceroute6
-
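Review bot: The sample now assigns `NETIFS=(` with string subscripts but no longer carries `declare -A NETIFS`, so it is only correct when the file is sourced after NETIFS has been declared associative. The lib.sh hunk in this diff adds `declare -A NETIFS=(`, which is presumably what it relies on; the order in which lib.sh sources this file is not visible here. If the assignment runs without that declaration, bash creates an indexed array and evaluates `[p1]`..`[p10]` arithmetically, so every entry lands on index 0 and all ports resolve to veth9. A comment above the assignment would make the dependency explicit, e.g. `# NETIFS is declared with 'declare -A' in lib.sh before this file is sourced; do not source this file on its own.`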
diff --git a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
index 0446db9c6f74..9788bd0f6e8b 100755
--- a/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/gre_custom_multipath_hash.sh
@@ -278,42 +278,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
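Review bot: `$MZ_DELAY` is expanded unquoted in every send_* helper of this hunk, so an override that is empty or contains whitespace would shift or split the arguments that follow `-d`. Quoting it keeps the option and its value together, e.g. `-d "$MZ_DELAY" -c 50 -t udp "sp=20000,dp=30000"`. The lib.sh hunk in this diff defaults the variable with `: "${MZ_DELAY:=0}"`, which covers the unset and empty cases only if nothing resets it afterwards. The same applies to the second hunk of this file and to the MZ_DELAY hunks in gre_inner_v4_multipath.sh, gre_inner_v6_multipath.sh, gre_multipath.sh, gre_multipath_nh.sh, gre_multipath_nh_res.sh, the ip6gre_* scripts and ip6gre_lib.sh.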
@@ -329,14 +329,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
index e4009f658003..efca6114a3ce 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v4_multipath.sh
@@ -267,7 +267,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
index e449475c4d3e..a71ad39fc0c3 100755
--- a/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_inner_v6_multipath.sh
@@ -266,9 +266,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
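Review bot: The upper bound of both the `-A` and `-B` ranges moves from `::1e` to `::3e` alongside the delay change, and nothing in the hunk says why. It presumably aligns the IPv6 spread with the `.2-.62` ranges that multipath4_test uses in gre_inner_v4_multipath.sh (0x3e = 62), but that is an inference. A one-line comment above the `$MZ` call would record the intent, e.g. `# 61 source and destination addresses (::2-::3e), same spread as the IPv4 variant`. The identical change in ip6gre_inner_v6_multipath.sh would take the same comment; multipath6_test starts and ends outside this hunk.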
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath.sh b/tools/testing/selftests/net/forwarding/gre_multipath.sh
index a8d8e8b3dc81..57531c1d884d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath.sh
@@ -220,7 +220,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
index d03aa2cab9fd..7d5b2b9cc133 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh.sh
@@ -64,7 +64,6 @@ ALL_TESTS="
ping_ipv6
multipath_ipv4
multipath_ipv6
- multipath_ipv6_l4
"
NUM_NETIFS=6
@@ -245,7 +244,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -264,34 +263,6 @@ multipath6_test()
local weight1=$1; shift
local weight2=$1; shift
- sysctl_set net.ipv6.fib_multipath_hash_policy 0
- ip nexthop replace id 103 group 101,$weight1/102,$weight2
-
- local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- # Generate 16384 echo requests, each with a random flow label.
- for ((i=0; i < 16384; ++i)); do
- ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- local d111=$((t1_111 - t0_111))
- local d222=$((t1_222 - t0_222))
- multipath_eval "$what" $weight1 $weight2 $d111 $d222
-
- ip nexthop replace id 103 group 101/102
- sysctl_restore net.ipv6.fib_multipath_hash_policy
-}
-
-multipath6_l4_test()
-{
- local what=$1; shift
- local weight1=$1; shift
- local weight2=$1; shift
-
sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip nexthop replace id 103 group 101,$weight1/102,$weight2
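Review bot: After this hunk multipath6_test starts with `sysctl_set net.ipv6.fib_multipath_hash_policy 1`, so the IPv6 run under policy 0, which the removed `$PING6` loop drove with random flow labels, is no longer exercised as far as these hunks show. The function name and the "multipath_ipv6" entry in ALL_TESTS do not say that only the L4 policy is covered. A comment above the sysctl_set line would make that visible, e.g. `# Only hash policy 1 (L4) is tested here; policy 0 needs per-packet flow labels, which the mausezahn traffic below does not provide.` The wording about mausezahn is an assumption to confirm. The same applies to the matching hunk in gre_multipath_nh_res.sh; the function body continues outside this hunk.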
@@ -300,7 +271,7 @@ multipath6_l4_test()
ip vrf exec v$h1 \
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -339,14 +310,6 @@ multipath_ipv6()
multipath6_test "Weighted MP 11:45" 11 45
}
-multipath_ipv6_l4()
-{
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
-}
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
index 088b65e64d66..370f9925302d 100755
--- a/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/gre_multipath_nh_res.sh
@@ -64,7 +64,6 @@ ALL_TESTS="
ping_ipv6
multipath_ipv4
multipath_ipv6
- multipath_ipv6_l4
"
NUM_NETIFS=6
@@ -248,7 +247,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A 192.0.2.1 -B 192.0.2.18 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -267,35 +266,6 @@ multipath6_test()
local weight1=$1; shift
local weight2=$1; shift
- sysctl_set net.ipv6.fib_multipath_hash_policy 0
- ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
- type resilient
-
- local t0_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t0_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- # Generate 16384 echo requests, each with a random flow label.
- for ((i=0; i < 16384; ++i)); do
- ip vrf exec v$h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
- local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
-
- local d111=$((t1_111 - t0_111))
- local d222=$((t1_222 - t0_222))
- multipath_eval "$what" $weight1 $weight2 $d111 $d222
-
- ip nexthop replace id 103 group 101/102 type resilient
- sysctl_restore net.ipv6.fib_multipath_hash_policy
-}
-
-multipath6_l4_test()
-{
- local what=$1; shift
- local weight1=$1; shift
- local weight2=$1; shift
-
sysctl_set net.ipv6.fib_multipath_hash_policy 1
ip nexthop replace id 103 group 101,$weight1/102,$weight2 \
type resilient
@@ -305,7 +275,7 @@ multipath6_l4_test()
ip vrf exec v$h1 \
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::1 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
local t1_111=$(tc_rule_stats_get $ul2 111 ingress)
local t1_222=$(tc_rule_stats_get $ul2 222 ingress)
@@ -344,14 +314,6 @@ multipath_ipv6()
multipath6_test "Weighted MP 11:45" 11 45
}
-multipath_ipv6_l4()
-{
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
-}
-
trap cleanup EXIT
setup_prepare
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
deleted file mode 100755
index 48584a51388f..000000000000
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3.sh
+++ /dev/null
@@ -1,340 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# +--------------------+ +----------------------+
-# | H1 | | H2 |
-# | | | |
-# | $h1.200 + | | + $h2.200 |
-# | 192.0.2.1/28 | | | | 192.0.2.18/28 |
-# | 2001:db8:1::1/64 | | | | 2001:db8:2::1/64 |
-# | | | | | |
-# | $h1 + | | + $h2 |
-# | | | | | |
-# +------------------|-+ +-|--------------------+
-# | |
-# +------------------|-------------------------|--------------------+
-# | SW | | |
-# | | | |
-# | $rp1 + + $rp2 |
-# | | | |
-# | $rp1.200 + + $rp2.200 |
-# | 192.0.2.2/28 192.0.2.17/28 |
-# | 2001:db8:1::2/64 2001:db8:2::2/64 |
-# | |
-# +-----------------------------------------------------------------+
-
-ALL_TESTS="
- ping_ipv4
- ping_ipv6
- test_stats_rx_ipv4
- test_stats_tx_ipv4
- test_stats_rx_ipv6
- test_stats_tx_ipv6
- respin_enablement
- test_stats_rx_ipv4
- test_stats_tx_ipv4
- test_stats_rx_ipv6
- test_stats_tx_ipv6
- reapply_config
- ping_ipv4
- ping_ipv6
- test_stats_rx_ipv4
- test_stats_tx_ipv4
- test_stats_rx_ipv6
- test_stats_tx_ipv6
- test_stats_report_rx
- test_stats_report_tx
- test_destroy_enabled
- test_double_enable
-"
-NUM_NETIFS=4
-source lib.sh
-
-h1_create()
-{
- simple_if_init $h1
- vlan_create $h1 200 v$h1 192.0.2.1/28 2001:db8:1::1/64
- ip route add 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
- ip -6 route add 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
-}
-
-h1_destroy()
-{
- ip -6 route del 2001:db8:2::/64 vrf v$h1 nexthop via 2001:db8:1::2
- ip route del 192.0.2.16/28 vrf v$h1 nexthop via 192.0.2.2
- vlan_destroy $h1 200
- simple_if_fini $h1
-}
-
-h2_create()
-{
- simple_if_init $h2
- vlan_create $h2 200 v$h2 192.0.2.18/28 2001:db8:2::1/64
- ip route add 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
- ip -6 route add 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
-}
-
-h2_destroy()
-{
- ip -6 route del 2001:db8:1::/64 vrf v$h2 nexthop via 2001:db8:2::2
- ip route del 192.0.2.0/28 vrf v$h2 nexthop via 192.0.2.17
- vlan_destroy $h2 200
- simple_if_fini $h2
-}
-
-router_rp1_200_create()
-{
- ip link add name $rp1.200 link $rp1 type vlan id 200
- ip link set dev $rp1.200 addrgenmode eui64
- ip link set dev $rp1.200 up
- ip address add dev $rp1.200 192.0.2.2/28
- ip address add dev $rp1.200 2001:db8:1::2/64
- ip stats set dev $rp1.200 l3_stats on
-}
-
-router_rp1_200_destroy()
-{
- ip stats set dev $rp1.200 l3_stats off
- ip address del dev $rp1.200 2001:db8:1::2/64
- ip address del dev $rp1.200 192.0.2.2/28
- ip link del dev $rp1.200
-}
-
-router_create()
-{
- ip link set dev $rp1 up
- router_rp1_200_create
-
- ip link set dev $rp2 up
- vlan_create $rp2 200 "" 192.0.2.17/28 2001:db8:2::2/64
-}
-
-router_destroy()
-{
- vlan_destroy $rp2 200
- ip link set dev $rp2 down
-
- router_rp1_200_destroy
- ip link set dev $rp1 down
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- rp1=${NETIFS[p2]}
-
- rp2=${NETIFS[p3]}
- h2=${NETIFS[p4]}
-
- rp1mac=$(mac_get $rp1)
- rp2mac=$(mac_get $rp2)
-
- vrf_prepare
-
- h1_create
- h2_create
-
- router_create
-
- forwarding_enable
-}
-
-cleanup()
-{
- pre_cleanup
-
- forwarding_restore
-
- router_destroy
-
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
-ping_ipv4()
-{
- ping_test $h1.200 192.0.2.18 " IPv4"
-}
-
-ping_ipv6()
-{
- ping_test $h1.200 2001:db8:2::1 " IPv6"
-}
-
-send_packets_rx_ipv4()
-{
- # Send 21 packets instead of 20, because the first one might trap and go
- # through the SW datapath, which might not bump the HW counter.
- $MZ $h1.200 -c 21 -d 20msec -p 100 \
- -a own -b $rp1mac -A 192.0.2.1 -B 192.0.2.18 \
- -q -t udp sp=54321,dp=12345
-}
-
-send_packets_rx_ipv6()
-{
- $MZ $h1.200 -6 -c 21 -d 20msec -p 100 \
- -a own -b $rp1mac -A 2001:db8:1::1 -B 2001:db8:2::1 \
- -q -t udp sp=54321,dp=12345
-}
-
-send_packets_tx_ipv4()
-{
- $MZ $h2.200 -c 21 -d 20msec -p 100 \
- -a own -b $rp2mac -A 192.0.2.18 -B 192.0.2.1 \
- -q -t udp sp=54321,dp=12345
-}
-
-send_packets_tx_ipv6()
-{
- $MZ $h2.200 -6 -c 21 -d 20msec -p 100 \
- -a own -b $rp2mac -A 2001:db8:2::1 -B 2001:db8:1::1 \
- -q -t udp sp=54321,dp=12345
-}
-
-___test_stats()
-{
- local dir=$1; shift
- local prot=$1; shift
-
- local a
- local b
-
- a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
- send_packets_${dir}_${prot}
- "$@"
- b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
- hw_stats_get l3_stats $rp1.200 ${dir} packets)
- check_err $? "Traffic not reflected in the counter: $a -> $b"
-}
-
-__test_stats()
-{
- local dir=$1; shift
- local prot=$1; shift
-
- RET=0
- ___test_stats "$dir" "$prot"
- log_test "Test $dir packets: $prot"
-}
-
-test_stats_rx_ipv4()
-{
- __test_stats rx ipv4
-}
-
-test_stats_tx_ipv4()
-{
- __test_stats tx ipv4
-}
-
-test_stats_rx_ipv6()
-{
- __test_stats rx ipv6
-}
-
-test_stats_tx_ipv6()
-{
- __test_stats tx ipv6
-}
-
-# Make sure everything works well even after stats have been disabled and
-# reenabled on the same device without touching the L3 configuration.
-respin_enablement()
-{
- log_info "Turning stats off and on again"
- ip stats set dev $rp1.200 l3_stats off
- ip stats set dev $rp1.200 l3_stats on
-}
-
-# For the initial run, l3_stats is enabled on a completely set up netdevice. Now
-# do it the other way around: enabling the L3 stats on an L2 netdevice, and only
-# then apply the L3 configuration.
-reapply_config()
-{
- log_info "Reapplying configuration"
-
- router_rp1_200_destroy
-
- ip link add name $rp1.200 link $rp1 type vlan id 200
- ip link set dev $rp1.200 addrgenmode none
- ip stats set dev $rp1.200 l3_stats on
- ip link set dev $rp1.200 addrgenmode eui64
- ip link set dev $rp1.200 up
- ip address add dev $rp1.200 192.0.2.2/28
- ip address add dev $rp1.200 2001:db8:1::2/64
-}
-
-__test_stats_report()
-{
- local dir=$1; shift
- local prot=$1; shift
-
- local a
- local b
-
- RET=0
-
- a=$(hw_stats_get l3_stats $rp1.200 ${dir} packets)
- send_packets_${dir}_${prot}
- ip address flush dev $rp1.200
- b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
- hw_stats_get l3_stats $rp1.200 ${dir} packets)
- check_err $? "Traffic not reflected in the counter: $a -> $b"
- log_test "Test ${dir} packets: stats pushed on loss of L3"
-
- ip stats set dev $rp1.200 l3_stats off
- ip link del dev $rp1.200
- router_rp1_200_create
-}
-
-test_stats_report_rx()
-{
- __test_stats_report rx ipv4
-}
-
-test_stats_report_tx()
-{
- __test_stats_report tx ipv4
-}
-
-test_destroy_enabled()
-{
- RET=0
-
- ip link del dev $rp1.200
- router_rp1_200_create
-
- log_test "Destroy l3_stats-enabled netdev"
-}
-
-test_double_enable()
-{
- RET=0
- ___test_stats rx ipv4 \
- ip stats set dev $rp1.200 l3_stats on
- log_test "Test stat retention across a spurious enablement"
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-used=$(ip -j stats show dev $rp1.200 group offload subgroup hw_stats_info |
- jq '.[].info.l3_stats.used')
-kind=$(ip -j -d link show dev $rp1 |
- jq -r '.[].linkinfo.info_kind')
-if [[ $used != true ]]; then
- if [[ $kind == veth ]]; then
- log_test_skip "l3_stats not offloaded on veth interface"
- EXIT_STATUS=$ksft_skip
- else
- RET=1 log_test "l3_stats not offloaded"
- fi
-else
- tests_run
-fi
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh b/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
deleted file mode 100755
index 7594bbb49029..000000000000
--- a/tools/testing/selftests/net/forwarding/hw_stats_l3_gre.sh
+++ /dev/null
@@ -1,111 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Test L3 stats on IP-in-IP GRE tunnel without key.
-
-# This test uses flat topology for IP tunneling tests. See ipip_lib.sh for more
-# details.
-
-ALL_TESTS="
- ping_ipv4
- test_stats_rx
- test_stats_tx
-"
-NUM_NETIFS=6
-source lib.sh
-source ipip_lib.sh
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- ol1=${NETIFS[p2]}
-
- ul1=${NETIFS[p3]}
- ul2=${NETIFS[p4]}
-
- ol2=${NETIFS[p5]}
- h2=${NETIFS[p6]}
-
- ol1mac=$(mac_get $ol1)
-
- forwarding_enable
- vrf_prepare
- h1_create
- h2_create
- sw1_flat_create gre $ol1 $ul1
- sw2_flat_create gre $ol2 $ul2
- ip stats set dev g1a l3_stats on
- ip stats set dev g2a l3_stats on
-}
-
-cleanup()
-{
- pre_cleanup
-
- ip stats set dev g1a l3_stats off
- ip stats set dev g2a l3_stats off
-
- sw2_flat_destroy $ol2 $ul2
- sw1_flat_destroy $ol1 $ul1
- h2_destroy
- h1_destroy
-
- vrf_cleanup
- forwarding_restore
-}
-
-ping_ipv4()
-{
- RET=0
-
- ping_test $h1 192.0.2.18 " gre flat"
-}
-
-send_packets_ipv4()
-{
- # Send 21 packets instead of 20, because the first one might trap and go
- # through the SW datapath, which might not bump the HW counter.
- $MZ $h1 -c 21 -d 20msec -p 100 \
- -a own -b $ol1mac -A 192.0.2.1 -B 192.0.2.18 \
- -q -t udp sp=54321,dp=12345
-}
-
-test_stats()
-{
- local dev=$1; shift
- local dir=$1; shift
-
- local a
- local b
-
- RET=0
-
- a=$(hw_stats_get l3_stats $dev $dir packets)
- send_packets_ipv4
- b=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= $a + 20" \
- hw_stats_get l3_stats $dev $dir packets)
- check_err $? "Traffic not reflected in the counter: $a -> $b"
-
- log_test "Test $dir packets: $prot"
-}
-
-test_stats_tx()
-{
- test_stats g1a tx
-}
-
-test_stats_rx()
-{
- test_stats g2a rx
-}
-
-skip_on_veth
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
index d40183b4eccc..2ab9eaaa5532 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_custom_multipath_hash.sh
@@ -280,42 +280,42 @@ send_src_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A "198.51.100.2-198.51.100.253" -B 203.0.113.2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B "203.0.113.2-203.0.113.253" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_src_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp4()
{
ip vrf exec v$h1 $MZ $h1 -q -p 64 \
-A 198.51.100.2 -B 203.0.113.2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
send_src_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A "2001:db8:1::2-2001:db8:1::fd" -B 2001:db8:2::2 \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_dst_ipv6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B "2001:db8:2::2-2001:db8:2::fd" \
- -d 1msec -c 50 -t udp "sp=20000,dp=30000"
+ -d $MZ_DELAY -c 50 -t udp "sp=20000,dp=30000"
}
send_flowlabel()
@@ -331,14 +331,14 @@ send_src_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=0-32768,dp=30000"
+ -d $MZ_DELAY -t udp "sp=0-32768,dp=30000"
}
send_dst_udp6()
{
ip vrf exec v$h1 $MZ -6 $h1 -q -p 64 \
-A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=20000,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=20000,dp=0-32768"
}
custom_hash_test()
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
index a257979d3fc5..32d1461f37b7 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v4_multipath.sh
@@ -266,7 +266,7 @@ multipath4_test()
ip vrf exec v$h1 \
$MZ $h1 -q -p 64 -A "192.0.3.2-192.0.3.62" -B "192.0.4.2-192.0.4.62" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
index d208f5243ade..e1a4b50505f5 100755
--- a/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_inner_v6_multipath.sh
@@ -265,9 +265,9 @@ multipath6_test()
local t0_222=$(tc_rule_stats_get $ul32 222 ingress)
ip vrf exec v$h1 \
- $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::1e" \
- -B "2001:db8:2::2-2001:db8:2::1e" \
- -d 1msec -c 50 -t udp "sp=1024,dp=1024"
+ $MZ $h1 -6 -q -p 64 -A "2001:db8:1::2-2001:db8:1::3e" \
+ -B "2001:db8:2::2-2001:db8:2::3e" \
+ -d $MZ_DELAY -c 50 -t udp "sp=1024,dp=1024"
sleep 1
local t1_111=$(tc_rule_stats_get $ul32 111 ingress)
diff --git a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
index 58a3597037b1..24f4ab328bd2 100644
--- a/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ip6gre_lib.sh
@@ -356,7 +356,7 @@ test_traffic_ip4ip6()
flower $TC_FLAG dst_ip 203.0.113.1 action pass
$MZ $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 198.51.100.1 \
- -B 203.0.113.1 -t ip -q -d 1msec
+ -B 203.0.113.1 -t ip -q -d $MZ_DELAY
# Check ports after encap and after decap.
tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
@@ -389,7 +389,7 @@ test_traffic_ip6ip6()
flower $TC_FLAG dst_ip 2001:db8:2::1 action pass
$MZ -6 $h1 -c 1000 -p 64 -a $h1mac -b $ol1mac -A 2001:db8:1::1 \
- -B 2001:db8:2::1 -t ip -q -d 1msec
+ -B 2001:db8:2::1 -t ip -q -d $MZ_DELAY
# Check ports after encap and after decap.
tc_check_at_least_x_packets "dev $ul1 egress" 101 1000
diff --git a/tools/testing/selftests/net/forwarding/ipip_lib.sh b/tools/testing/selftests/net/forwarding/ipip_lib.sh
index 30f36a57bae6..01e62c4ac94d 100644
--- a/tools/testing/selftests/net/forwarding/ipip_lib.sh
+++ b/tools/testing/selftests/net/forwarding/ipip_lib.sh
@@ -141,7 +141,6 @@
# | $h2 + |
# | 192.0.2.18/28 |
# +---------------------------+
-source lib.sh
h1_create()
{
diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh
index e37a15eda6c2..112c85c35092 100755..100644
--- a/tools/testing/selftests/net/forwarding/lib.sh
+++ b/tools/testing/selftests/net/forwarding/lib.sh
@@ -2,45 +2,141 @@
# SPDX-License-Identifier: GPL-2.0
##############################################################################
+# Topology description. p1 looped back to p2, p3 to p4 and so on.
+
+declare -A NETIFS=(
+ [p1]=veth0
+ [p2]=veth1
+ [p3]=veth2
+ [p4]=veth3
+ [p5]=veth4
+ [p6]=veth5
+ [p7]=veth6
+ [p8]=veth7
+ [p9]=veth8
+ [p10]=veth9
+)
+
+# Port that does not have a cable connected.
+: "${NETIF_NO_CABLE:=eth8}"
+
+##############################################################################
# Defines
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-# Can be overridden by the configuration file.
-PING=${PING:=ping}
-PING6=${PING6:=ping6}
-MZ=${MZ:=mausezahn}
-ARPING=${ARPING:=arping}
-TEAMD=${TEAMD:=teamd}
-WAIT_TIME=${WAIT_TIME:=5}
-PAUSE_ON_FAIL=${PAUSE_ON_FAIL:=no}
-PAUSE_ON_CLEANUP=${PAUSE_ON_CLEANUP:=no}
-NETIF_TYPE=${NETIF_TYPE:=veth}
-NETIF_CREATE=${NETIF_CREATE:=yes}
-MCD=${MCD:=smcrouted}
-MC_CLI=${MC_CLI:=smcroutectl}
-PING_COUNT=${PING_COUNT:=10}
-PING_TIMEOUT=${PING_TIMEOUT:=5}
-WAIT_TIMEOUT=${WAIT_TIMEOUT:=20}
-INTERFACE_TIMEOUT=${INTERFACE_TIMEOUT:=600}
-LOW_AGEING_TIME=${LOW_AGEING_TIME:=1000}
-REQUIRE_JQ=${REQUIRE_JQ:=yes}
-REQUIRE_MZ=${REQUIRE_MZ:=yes}
-REQUIRE_MTOOLS=${REQUIRE_MTOOLS:=no}
-STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no}
-TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=}
-TROUTE6=${TROUTE6:=traceroute6}
-
-relative_path="${BASH_SOURCE%/*}"
-if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then
- relative_path="."
+# Networking utilities.
+: "${PING:=ping}"
+: "${PING6:=ping6}" # Some distros just use ping.
+: "${ARPING:=arping}"
+: "${TROUTE6:=traceroute6}"
+
+# Packet generator.
+: "${MZ:=mausezahn}" # Some distributions use 'mz'.
+: "${MZ_DELAY:=0}"
+
+# Host configuration tools.
+: "${TEAMD:=teamd}"
+: "${MCD:=smcrouted}"
+: "${MC_CLI:=smcroutectl}"
+
+# Constants for netdevice bring-up:
+# Default time in seconds to wait for an interface to come up before giving up
+# and bailing out. Used during initial setup.
+: "${INTERFACE_TIMEOUT:=600}"
+# Like INTERFACE_TIMEOUT, but default for ad-hoc waiting in testing scripts.
+: "${WAIT_TIMEOUT:=20}"
+# Time to wait after interfaces participating in the test are all UP.
+: "${WAIT_TIME:=5}"
+
+# Whether to pause on, respectively, after a failure and before cleanup.
+: "${PAUSE_ON_FAIL:=no}"
+: "${PAUSE_ON_CLEANUP:=no}"
+
+# Whether to create virtual interfaces, and what netdevice type they should be.
+: "${NETIF_CREATE:=yes}"
+: "${NETIF_TYPE:=veth}"
+
+# Constants for ping tests:
+# How many packets should be sent.
+: "${PING_COUNT:=10}"
+# Timeout (in seconds) before ping exits regardless of how many packets have
+# been sent or received
+: "${PING_TIMEOUT:=5}"
+
+# Minimum ageing_time (in centiseconds) supported by hardware
+: "${LOW_AGEING_TIME:=1000}"
+
+# Whether to check for availability of certain tools.
+: "${REQUIRE_JQ:=yes}"
+: "${REQUIRE_MZ:=yes}"
+: "${REQUIRE_MTOOLS:=no}"
+
+# Whether to override MAC addresses on interfaces participating in the test.
+: "${STABLE_MAC_ADDRS:=no}"
+
+# Flags for tcpdump
+: "${TCPDUMP_EXTRA_FLAGS:=}"
+
+# Flags for TC filters.
+: "${TC_FLAG:=skip_hw}"
+
+# Whether the machine is "slow" -- i.e. might be incapable of running tests
+# involving heavy traffic. This might be the case on a debug kernel, a VM, or
+# e.g. a low-power board.
+: "${KSFT_MACHINE_SLOW:=no}"
+
+##############################################################################
+# Find netifs by test-specified driver name
+
+driver_name_get()
+{
+ local dev=$1; shift
+ local driver_path="/sys/class/net/$dev/device/driver"
+
+ if [[ -L $driver_path ]]; then
+ basename `realpath $driver_path`
+ fi
+}
+
+netif_find_driver()
+{
+ local ifnames=`ip -j link show | jq -r ".[].ifname"`
+ local count=0
+
+ for ifname in $ifnames
+ do
+ local driver_name=`driver_name_get $ifname`
+ if [[ ! -z $driver_name && $driver_name == $NETIF_FIND_DRIVER ]]; then
+ count=$((count + 1))
+ NETIFS[p$count]="$ifname"
+ fi
+ done
+}
+
+# Whether to find netdevice according to the driver speficied by the importer
+: "${NETIF_FIND_DRIVER:=}"
+
+if [[ $NETIF_FIND_DRIVER ]]; then
+ unset NETIFS
+ declare -A NETIFS
+ netif_find_driver
fi
-if [[ -f $relative_path/forwarding.config ]]; then
- source "$relative_path/forwarding.config"
+net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+if [[ -f $net_forwarding_dir/forwarding.config ]]; then
+ source "$net_forwarding_dir/forwarding.config"
fi
+source "$net_forwarding_dir/../lib.sh"
+
+# timeout in seconds
+slowwait()
+{
+ local timeout_sec=$1; shift
+
+ loopy_wait "sleep 0.1" "$((timeout_sec * 1000))" "$@"
+}
+
##############################################################################
# Sanity checks
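Review bot: netif_find_driver never declares `ifname`, so the loop variable leaks into every script that sources lib.sh. Its test `$driver_name == $NETIF_FIND_DRIVER` also leaves the right-hand side unquoted, which `[[ ]]` matches as a glob pattern rather than a literal driver name. Suggested: add `local ifname` next to `local count=0` and use `[[ -n $driver_name && $driver_name == "$NETIF_FIND_DRIVER" ]]`. check_driver in the `@@ -164,22 +278,23 @@` hunk has the same unquoted comparison (`!= $expected`). Separately, the comment above NETIF_FIND_DRIVER misspells "specified" as "speficied".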
@@ -148,6 +244,24 @@ check_ethtool_mm_support()
fi
}
+check_ethtool_counter_group_support()
+{
+ ethtool --help 2>&1| grep -- '--all-groups' &> /dev/null
+ if [[ $? -ne 0 ]]; then
+ echo "SKIP: ethtool too old; it is missing standard counter group support"
+ exit $ksft_skip
+ fi
+}
+
+check_ethtool_pmac_std_stats_support()
+{
+ local dev=$1; shift
+ local grp=$1; shift
+
+ [ 0 -ne $(ethtool --json -S $dev --all-groups --src pmac 2>/dev/null \
+ | jq ".[].\"$grp\" | length") ]
+}
+
check_locked_port_support()
{
if ! bridge -d link show | grep -q " locked"; then
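Review bot: In check_ethtool_pmac_std_stats_support the command substitution inside `[ 0 -ne $(...) ]` is unquoted and unchecked. When ethtool fails, or jq prints nothing or more than one line, `[` receives a malformed expression, writes an error to stderr and returns 2. The caller still gets a non-zero status, but cannot tell "group is empty" from "the query itself broke". Capture the value first and validate it: `local n; n=$(ethtool --json -S "$dev" --all-groups --src pmac 2>/dev/null | jq --arg grp "$grp" '.[][$grp] | length')` followed by `[[ $n =~ ^[0-9]+$ ]] && ((n != 0))`. Passing the group name with `--arg`, as __nh_stats_get does later in this diff, also keeps it out of the jq program text.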
@@ -164,22 +278,23 @@ check_port_mab_support()
fi
}
-skip_on_veth()
+if [[ "$(id -u)" -ne 0 ]]; then
+ echo "SKIP: need root privileges"
+ exit $ksft_skip
+fi
+
+check_driver()
{
- local kind=$(ip -j -d link show dev ${NETIFS[p1]} |
- jq -r '.[].linkinfo.info_kind')
+ local dev=$1; shift
+ local expected=$1; shift
+ local driver_name=`driver_name_get $dev`
- if [[ $kind == veth ]]; then
- echo "SKIP: Test cannot be run with veth pairs"
+ if [[ $driver_name != $expected ]]; then
+ echo "SKIP: expected driver $expected for $dev, got $driver_name instead"
exit $ksft_skip
fi
}
-if [[ "$(id -u)" -ne 0 ]]; then
- echo "SKIP: need root privileges"
- exit $ksft_skip
-fi
-
if [[ "$CHECK_TC" = "yes" ]]; then
check_tc_version
fi
@@ -194,6 +309,21 @@ require_command()
fi
}
+# IPv6 support was added in v3.0
+check_mtools_version()
+{
+ local version="$(msend -v)"
+ local major
+
+ version=${version##msend version }
+ major=$(echo $version | cut -d. -f1)
+
+ if [ $major -lt 3 ]; then
+ echo "SKIP: expected mtools version 3.0, got $version"
+ exit $ksft_skip
+ fi
+}
+
if [[ "$REQUIRE_JQ" = "yes" ]]; then
require_command jq
fi
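Review bot: check_mtools_version fails open when `msend -v` does not print the expected `msend version X.Y` string. `major` is then empty or non-numeric, `[ $major -lt 3 ]` errors out with a non-zero status, the `if` body is skipped and the test carries on as if the version were new enough. Validate before comparing, e.g. `major=${version%%.*}` and `if ! [[ $major =~ ^[0-9]+$ ]] || ((major < 3)); then`. `local version="$(msend -v)"` also hides msend's own exit status; declare first and assign on a separate line.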
@@ -201,15 +331,10 @@ if [[ "$REQUIRE_MZ" = "yes" ]]; then
require_command $MZ
fi
if [[ "$REQUIRE_MTOOLS" = "yes" ]]; then
- # https://github.com/vladimiroltean/mtools/
- # patched for IPv6 support
+ # https://github.com/troglobit/mtools
require_command msend
require_command mreceive
-fi
-
-if [[ ! -v NUM_NETIFS ]]; then
- echo "SKIP: importer does not define \"NUM_NETIFS\""
- exit $ksft_skip
+ check_mtools_version
fi
##############################################################################
@@ -230,6 +355,23 @@ done
##############################################################################
# Network interfaces configuration
+if [[ ! -v NUM_NETIFS ]]; then
+ echo "SKIP: importer does not define \"NUM_NETIFS\""
+ exit $ksft_skip
+fi
+
+if (( NUM_NETIFS > ${#NETIFS[@]} )); then
+ echo "SKIP: Importer requires $NUM_NETIFS NETIFS, but only ${#NETIFS[@]} are defined (${NETIFS[@]})"
+ exit $ksft_skip
+fi
+
+for i in $(seq ${#NETIFS[@]}); do
+ if [[ ! ${NETIFS[p$i]} ]]; then
+ echo "SKIP: NETIFS[p$i] not given"
+ exit $ksft_skip
+ fi
+done
+
create_netif_veth()
{
local i
@@ -317,14 +459,31 @@ EXIT_STATUS=0
# Per-test return value. Clear at the beginning of each test.
RET=0
+ret_set_ksft_status()
+{
+ local ksft_status=$1; shift
+ local msg=$1; shift
+
+ RET=$(ksft_status_merge $RET $ksft_status)
+ if (( $? )); then
+ retmsg=$msg
+ fi
+}
+
+# Whether FAILs should be interpreted as XFAILs. Internal.
+FAIL_TO_XFAIL=
+
check_err()
{
local err=$1
local msg=$2
- if [[ $RET -eq 0 && $err -ne 0 ]]; then
- RET=$err
- retmsg=$msg
+ if ((err)); then
+ if [[ $FAIL_TO_XFAIL = yes ]]; then
+ ret_set_ksft_status $ksft_xfail "$msg"
+ else
+ ret_set_ksft_status $ksft_fail "$msg"
+ fi
fi
}
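Review bot: ret_set_ksft_status relies on `$?` still holding the exit status of ksft_status_merge after the `RET=$(...)` assignment, and the meaning of that status is defined outside this file. That coupling is easy to break later, for example by inserting a statement between the two lines. I would add a comment above the assignment: `# ksft_status_merge prints the merged status; its exit status decides whether retmsg is replaced, so keep the test directly after the assignment.` The function also writes the global `retmsg`, which is worth stating in a header comment since log_test_result in a later hunk shadows that name with a local.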
@@ -333,10 +492,7 @@ check_fail()
local err=$1
local msg=$2
- if [[ $RET -eq 0 && $err -eq 0 ]]; then
- RET=1
- retmsg=$msg
- fi
+ check_err $((!err)) "$msg"
}
check_err_fail()
@@ -352,6 +508,85 @@ check_err_fail()
fi
}
+xfail_on_slow()
+{
+ if [[ $KSFT_MACHINE_SLOW = yes ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+xfail_on_veth()
+{
+ local dev=$1; shift
+ local kind
+
+ kind=$(ip -j -d link show dev $dev |
+ jq -r '.[].linkinfo.info_kind')
+ if [[ $kind = veth ]]; then
+ FAIL_TO_XFAIL=yes "$@"
+ else
+ "$@"
+ fi
+}
+
+log_test_result()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+ local result=$1; shift
+ local retmsg=$1; shift
+
+ printf "TEST: %-60s [%s]\n" "$test_name $opt_str" "$result"
+ if [[ $retmsg ]]; then
+ printf "\t%s\n" "$retmsg"
+ fi
+}
+
+pause_on_fail()
+{
+ if [[ $PAUSE_ON_FAIL == yes ]]; then
+ echo "Hit enter to continue, 'q' to quit"
+ read a
+ [[ $a == q ]] && exit 1
+ fi
+}
+
+handle_test_result_pass()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" " OK "
+}
+
+handle_test_result_fail()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" FAIL "$retmsg"
+ pause_on_fail
+}
+
+handle_test_result_xfail()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" XFAIL "$retmsg"
+ pause_on_fail
+}
+
+handle_test_result_skip()
+{
+ local test_name=$1; shift
+ local opt_str=$1; shift
+
+ log_test_result "$test_name" "$opt_str" SKIP "$retmsg"
+}
+
log_test()
{
local test_name=$1
@@ -361,31 +596,28 @@ log_test()
opt_str="($opt_str)"
fi
- if [[ $RET -ne 0 ]]; then
- EXIT_STATUS=1
- printf "TEST: %-60s [FAIL]\n" "$test_name $opt_str"
- if [[ ! -z "$retmsg" ]]; then
- printf "\t%s\n" "$retmsg"
- fi
- if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
- echo "Hit enter to continue, 'q' to quit"
- read a
- [ "$a" = "q" ] && exit 1
- fi
- return 1
+ if ((RET == ksft_pass)); then
+ handle_test_result_pass "$test_name" "$opt_str"
+ elif ((RET == ksft_xfail)); then
+ handle_test_result_xfail "$test_name" "$opt_str"
+ elif ((RET == ksft_skip)); then
+ handle_test_result_skip "$test_name" "$opt_str"
+ else
+ handle_test_result_fail "$test_name" "$opt_str"
fi
- printf "TEST: %-60s [ OK ]\n" "$test_name $opt_str"
- return 0
+ EXIT_STATUS=$(ksft_exit_status_merge $EXIT_STATUS $RET)
+ return $RET
}
log_test_skip()
{
- local test_name=$1
- local opt_str=$2
+ RET=$ksft_skip retmsg= log_test "$@"
+}
- printf "TEST: %-60s [SKIP]\n" "$test_name $opt_str"
- return 0
+log_test_xfail()
+{
+ RET=$ksft_xfail retmsg= log_test "$@"
}
log_info()
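Review bot: log_test_skip used to end with `return 0` and now returns whatever log_test returns, which is `$ksft_skip`; log_test itself now returns non-zero for XFAIL as well. Callers outside this diff that chain on the status (`log_test_skip ... && ...`) or run under `set -e` would now see a skip as a failure; I cannot tell from the hunk whether any do. If the new contract is intended, state it above log_test, whose opening lines are outside this hunk: `# Returns $RET, so XFAIL and SKIP yield a non-zero status; EXIT_STATUS carries the merged result.`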
@@ -395,29 +627,6 @@ log_info()
echo "INFO: $msg"
}
-busywait()
-{
- local timeout=$1; shift
-
- local start_time="$(date -u +%s%3N)"
- while true
- do
- local out
- out=$("$@")
- local ret=$?
- if ((!ret)); then
- echo -n "$out"
- return 0
- fi
-
- local current_time="$(date -u +%s%3N)"
- if ((current_time - start_time > timeout)); then
- echo -n "$out"
- return 1
- fi
- done
-}
-
not()
{
"$@"
@@ -487,6 +696,15 @@ busywait_for_counter()
busywait "$timeout" until_counter_is ">= $((base + delta))" "$@"
}
+slowwait_for_counter()
+{
+ local timeout=$1; shift
+ local delta=$1; shift
+
+ local base=$("$@")
+ slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@"
+}
+
setup_wait_dev()
{
local dev=$1; shift
@@ -535,6 +753,19 @@ setup_wait()
sleep $WAIT_TIME
}
+wait_for_dev()
+{
+ local dev=$1; shift
+ local timeout=${1:-$WAIT_TIMEOUT}; shift
+
+ slowwait $timeout ip link show dev $dev &> /dev/null
+ if (( $? )); then
+ check_err 1
+ log_test wait_for_dev "Interface $dev did not appear."
+ exit $EXIT_STATUS
+ fi
+}
+
cmd_jq()
{
local cmd=$1
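Review bot: In wait_for_dev the failure explanation is passed as log_test's second argument, which log_test appears to treat as opt_str and wrap in parentheses. Meanwhile `check_err 1` is called with no message, so retmsg stays empty and no reason line is printed. Passing the text to check_err reports it like every other failure: `check_err 1 "Interface $dev did not appear"` followed by `log_test wait_for_dev`. The `shift` after `${1:-$WAIT_TIMEOUT}` also returns non-zero when the caller omits the timeout, and `$timeout` and `$dev` are unquoted in the slowwait call.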
@@ -873,6 +1104,33 @@ hw_stats_get()
jq ".[0].stats64.$dir.$stat"
}
+__nh_stats_get()
+{
+ local key=$1; shift
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ ip -j -s -s nexthop show id $group_id |
+ jq --argjson member_id "$member_id" --arg key "$key" \
+ '.[].group_stats[] | select(.id == $member_id) | .[$key]'
+}
+
+nh_stats_get()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets "$group_id" "$member_id"
+}
+
+nh_stats_get_hw()
+{
+ local group_id=$1; shift
+ local member_id=$1; shift
+
+ __nh_stats_get packets_hw "$group_id" "$member_id"
+}
+
humanize()
{
local speed=$1; shift
@@ -1957,6 +2215,8 @@ bail_on_lldpad()
{
local reason1="$1"; shift
local reason2="$1"; shift
+ local caller=${FUNCNAME[1]}
+ local src=${BASH_SOURCE[1]}
if systemctl is-active --quiet lldpad; then
@@ -1977,9 +2237,17 @@ bail_on_lldpad()
an environment variable ALLOW_LLDPAD to a
non-empty string.
EOF
- exit 1
+ log_test_skip $src:$caller
+ exit $EXIT_STATUS
else
return
fi
fi
}
+
+absval()
+{
+ local v=$1; shift
+
+ echo $((v > 0 ? v : -v))
+}
diff --git a/tools/testing/selftests/net/forwarding/lib_sh_test.sh b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
new file mode 100755
index 000000000000..ff2accccaf4d
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/lib_sh_test.sh
@@ -0,0 +1,208 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# This tests the operation of lib.sh itself.
+
+ALL_TESTS="
+ test_ret
+ test_exit_status
+"
+NUM_NETIFS=0
+source lib.sh
+
+# Simulated checks.
+
+do_test()
+{
+ local msg=$1; shift
+
+ "$@"
+ check_err $? "$msg"
+}
+
+tpass()
+{
+ do_test "tpass" true
+}
+
+tfail()
+{
+ do_test "tfail" false
+}
+
+txfail()
+{
+ FAIL_TO_XFAIL=yes do_test "txfail" false
+}
+
+# Simulated tests.
+
+pass()
+{
+ RET=0
+ do_test "true" true
+ log_test "true"
+}
+
+fail()
+{
+ RET=0
+ do_test "false" false
+ log_test "false"
+}
+
+xfail()
+{
+ RET=0
+ FAIL_TO_XFAIL=yes do_test "xfalse" false
+ log_test "xfalse"
+}
+
+skip()
+{
+ RET=0
+ log_test_skip "skip"
+}
+
+slow_xfail()
+{
+ RET=0
+ xfail_on_slow do_test "slow_false" false
+ log_test "slow_false"
+}
+
+# lib.sh tests.
+
+ret_tests_run()
+{
+ local t
+
+ RET=0
+ retmsg=
+ for t in "$@"; do
+ $t
+ done
+ echo "$retmsg"
+ return $RET
+}
+
+ret_subtest()
+{
+ local expect_ret=$1; shift
+ local expect_retmsg=$1; shift
+ local -a tests=( "$@" )
+
+ local status_names=(pass fail xfail xpass skip)
+ local ret
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(ret_tests_run "${tests[@]}")
+ ret=$?
+
+ (( ret == expect_ret ))
+ check_err $? "RET=$ret expected $expect_ret"
+
+ [[ $out == $expect_retmsg ]]
+ check_err $? "retmsg=$out expected $expect_retmsg"
+
+ log_test "RET $(echo ${tests[@]}) -> ${status_names[$ret]}"
+}
+
+test_ret()
+{
+ ret_subtest $ksft_pass ""
+
+ ret_subtest $ksft_pass "" tpass
+ ret_subtest $ksft_fail "tfail" tfail
+ ret_subtest $ksft_xfail "txfail" txfail
+
+ ret_subtest $ksft_pass "" tpass tpass
+ ret_subtest $ksft_fail "tfail" tpass tfail
+ ret_subtest $ksft_xfail "txfail" tpass txfail
+
+ ret_subtest $ksft_fail "tfail" tfail tpass
+ ret_subtest $ksft_xfail "txfail" txfail tpass
+
+ ret_subtest $ksft_fail "tfail" tfail tfail
+ ret_subtest $ksft_fail "tfail" tfail txfail
+
+ ret_subtest $ksft_fail "tfail" txfail tfail
+
+ ret_subtest $ksft_xfail "txfail" txfail txfail
+}
+
+exit_status_tests_run()
+{
+ EXIT_STATUS=0
+ tests_run > /dev/null
+ return $EXIT_STATUS
+}
+
+exit_status_subtest()
+{
+ local expect_exit_status=$1; shift
+ local tests=$1; shift
+ local what=$1; shift
+
+ local status_names=(pass fail xfail xpass skip)
+ local exit_status
+ local out
+
+ RET=0
+
+ # Run this in a subshell, so that our environment is intact.
+ out=$(TESTS="$tests" exit_status_tests_run)
+ exit_status=$?
+
+ (( exit_status == expect_exit_status ))
+ check_err $? "EXIT_STATUS=$exit_status, expected $expect_exit_status"
+
+ log_test "EXIT_STATUS $tests$what -> ${status_names[$exit_status]}"
+}
+
+test_exit_status()
+{
+ exit_status_subtest $ksft_pass ":"
+
+ exit_status_subtest $ksft_pass "pass"
+ exit_status_subtest $ksft_fail "fail"
+ exit_status_subtest $ksft_pass "xfail"
+ exit_status_subtest $ksft_skip "skip"
+
+ exit_status_subtest $ksft_pass "pass pass"
+ exit_status_subtest $ksft_fail "pass fail"
+ exit_status_subtest $ksft_pass "pass xfail"
+ exit_status_subtest $ksft_skip "pass skip"
+
+ exit_status_subtest $ksft_fail "fail pass"
+ exit_status_subtest $ksft_pass "xfail pass"
+ exit_status_subtest $ksft_skip "skip pass"
+
+ exit_status_subtest $ksft_fail "fail fail"
+ exit_status_subtest $ksft_fail "fail xfail"
+ exit_status_subtest $ksft_fail "fail skip"
+
+ exit_status_subtest $ksft_fail "xfail fail"
+ exit_status_subtest $ksft_fail "skip fail"
+
+ exit_status_subtest $ksft_pass "xfail xfail"
+ exit_status_subtest $ksft_skip "xfail skip"
+ exit_status_subtest $ksft_skip "skip xfail"
+
+ exit_status_subtest $ksft_skip "skip skip"
+
+ KSFT_MACHINE_SLOW=yes \
+ exit_status_subtest $ksft_pass "slow_xfail" ": slow"
+
+ KSFT_MACHINE_SLOW=no \
+ exit_status_subtest $ksft_fail "slow_xfail" ": fast"
+}
+
+trap pre_cleanup EXIT
+
+tests_run
+
+exit $EXIT_STATUS
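Review bot: ret_subtest compares `[[ $out == $expect_retmsg ]]` with the right-hand side unquoted, so the expected message is matched as a glob pattern rather than literally. An expectation containing `*`, `?` or `[` would then pass on any output that merely matches the pattern. Since this file exists to pin lib.sh's behaviour, the comparison should be exact: `[[ $out == "$expect_retmsg" ]]`. In the same function `$(echo ${tests[@]})` can be written as `${tests[*]}`. The `status_names` lookup assumes the ksft codes are 0 to 4 in that order, which is defined outside this file and deserves a comment.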
diff --git a/tools/testing/selftests/net/forwarding/loopback.sh b/tools/testing/selftests/net/forwarding/loopback.sh
deleted file mode 100755
index 8f4057310b5b..000000000000
--- a/tools/testing/selftests/net/forwarding/loopback.sh
+++ /dev/null
@@ -1,102 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
-ALL_TESTS="loopback_test"
-NUM_NETIFS=2
-source tc_common.sh
-source lib.sh
-
-h1_create()
-{
- simple_if_init $h1 192.0.2.1/24
- tc qdisc add dev $h1 clsact
-}
-
-h1_destroy()
-{
- tc qdisc del dev $h1 clsact
- simple_if_fini $h1 192.0.2.1/24
-}
-
-h2_create()
-{
- simple_if_init $h2
-}
-
-h2_destroy()
-{
- simple_if_fini $h2
-}
-
-loopback_test()
-{
- RET=0
-
- tc filter add dev $h1 ingress protocol arp pref 1 handle 101 flower \
- skip_hw arp_op reply arp_tip 192.0.2.1 action drop
-
- $MZ $h1 -c 1 -t arp -q
-
- tc_check_packets "dev $h1 ingress" 101 1
- check_fail $? "Matched on a filter without loopback setup"
-
- ethtool -K $h1 loopback on
- check_err $? "Failed to enable loopback"
-
- setup_wait_dev $h1
-
- $MZ $h1 -c 1 -t arp -q
-
- tc_check_packets "dev $h1 ingress" 101 1
- check_err $? "Did not match on filter with loopback"
-
- ethtool -K $h1 loopback off
- check_err $? "Failed to disable loopback"
-
- $MZ $h1 -c 1 -t arp -q
-
- tc_check_packets "dev $h1 ingress" 101 2
- check_fail $? "Matched on a filter after loopback was removed"
-
- tc filter del dev $h1 ingress protocol arp pref 1 handle 101 flower
-
- log_test "loopback"
-}
-
-setup_prepare()
-{
- h1=${NETIFS[p1]}
- h2=${NETIFS[p2]}
-
- vrf_prepare
-
- h1_create
- h2_create
-
- if ethtool -k $h1 | grep loopback | grep -q fixed; then
- log_test "SKIP: dev $h1 does not support loopback feature"
- exit $ksft_skip
- fi
-}
-
-cleanup()
-{
- pre_cleanup
-
- h2_destroy
- h1_destroy
-
- vrf_cleanup
-}
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
index fac486178ef7..0c36546e131e 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh
@@ -1,6 +1,6 @@
# SPDX-License-Identifier: GPL-2.0
-source "$relative_path/mirror_lib.sh"
+source "$net_forwarding_dir/mirror_lib.sh"
quick_test_span_gre_dir_ips()
{
diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
index 39c03e2867f4..6e615fffa4ef 100644
--- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh
@@ -33,7 +33,7 @@
# | |
# +-------------------------------------------------------------------------+
-source "$relative_path/mirror_topo_lib.sh"
+source "$net_forwarding_dir/mirror_topo_lib.sh"
mirror_gre_topo_h3_create()
{
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
index a0d612e04990..2ba44247c60a 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh.sh
@@ -1,15 +1,53 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
ping_ipv4_blackhole
ping_ipv6_blackhole
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -204,7 +242,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -218,7 +256,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -237,7 +275,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -251,34 +289,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip nexthop replace id 106 group 104,$weight_rp12/105,$weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q >/dev/null 2>&1
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip nexthop replace id 106 group 104/105
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -301,11 +311,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
ping_ipv4_blackhole()
@@ -358,6 +363,16 @@ ping_ipv6_blackhole()
ip -6 nexthop del id 1001
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 mpath
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 mpath
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
new file mode 100644
index 000000000000..2903294d8bca
--- /dev/null
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_lib.sh
@@ -0,0 +1,119 @@
+# SPDX-License-Identifier: GPL-2.0
+
+nh_stats_do_test()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local stats_get=$1; shift
+ local mz="$@"
+
+ local dp
+
+ RET=0
+
+ sleep 2
+ for ((dp=0; dp < 60000; dp += 10000)); do
+ local dd
+ local t0_rp12=$(link_stats_tx_packets_get $rp12)
+ local t0_rp13=$(link_stats_tx_packets_get $rp13)
+ local t0_nh1=$($stats_get $group_id $nh1_id)
+ local t0_nh2=$($stats_get $group_id $nh2_id)
+
+ ip vrf exec vrf-h1 \
+ $mz -q -p 64 -d 0 -t udp \
+ "sp=1024,dp=$((dp))-$((dp + 10000))"
+ sleep 2
+
+ local t1_rp12=$(link_stats_tx_packets_get $rp12)
+ local t1_rp13=$(link_stats_tx_packets_get $rp13)
+ local t1_nh1=$($stats_get $group_id $nh1_id)
+ local t1_nh2=$($stats_get $group_id $nh2_id)
+
+ local d_rp12=$((t1_rp12 - t0_rp12))
+ local d_rp13=$((t1_rp13 - t0_rp13))
+ local d_nh1=$((t1_nh1 - t0_nh1))
+ local d_nh2=$((t1_nh2 - t0_nh2))
+
+ dd=$(absval $((d_rp12 - d_nh1)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp12=$d_rp12 d_nh1=$d_nh1"
+
+ dd=$(absval $((d_rp13 - d_nh2)))
+ ((dd < 10))
+ check_err $? "Discrepancy between link and $stats_get: d_rp13=$d_rp13 d_nh2=$d_nh2"
+ done
+
+ log_test "NH stats test $what"
+}
+
+nh_stats_test_dispatch_swhw()
+{
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ nh_stats_do_test "$what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get "${mz[@]}"
+
+ xfail_on_veth $rp11 \
+ nh_stats_do_test "HW $what" "$nh1_id" "$nh2_id" "$group_id" \
+ nh_stats_get_hw "${mz[@]}"
+}
+
+nh_stats_test_dispatch()
+{
+ local nhgtype=$1; shift
+ local what=$1; shift
+ local nh1_id=$1; shift
+ local nh2_id=$1; shift
+ local group_id=$1; shift
+ local mz="$@"
+
+ local enabled
+
+ if ! ip nexthop help 2>&1 | grep -q hw_stats; then
+ log_test_skip "NH stats test: ip doesn't support HW stats"
+ return
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats on type $nhgtype
+ enabled=$(ip -s -j -d nexthop show id $group_id |
+ jq '.[].hw_stats.enabled')
+ if [[ $enabled == true ]]; then
+ nh_stats_test_dispatch_swhw "$what" "$nh1_id" "$nh2_id" \
+ "$group_id" "${mz[@]}"
+ elif [[ $enabled == false ]]; then
+ check_err 1 "HW stats still disabled after enabling"
+ log_test "NH stats test"
+ else
+ log_test_skip "NH stats test: ip doesn't report hw_stats info"
+ fi
+
+ ip nexthop replace id $group_id group $nh1_id/$nh2_id \
+ hw_stats off type $nhgtype
+}
+
+__nh_stats_test_v4()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv4.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv4" 101 102 103 \
+ $MZ $h1 -A 192.0.2.2 -B 198.51.100.2
+ sysctl_restore net.ipv4.fib_multipath_hash_policy
+}
+
+__nh_stats_test_v6()
+{
+ local nhgtype=$1; shift
+
+ sysctl_set net.ipv6.fib_multipath_hash_policy 1
+ nh_stats_test_dispatch $nhgtype "IPv6" 104 105 106 \
+ $MZ -6 $h1 -A 2001:db8:1::2 -B 2001:db8:2::2
+ sysctl_restore net.ipv6.fib_multipath_hash_policy
+}
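Review bot: `local mz="$@"` in nh_stats_do_test, nh_stats_test_dispatch_swhw and nh_stats_test_dispatch joins the packet-generator command line into a single string. The later `"${mz[@]}"` therefore expands to one word, and the command only works because `$mz -q ...` is left unquoted and re-split. Any argument containing whitespace or a glob character (an `$MZ` path, for instance) would reach mausezahn differently from what the caller passed. Keep it an array end to end: `local -a mz=("$@")` in the three functions and `"${mz[@]}" -q -p 64 -d 0 -t udp \` at the call site.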
diff --git a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
index cb08ffe2356a..cd9e346436fc 100755
--- a/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
+++ b/tools/testing/selftests/net/forwarding/router_mpath_nh_res.sh
@@ -1,13 +1,51 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+
+# | H1 |
+# | $h1 + |
+# | 192.0.2.2/24 | |
+# | 2001:db8:1::2/64 | |
+# +-------------------|-----+
+# |
+# +-------------------|----------------------+
+# | | R1 |
+# | $rp11 + |
+# | 192.0.2.1/24 |
+# | 2001:db8:1::1/64 |
+# | |
+# | + $rp12 + $rp13 |
+# | | 169.254.2.12/24 | 169.254.3.13/24 |
+# | | fe80:2::12/64 | fe80:3::13/64 |
+# +--|--------------------|------------------+
+# | |
+# +--|--------------------|------------------+
+# | + $rp22 + $rp23 |
+# | 169.254.2.22/24 169.254.3.23/24 |
+# | fe80:2::22/64 fe80:3::23/64 |
+# | |
+# | $rp21 + |
+# | 198.51.100.1/24 | |
+# | 2001:db8:2::1/64 | R2 |
+# +-------------------|----------------------+
+# |
+# +-------------------|-----+
+# | | |
+# | $h2 + |
+# | 198.51.100.2/24 |
+# | 2001:db8:2::2/64 H2 |
+# +-------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
multipath_test
+ nh_stats_test_v4
+ nh_stats_test_v6
"
NUM_NETIFS=8
source lib.sh
+source router_mpath_nh_lib.sh
h1_create()
{
@@ -205,7 +243,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -235,7 +273,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -333,6 +371,16 @@ multipath_test()
ip nexthop replace id 106 group 104,1/105,1 type resilient
}
+nh_stats_test_v4()
+{
+ __nh_stats_test_v4 resilient
+}
+
+nh_stats_test_v6()
+{
+ __nh_stats_test_v6 resilient
+}
+
setup_prepare()
{
h1=${NETIFS[p1]}
diff --git a/tools/testing/selftests/net/forwarding/router_multipath.sh b/tools/testing/selftests/net/forwarding/router_multipath.sh
index 464821c587a5..e2be354167a1 100755
--- a/tools/testing/selftests/net/forwarding/router_multipath.sh
+++ b/tools/testing/selftests/net/forwarding/router_multipath.sh
@@ -179,7 +179,7 @@ multipath4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
ip vrf exec vrf-h1 $MZ $h1 -q -p 64 -A 192.0.2.2 -B 198.51.100.2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -195,7 +195,7 @@ multipath4_test()
sysctl_restore net.ipv4.fib_multipath_hash_policy
}
-multipath6_l4_test()
+multipath6_test()
{
local desc="$1"
local weight_rp12=$2
@@ -216,7 +216,7 @@ multipath6_l4_test()
t0_rp13=$(link_stats_tx_packets_get $rp13)
$MZ $h1 -6 -q -p 64 -A 2001:db8:1::2 -B 2001:db8:2::2 \
- -d 1msec -t udp "sp=1024,dp=0-32768"
+ -d $MZ_DELAY -t udp "sp=1024,dp=0-32768"
t1_rp12=$(link_stats_tx_packets_get $rp12)
t1_rp13=$(link_stats_tx_packets_get $rp13)
@@ -232,38 +232,6 @@ multipath6_l4_test()
sysctl_restore net.ipv6.fib_multipath_hash_policy
}
-multipath6_test()
-{
- local desc="$1"
- local weight_rp12=$2
- local weight_rp13=$3
- local t0_rp12 t0_rp13 t1_rp12 t1_rp13
- local packets_rp12 packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 weight $weight_rp12 \
- nexthop via fe80:3::23 dev $rp13 weight $weight_rp13
-
- t0_rp12=$(link_stats_tx_packets_get $rp12)
- t0_rp13=$(link_stats_tx_packets_get $rp13)
-
- # Generate 16384 echo requests, each with a random flow label.
- for _ in $(seq 1 16384); do
- ip vrf exec vrf-h1 $PING6 2001:db8:2::2 -F 0 -c 1 -q &> /dev/null
- done
-
- t1_rp12=$(link_stats_tx_packets_get $rp12)
- t1_rp13=$(link_stats_tx_packets_get $rp13)
-
- let "packets_rp12 = $t1_rp12 - $t0_rp12"
- let "packets_rp13 = $t1_rp13 - $t0_rp13"
- multipath_eval "$desc" $weight_rp12 $weight_rp13 $packets_rp12 $packets_rp13
-
- ip route replace 2001:db8:2::/64 vrf vrf-r1 \
- nexthop via fe80:2::22 dev $rp12 \
- nexthop via fe80:3::23 dev $rp13
-}
-
multipath_test()
{
log_info "Running IPv4 multipath tests"
@@ -275,11 +243,6 @@ multipath_test()
multipath6_test "ECMP" 1 1
multipath6_test "Weighted MP 2:1" 2 1
multipath6_test "Weighted MP 11:45" 11 45
-
- log_info "Running IPv6 L4 hash multipath tests"
- multipath6_l4_test "ECMP" 1 1
- multipath6_l4_test "Weighted MP 2:1" 2 1
- multipath6_l4_test "Weighted MP 11:45" 11 45
}
setup_prepare()
diff --git a/tools/testing/selftests/net/forwarding/router_nh.sh b/tools/testing/selftests/net/forwarding/router_nh.sh
index f3a53738bdcc..92904b01eae9 100755
--- a/tools/testing/selftests/net/forwarding/router_nh.sh
+++ b/tools/testing/selftests/net/forwarding/router_nh.sh
@@ -1,6 +1,20 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# +-------------------------+ +-------------------------+
+# | H1 | | H2 |
+# | $h1 + | | $h2 + |
+# | 192.0.2.2/24 | | | 198.51.100.2/24 | |
+# | 2001:db8:1::2/64 | | | 2001:db8:2::2/64 | |
+# +-------------------|-----+ +-------------------|-----+
+# | |
+# +-------------------|----------------------------|-----+
+# | R1 | | |
+# | $rp1 + $rp2 + |
+# | 192.0.2.1/24 198.51.100.1/24 |
+# | 2001:db8:1::1/64 2001:db8:2::1/64 |
+# +------------------------------------------------------+
+
ALL_TESTS="
ping_ipv4
ping_ipv6
diff --git a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
index cdf689e99458..f9d26a7911bb 100644
--- a/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
+++ b/tools/testing/selftests/net/forwarding/sch_ets_tests.sh
@@ -199,25 +199,28 @@ ets_set_dwrr_two_bands()
ets_test_strict()
{
ets_set_strict
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_mixed()
{
ets_set_mixed
- ets_dwrr_test_01
- ets_dwrr_test_12
+ xfail_on_slow ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_12
}
ets_test_dwrr()
{
ets_set_dwrr_uniform
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_varying
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_change_quantum
- ets_dwrr_test_012
+ xfail_on_slow ets_dwrr_test_012
+
ets_set_dwrr_two_bands
- ets_dwrr_test_01
+ xfail_on_slow ets_dwrr_test_01
}
diff --git a/tools/testing/selftests/net/forwarding/sch_red.sh b/tools/testing/selftests/net/forwarding/sch_red.sh
index 81f31179ac88..17f28644568e 100755
--- a/tools/testing/selftests/net/forwarding/sch_red.sh
+++ b/tools/testing/selftests/net/forwarding/sch_red.sh
@@ -451,35 +451,35 @@ uninstall_qdisc()
ecn_test()
{
install_qdisc ecn
- do_ecn_test $BACKLOG
+ xfail_on_slow do_ecn_test $BACKLOG
uninstall_qdisc
}
ecn_nodrop_test()
{
install_qdisc ecn nodrop
- do_ecn_nodrop_test $BACKLOG
+ xfail_on_slow do_ecn_nodrop_test $BACKLOG
uninstall_qdisc
}
red_test()
{
install_qdisc
- do_red_test $BACKLOG
+ xfail_on_slow do_red_test $BACKLOG
uninstall_qdisc
}
red_qevent_test()
{
install_qdisc qevent early_drop block 10
- do_red_qevent_test $BACKLOG
+ xfail_on_slow do_red_qevent_test $BACKLOG
uninstall_qdisc
}
ecn_qevent_test()
{
install_qdisc ecn qevent mark block 10
- do_ecn_qevent_test $BACKLOG
+ xfail_on_slow do_ecn_qevent_test $BACKLOG
uninstall_qdisc
}
diff --git a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
index d1f26cb7cd73..9cd884d4a5de 100644
--- a/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
+++ b/tools/testing/selftests/net/forwarding/sch_tbf_core.sh
@@ -227,7 +227,7 @@ do_tbf_test()
local nr=$(rate $t2 $t3 10)
local nr_pct=$((100 * (nr - er) / er))
((-5 <= nr_pct && nr_pct <= 5))
- check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
+ xfail_on_slow check_err $? "Expected rate $(humanize $er), got $(humanize $nr), which is $nr_pct% off. Required accuracy is +-5%."
log_test "TC $((vlan - 10)): TBF rate ${mbit}Mbit"
}
diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index b0f5e55d2d0b..589629636502 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -235,9 +235,6 @@ mirred_egress_to_ingress_tcp_test()
check_err $? "didn't mirred redirect ICMP"
tc_check_packets "dev $h1 ingress" 102 10
check_err $? "didn't drop mirred ICMP"
- local overlimits=$(tc_rule_stats_get ${h1} 101 egress .overlimits)
- test ${overlimits} = 10
- check_err $? "wrong overlimits, expected 10 got ${overlimits}"
tc filter del dev $h1 egress protocol ip pref 100 handle 100 flower
tc filter del dev $h1 egress protocol ip pref 101 handle 101 flower
diff --git a/tools/testing/selftests/net/forwarding/tc_common.sh b/tools/testing/selftests/net/forwarding/tc_common.sh
index bce8bb8d2b6f..2e3326edfa9a 100644
--- a/tools/testing/selftests/net/forwarding/tc_common.sh
+++ b/tools/testing/selftests/net/forwarding/tc_common.sh
@@ -4,7 +4,7 @@
CHECK_TC="yes"
# Can be overridden by the configuration file. See lib.sh
-TC_HIT_TIMEOUT=${TC_HIT_TIMEOUT:=1000} # ms
+: "${TC_HIT_TIMEOUT:=1000}" # ms
tc_check_packets()
{
diff --git a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
index 20a7cb7222b8..c2420bb72c12 100755
--- a/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
+++ b/tools/testing/selftests/net/forwarding/tc_flower_l2_miss.sh
@@ -209,14 +209,17 @@ test_l2_miss_multicast()
# both registered and unregistered multicast traffic.
bridge link set dev $swp2 mcast_router 2
+ # Set the Max Response Delay to 100 centiseconds (1 second) so that the
+ # bridge will start forwarding according to its MDB soon after a
+ # multicast querier is enabled.
+ ip link set dev br1 type bridge mcast_query_response_interval 100
+
# Forwarding according to MDB entries only takes place when the bridge
# detects that there is a valid querier in the network. Set the bridge
# as the querier and assign it a valid IPv6 link-local address to be
# used as the source address for MLD queries.
ip link set dev br1 type bridge mcast_querier 1
ip -6 address add fe80::1/64 nodad dev br1
- # Wait the default Query Response Interval (10 seconds) for the bridge
- # to determine that there are no other queriers in the network.
sleep 10
test_l2_miss_multicast_ipv4
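Review bot: The `sleep 10` that stays in this block has lost its explanation. The removed comment tied it to the default 10-second Query Response Interval, but the hunk now lowers `mcast_query_response_interval` to 100 centiseconds. Either shorten the wait to match the new interval, or keep the part of the old comment that still applies above the sleep: `# Wait for the bridge to determine that there are no other queriers in the network.` `test_l2_miss_multicast` starts and ends outside this hunk.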
@@ -224,6 +227,7 @@ test_l2_miss_multicast()
ip -6 address del fe80::1/64 dev br1
ip link set dev br1 type bridge mcast_querier 0
+ ip link set dev br1 type bridge mcast_query_response_interval 1000
bridge link set dev $swp2 mcast_router 1
}
diff --git a/tools/testing/selftests/net/forwarding/tc_police.sh b/tools/testing/selftests/net/forwarding/tc_police.sh
index 0a51eef21b9e..5103f64a71d6 100755
--- a/tools/testing/selftests/net/forwarding/tc_police.sh
+++ b/tools/testing/selftests/net/forwarding/tc_police.sh
@@ -140,7 +140,7 @@ police_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
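Review bot: The policer rate is written twice, as `er=$((10 * 1000 * 1000))` here and as the literal `10mbit` in each `tc filter add` rule, so the expected and configured rates can drift apart. This commit had to change both spellings in every place. The same pair appears in the later hunks for `police_shared_common_test`, `police_shared_test` and `police_mirror_common_test`. A single file-level value would keep them together, for example `POLICE_RATE_MBIT=10`, with `rate ${POLICE_RATE_MBIT}mbit` in the rules and `local er=$((POLICE_RATE_MBIT * 1000 * 1000))` in the checks. `police_common_test` starts and ends outside this hunk.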
@@ -157,7 +157,7 @@ police_rx_test()
# Rule to police traffic destined to $h2 on ingress of $rp1
tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok
+ action police rate 10mbit burst 16k conform-exceed drop/ok
police_common_test "police on rx"
@@ -169,7 +169,7 @@ police_tx_test()
# Rule to police traffic destined to $h2 on egress of $rp2
tc filter add dev $rp2 egress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok
+ action police rate 10mbit burst 16k conform-exceed drop/ok
police_common_test "police on tx"
@@ -190,7 +190,7 @@ police_shared_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -211,7 +211,7 @@ police_shared_test()
# Rule to police traffic destined to $h2 on ingress of $rp1
tc filter add dev $rp1 ingress protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/ok \
+ action police rate 10mbit burst 16k conform-exceed drop/ok \
index 10
# Rule to police a different flow destined to $h2 on egress of $rp2
@@ -250,7 +250,7 @@ police_mirror_common_test()
# Rule to police traffic destined to $h2 and mirror to $h3
tc filter add dev $pol_if $dir protocol ip pref 1 handle 101 flower \
dst_ip 198.51.100.1 ip_proto udp dst_port 54321 \
- action police rate 80mbit burst 16k conform-exceed drop/pipe \
+ action police rate 10mbit burst 16k conform-exceed drop/pipe \
action mirred egress mirror dev $rp3
mausezahn $h1 -a own -b $(mac_get $rp1) -A 192.0.2.1 -B 198.51.100.1 \
@@ -260,7 +260,7 @@ police_mirror_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h2 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
@@ -270,7 +270,7 @@ police_mirror_common_test()
sleep 10
local t1=$(tc_rule_stats_get $h3 1 ingress .bytes)
- local er=$((80 * 1000 * 1000))
+ local er=$((10 * 1000 * 1000))
local nr=$(rate $t0 $t1 10)
local nr_pct=$((100 * (nr - er) / er))
((-10 <= nr_pct && nr_pct <= 10))
diff --git a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
index 5a5dd9034819..79775b10b99f 100755
--- a/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
+++ b/tools/testing/selftests/net/forwarding/tc_tunnel_key.sh
@@ -1,7 +1,5 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
ALL_TESTS="tunnel_key_nofrag_test"
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
index eb307ca37bfa..6f0a2e452ba1 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d.sh
@@ -495,7 +495,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -532,7 +532,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 prot ip \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 192.0.2.3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 prot ip
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
index ac97f07e5ce8..a603f7b0a08f 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1d_ipv6.sh
@@ -354,7 +354,7 @@ __ping_ipv4()
# Send 100 packets and verify that at least 100 packets hit the rule,
# to overcome ARP noise.
- PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip
+ PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip
check_err $? "Ping failed"
tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
@@ -410,7 +410,7 @@ __ping_ipv6()
# Send 100 packets and verify that at least 100 packets hit the rule,
# to overcome neighbor discovery noise.
- PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip
+ PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip
check_err $? "Ping failed"
tc_check_at_least_x_packets "dev $rp1 egress" 101 100
@@ -616,7 +616,7 @@ vxlan_ping_test()
local delta=$((t1 - t0))
# Tolerate a couple stray extra packets.
- ((expect <= delta && delta <= expect + 2))
+ ((expect <= delta && delta <= expect + 5))
check_err $? "$capture_dev: Expected to capture $expect packets, got $delta."
}
@@ -653,7 +653,7 @@ __test_ecn_encap()
RET=0
tc filter add dev v1 egress pref 77 protocol ipv6 \
- flower ip_tos $tos action pass
+ flower ip_tos $tos ip_proto udp dst_port $VXPORT action pass
sleep 1
vxlan_ping_test $h1 2001:db8:1::3 "-Q $q" v1 egress 77 10
tc filter del dev v1 egress pref 77 protocol ipv6
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
index a596bbf3ed6a..fb9a34cb50c6 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q.sh
@@ -750,7 +750,7 @@ __test_learning()
expects[0]=0; expects[$idx1]=10; expects[$idx2]=0
vxlan_flood_test $mac $dst $vid "${expects[@]}"
- sleep 20
+ sleep 60
bridge fdb show brport $vx | grep $mac | grep -q self
check_fail $?
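Review bot: The new `sleep 60` only makes sense next to the 30-second ageing time that `test_learning` sets in the following hunk, and nothing at the sleep says so. The two values will go out of step the next time one of them is tuned. A comment above the sleep would record the relationship: `# Wait twice the ageing time (30 seconds) configured in test_learning().` `__test_learning` starts and ends outside this hunk.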
@@ -796,11 +796,11 @@ test_learning()
local dst=192.0.2.100
local vid=10
- # Enable learning on the VxLAN devices and set ageing time to 10 seconds
- ip link set dev br1 type bridge ageing_time 1000
- ip link set dev vx10 type vxlan ageing 10
+ # Enable learning on the VxLAN devices and set ageing time to 30 seconds
+ ip link set dev br1 type bridge ageing_time 3000
+ ip link set dev vx10 type vxlan ageing 30
ip link set dev vx10 type vxlan learning
- ip link set dev vx20 type vxlan ageing 10
+ ip link set dev vx20 type vxlan ageing 30
ip link set dev vx20 type vxlan learning
reapply_config
diff --git a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
index d880df89bc8b..e83fde79f40d 100755
--- a/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
+++ b/tools/testing/selftests/net/forwarding/vxlan_bridge_1q_ipv6.sh
@@ -457,7 +457,7 @@ __ping_ipv4()
# Send 100 packets and verify that at least 100 packets hit the rule,
# to overcome ARP noise.
- PING_COUNT=100 PING_TIMEOUT=11 ping_do $dev $dst_ip
+ PING_COUNT=100 PING_TIMEOUT=20 ping_do $dev $dst_ip
check_err $? "Ping failed"
tc_check_at_least_x_packets "dev $rp1 egress" 101 10 100
@@ -522,7 +522,7 @@ __ping_ipv6()
# Send 100 packets and verify that at least 100 packets hit the rule,
# to overcome neighbor discovery noise.
- PING_COUNT=100 PING_TIMEOUT=11 ping6_do $dev $dst_ip
+ PING_COUNT=100 PING_TIMEOUT=20 ping6_do $dev $dst_ip
check_err $? "Ping failed"
tc_check_at_least_x_packets "dev $rp1 egress" 101 100
diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh
new file mode 100755
index 000000000000..977070ed42b3
--- /dev/null
+++ b/tools/testing/selftests/net/fq_band_pktlimit.sh
@@ -0,0 +1,59 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Verify that FQ has a packet limit per band:
+#
+# 1. set the limit to 10 per band
+# 2. send 20 pkts on band A: verify that 10 are queued, 10 dropped
+# 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped
+# 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped
+#
+# Send packets with a delay to ensure that previously sent
+# packets are still queued when later ones are sent.
+# Use SO_TXTIME for this.
+
+die() {
+ echo "$1"
+ exit 1
+}
+
+# run inside private netns
+if [[ $# -eq 0 ]]; then
+ ./in_netns.sh "$0" __subprocess
+ exit
+fi
+
+ip link add type dummy
+ip link set dev dummy0 up
+ip -6 addr add fdaa::1/128 dev dummy0
+ip -6 route add fdaa::/64 dev dummy0
+tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10
+
+DELAY=400000
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
+OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000
+OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000
+OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Initial stats will report zero sent, as all packets are still
+# queued in FQ. Sleep for at least the delay period and see that
+# twenty are now sent.
+sleep 0.6
+OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')"
+
+# Log the output after the test
+echo "${OUT1}"
+echo "${OUT2}"
+echo "${OUT3}"
+echo "${OUT4}"
+
+# Test the output for expected values
+echo "${OUT1}" | grep -q '0\ pkt\ (dropped\ 10' || die "unexpected drop count at 1"
+echo "${OUT2}" | grep -q '0\ pkt\ (dropped\ 30' || die "unexpected drop count at 2"
+echo "${OUT3}" | grep -q '0\ pkt\ (dropped\ 40' || die "unexpected drop count at 3"
+echo "${OUT4}" | grep -q '20\ pkt\ (dropped\ 40' || die "unexpected accept count at 4"
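Review bot: The four final `grep -q` patterns are unanchored, so they accept more than the counts named in the header comment. `'0\ pkt\ (dropped\ 10'` also matches a line reading `20 pkt (dropped 10` or `0 pkt (dropped 100`, which means step 1 cannot tell "nothing sent yet" from "20 sent". Bounding both numbers closes that gap, e.g. `grep -q ' 0 pkt (dropped 10,'`, assuming `tc -s` prints a comma after the drop count. The backslashes before spaces are also unnecessary inside single quotes, and newer GNU grep releases warn about a stray backslash before whitespace. Separately, none of the `ip`/`tc` setup commands is checked, so a failed setup shows up only as a confusing "unexpected drop count"; `|| die "setup failed"` on those lines would report it where it happens.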
diff --git a/tools/testing/selftests/net/gre_gso.sh b/tools/testing/selftests/net/gre_gso.sh
index 3224651db97b..5100d90f92d2 100755
--- a/tools/testing/selftests/net/gre_gso.sh
+++ b/tools/testing/selftests/net/gre_gso.sh
@@ -2,10 +2,8 @@
# SPDX-License-Identifier: GPL-2.0
# This test is for checking GRE GSO.
-
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="gre_gso"
@@ -13,8 +11,6 @@ TESTS="gre_gso"
VERBOSE=0
PAUSE_ON_FAIL=no
PAUSE=no
-IP="ip -netns ns1"
-NS_EXEC="ip netns exec ns1"
TMPFILE=`mktemp`
PID=
@@ -50,13 +46,13 @@ log_test()
setup()
{
set -e
- ip netns add ns1
- ip netns set ns1 auto
- $IP link set dev lo up
+ setup_ns ns1
+ IP="ip -netns $ns1"
+ NS_EXEC="ip netns exec $ns1"
ip link add veth0 type veth peer name veth1
ip link set veth0 up
- ip link set veth1 netns ns1
+ ip link set veth1 netns $ns1
$IP link set veth1 name veth0
$IP link set veth0 up
@@ -70,7 +66,7 @@ cleanup()
[ -n "$PID" ] && kill $PID
ip link del dev gre1 &> /dev/null
ip link del dev veth0 &> /dev/null
- ip netns del ns1
+ cleanup_ns $ns1
}
get_linklocal()
@@ -145,7 +141,7 @@ gre6_gso_test()
setup
a1=$(get_linklocal veth0)
- a2=$(get_linklocal veth0 ns1)
+ a2=$(get_linklocal veth0 $ns1)
gre_create_tun $a1 $a2
diff --git a/tools/testing/selftests/net/gro.c b/tools/testing/selftests/net/gro.c
index 30024d0ed373..b2184847e388 100644
--- a/tools/testing/selftests/net/gro.c
+++ b/tools/testing/selftests/net/gro.c
@@ -71,6 +71,12 @@
#define MAX_PAYLOAD (IP_MAXPACKET - sizeof(struct tcphdr) - sizeof(struct ipv6hdr))
#define NUM_LARGE_PKT (MAX_PAYLOAD / MSS)
#define MAX_HDR_LEN (ETH_HLEN + sizeof(struct ipv6hdr) + sizeof(struct tcphdr))
+#define MIN_EXTHDR_SIZE 8
+#define EXT_PAYLOAD_1 "\x00\x00\x00\x00\x00\x00"
+#define EXT_PAYLOAD_2 "\x11\x11\x11\x11\x11\x11"
+
+#define ipv6_optlen(p) (((p)->hdrlen+1) << 3) /* calculate IPv6 extension header len */
+#define BUILD_BUG_ON(condition) ((void)sizeof(char[1 - 2*!!(condition)]))
static const char *addr6_src = "fdaa::2";
static const char *addr6_dst = "fdaa::1";
@@ -87,6 +93,7 @@ static bool tx_socket = true;
static int tcp_offset = -1;
static int total_hdr_len = -1;
static int ethhdr_proto = -1;
+static const int num_flush_id_cases = 6;
static void vlog(const char *fmt, ...)
{
@@ -104,7 +111,7 @@ static void setup_sock_filter(int fd)
const int dport_off = tcp_offset + offsetof(struct tcphdr, dest);
const int ethproto_off = offsetof(struct ethhdr, h_proto);
int optlen = 0;
- int ipproto_off;
+ int ipproto_off, opt_ipproto_off;
int next_off;
if (proto == PF_INET)
@@ -113,17 +120,36 @@ static void setup_sock_filter(int fd)
next_off = offsetof(struct ipv6hdr, nexthdr);
ipproto_off = ETH_HLEN + next_off;
+ /* Overridden later if exthdrs are used: */
+ opt_ipproto_off = ipproto_off;
+
if (strcmp(testname, "ip") == 0) {
if (proto == PF_INET)
optlen = sizeof(struct ip_timestamp);
- else
- optlen = sizeof(struct ip6_frag);
+ else {
+ BUILD_BUG_ON(sizeof(struct ip6_hbh) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_dest) > MIN_EXTHDR_SIZE);
+ BUILD_BUG_ON(sizeof(struct ip6_frag) > MIN_EXTHDR_SIZE);
+
+ /* same size for HBH and Fragment extension header types */
+ optlen = MIN_EXTHDR_SIZE;
+ opt_ipproto_off = ETH_HLEN + sizeof(struct ipv6hdr)
+ + offsetof(struct ip6_ext, ip6e_nxt);
+ }
}
+ /* this filter validates the following:
+ * - packet is IPv4/IPv6 according to the running test.
+ * - packet is TCP. Also handles the case of one extension header and then TCP.
+ * - checks the packet tcp dport equals to DPORT. Also handles the case of one
+ * extension header and then TCP.
+ */
struct sock_filter filter[] = {
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, ethproto_off),
- BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 7),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, ntohs(ethhdr_proto), 0, 9),
BPF_STMT(BPF_LD + BPF_B + BPF_ABS, ipproto_off),
+ BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 2, 0),
+ BPF_STMT(BPF_LD + BPF_B + BPF_ABS, opt_ipproto_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, IPPROTO_TCP, 0, 5),
BPF_STMT(BPF_LD + BPF_H + BPF_ABS, dport_off),
BPF_JUMP(BPF_JMP + BPF_JEQ + BPF_K, DPORT, 2, 0),
@@ -576,6 +602,146 @@ static void add_ipv4_ts_option(void *buf, void *optpkt)
iph->check = checksum_fold(iph, sizeof(struct iphdr) + optlen, 0);
}
+static void add_ipv6_exthdr(void *buf, void *optpkt, __u8 exthdr_type, char *ext_payload)
+{
+ struct ipv6_opt_hdr *exthdr = (struct ipv6_opt_hdr *)(optpkt + tcp_offset);
+ struct ipv6hdr *iph = (struct ipv6hdr *)(optpkt + ETH_HLEN);
+ char *exthdr_payload_start = (char *)(exthdr + 1);
+
+ exthdr->hdrlen = 0;
+ exthdr->nexthdr = IPPROTO_TCP;
+
+ memcpy(exthdr_payload_start, ext_payload, MIN_EXTHDR_SIZE - sizeof(*exthdr));
+
+ memcpy(optpkt, buf, tcp_offset);
+ memcpy(optpkt + tcp_offset + MIN_EXTHDR_SIZE, buf + tcp_offset,
+ sizeof(struct tcphdr) + PAYLOAD_LEN);
+
+ iph->nexthdr = exthdr_type;
+ iph->payload_len = htons(ntohs(iph->payload_len) + MIN_EXTHDR_SIZE);
+}
+
+static void fix_ip4_checksum(struct iphdr *iph)
+{
+ iph->check = 0;
+ iph->check = checksum_fold(iph, sizeof(struct iphdr), 0);
+}
+
+static void send_flush_id_case(int fd, struct sockaddr_ll *daddr, int tcase)
+{
+ static char buf1[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf2[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char buf3[MAX_HDR_LEN + PAYLOAD_LEN];
+ bool send_three = false;
+ struct iphdr *iph1;
+ struct iphdr *iph2;
+ struct iphdr *iph3;
+
+ iph1 = (struct iphdr *)(buf1 + ETH_HLEN);
+ iph2 = (struct iphdr *)(buf2 + ETH_HLEN);
+ iph3 = (struct iphdr *)(buf3 + ETH_HLEN);
+
+ create_packet(buf1, 0, 0, PAYLOAD_LEN, 0);
+ create_packet(buf2, PAYLOAD_LEN, 0, PAYLOAD_LEN, 0);
+ create_packet(buf3, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
+
+ switch (tcase) {
+ case 0: /* DF=1, Incrementing - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 1: /* DF=1, Fixed - should coalesce */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 2: /* DF=0, Incrementing - should coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(9);
+ break;
+
+ case 3: /* DF=0, Fixed - should not coalesce */
+ iph1->frag_off &= ~htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off &= ~htons(IP_DF);
+ iph2->id = htons(8);
+ break;
+
+ case 4: /* DF=1, two packets incrementing, and one fixed - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(9);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+
+ case 5: /* DF=1, two packets fixed, and one incrementing - should
+ * coalesce only the first two packets
+ */
+ iph1->frag_off |= htons(IP_DF);
+ iph1->id = htons(8);
+
+ iph2->frag_off |= htons(IP_DF);
+ iph2->id = htons(8);
+
+ iph3->frag_off |= htons(IP_DF);
+ iph3->id = htons(9);
+ send_three = true;
+ break;
+ }
+
+ fix_ip4_checksum(iph1);
+ fix_ip4_checksum(iph2);
+ write_packet(fd, buf1, total_hdr_len + PAYLOAD_LEN, daddr);
+ write_packet(fd, buf2, total_hdr_len + PAYLOAD_LEN, daddr);
+
+ if (send_three) {
+ fix_ip4_checksum(iph3);
+ write_packet(fd, buf3, total_hdr_len + PAYLOAD_LEN, daddr);
+ }
+}
+
+static void test_flush_id(int fd, struct sockaddr_ll *daddr, char *fin_pkt)
+{
+ for (int i = 0; i < num_flush_id_cases; i++) {
+ sleep(1);
+ send_flush_id_case(fd, daddr, i);
+ sleep(1);
+ write_packet(fd, fin_pkt, total_hdr_len, daddr);
+ }
+}
+
+static void send_ipv6_exthdr(int fd, struct sockaddr_ll *daddr, char *ext_data1, char *ext_data2)
+{
+ static char buf[MAX_HDR_LEN + PAYLOAD_LEN];
+ static char exthdr_pck[sizeof(buf) + MIN_EXTHDR_SIZE];
+
+ create_packet(buf, 0, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data1);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+
+ create_packet(buf, PAYLOAD_LEN * 1, 0, PAYLOAD_LEN, 0);
+ add_ipv6_exthdr(buf, exthdr_pck, IPPROTO_HOPOPTS, ext_data2);
+ write_packet(fd, exthdr_pck, total_hdr_len + PAYLOAD_LEN + MIN_EXTHDR_SIZE, daddr);
+}
+
/* IPv4 options shouldn't coalesce */
static void send_ip_options(int fd, struct sockaddr_ll *daddr)
{
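Review bot: `add_ipv6_exthdr()` copies a fixed `MIN_EXTHDR_SIZE - sizeof(*exthdr)` bytes from `ext_payload` without knowing how long that buffer is, so a shorter string from a future caller would be over-read. The two payloads passed from `gro_sender` are 6-byte literals, so it holds today, but the contract is only implicit. Since the callers pass string literals, the parameters should be `const char *ext_payload` here and `const char *ext_data1, const char *ext_data2` in `send_ipv6_exthdr()`. The size assumption can be pinned inside the function with `BUILD_BUG_ON(sizeof(EXT_PAYLOAD_1) - 1 != MIN_EXTHDR_SIZE - sizeof(struct ipv6_opt_hdr));`.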
@@ -697,7 +863,7 @@ static void send_fragment6(int fd, struct sockaddr_ll *daddr)
create_packet(buf, PAYLOAD_LEN * i, 0, PAYLOAD_LEN, 0);
write_packet(fd, buf, bufpkt_len, daddr);
}
-
+ sleep(1);
create_packet(buf, PAYLOAD_LEN * 2, 0, PAYLOAD_LEN, 0);
memset(extpkt, 0, extpkt_len);
@@ -760,6 +926,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
vlog("}, Total %d packets\nReceived {", correct_num_pkts);
while (1) {
+ ip_ext_len = 0;
pkt_size = recv(fd, buffer, IP_MAXPACKET + ETH_HLEN + 1, 0);
if (pkt_size < 0)
error(1, errno, "could not receive");
@@ -767,7 +934,7 @@ static void check_recv_pkts(int fd, int *correct_payload,
if (iph->version == 4)
ip_ext_len = (iph->ihl - 5) * 4;
else if (ip6h->version == 6 && ip6h->nexthdr != IPPROTO_TCP)
- ip_ext_len = sizeof(struct ip6_frag);
+ ip_ext_len = MIN_EXTHDR_SIZE;
tcph = (struct tcphdr *)(buffer + tcp_offset + ip_ext_len);
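Review bot: The receive path treats every IPv6 packet whose `nexthdr` is not TCP as carrying exactly one `MIN_EXTHDR_SIZE` extension header. Within the visible lines, `tcph` is then taken at `tcp_offset + ip_ext_len` without comparing that offset with `pkt_size`. The socket filter in `setup_sock_filter` appears to admit only the packet shapes the sender produces, so this works for the test. A shorter frame would still be parsed from bytes left in `buffer` by an earlier `recv()`. A guard before the `tcph` assignment would make the assumption explicit: `if (pkt_size < tcp_offset + ip_ext_len + (int)sizeof(struct tcphdr)) error(1, 0, "short packet");`. `check_recv_pkts` continues outside this hunk, so an equivalent check may already exist further down.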
@@ -879,8 +1046,24 @@ static void gro_sender(void)
send_fragment4(txfd, &daddr);
sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ test_flush_id(txfd, &daddr, fin_pkt);
} else if (proto == PF_INET6) {
+ sleep(1);
send_fragment6(txfd, &daddr);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ /* send IPv6 packets with ext header with same payload */
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_1);
+ sleep(1);
+ write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
+
+ sleep(1);
+ /* send IPv6 packets with ext header with different payload */
+ send_ipv6_exthdr(txfd, &daddr, EXT_PAYLOAD_1, EXT_PAYLOAD_2);
+ sleep(1);
write_packet(txfd, fin_pkt, total_hdr_len, &daddr);
}
} else if (strcmp(testname, "large") == 0) {
@@ -991,12 +1174,51 @@ static void gro_receiver(void)
printf("fragmented ip4 doesn't coalesce: ");
check_recv_pkts(rxfd, correct_payload, 2);
+
+ /* is_atomic checks */
+ printf("DF=1, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=1, Fixed - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Incrementing - should coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("DF=0, Fixed - should not coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Incrementing and one fixed - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
+
+ printf("DF=1, 2 Fixed and one incrementing - should coalesce only first 2 packets: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 2);
} else if (proto == PF_INET6) {
/* GRO doesn't check for ipv6 hop limit when flushing.
* Hence no corresponding test to the ipv4 case.
*/
printf("fragmented ip6 doesn't coalesce: ");
correct_payload[0] = PAYLOAD_LEN * 2;
+ correct_payload[1] = PAYLOAD_LEN;
+ correct_payload[2] = PAYLOAD_LEN;
+ check_recv_pkts(rxfd, correct_payload, 3);
+
+ printf("ipv6 with ext header does coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN * 2;
+ check_recv_pkts(rxfd, correct_payload, 1);
+
+ printf("ipv6 with ext header with different payloads doesn't coalesce: ");
+ correct_payload[0] = PAYLOAD_LEN;
+ correct_payload[1] = PAYLOAD_LEN;
check_recv_pkts(rxfd, correct_payload, 2);
}
} else if (strcmp(testname, "large") == 0) {
diff --git a/tools/testing/selftests/net/gro.sh b/tools/testing/selftests/net/gro.sh
index 342ad27f631b..02c21ff4ca81 100755
--- a/tools/testing/selftests/net/gro.sh
+++ b/tools/testing/selftests/net/gro.sh
@@ -23,14 +23,19 @@ run_test() {
# on every try.
for tries in {1..3}; do
# Actual test starts here
- ip netns exec server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
+ ip netns exec $server_ns ./gro "${ARGS[@]}" "--rx" "--iface" "server" \
1>>log.txt &
server_pid=$!
sleep 0.5 # to allow for socket init
- ip netns exec client_ns ./gro "${ARGS[@]}" "--iface" "client" \
+ ip netns exec $client_ns ./gro "${ARGS[@]}" "--iface" "client" \
1>>log.txt
wait "${server_pid}"
exit_code=$?
+ if [[ ${test} == "large" && -n "${KSFT_MACHINE_SLOW}" && \
+ ${exit_code} -ne 0 ]]; then
+ echo "Ignoring errors due to slow environment" 1>&2
+ exit_code=0
+ fi
if [[ "${exit_code}" -eq 0 ]]; then
break;
fi
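Review bot: With `KSFT_MACHINE_SLOW` set, any non-zero exit of the `large` test is rewritten to 0, so a crash or a real coalescing regression is reported as a pass just like a timing miss. The original status is not kept anywhere. At minimum the message should carry it: `echo "Ignoring errors due to slow environment (exit ${exit_code})" 1>&2`. If the harness for this script can express an expected failure, reporting that instead of success would match the `xfail_on_slow` wrappers used in the forwarding tests of this same series. `run_test` starts and ends outside this hunk, and `${test}` is assigned outside it.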
diff --git a/tools/testing/selftests/net/hsr/Makefile b/tools/testing/selftests/net/hsr/Makefile
index 92c1d9d080cd..884cd2cc0681 100644
--- a/tools/testing/selftests/net/hsr/Makefile
+++ b/tools/testing/selftests/net/hsr/Makefile
@@ -2,6 +2,7 @@
top_srcdir = ../../../../..
-TEST_PROGS := hsr_ping.sh
+TEST_PROGS := hsr_ping.sh hsr_redbox.sh
+TEST_FILES += hsr_common.sh
include ../../lib.mk
diff --git a/tools/testing/selftests/net/hsr/hsr_common.sh b/tools/testing/selftests/net/hsr/hsr_common.sh
new file mode 100644
index 000000000000..8e97b1f2e7e5
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_common.sh
@@ -0,0 +1,84 @@
+# SPDX-License-Identifier: GPL-2.0
+# Common code for HSR testing scripts
+
+source ../lib.sh
+ret=0
+ksft_skip=4
+
+# $1: IP address
+is_v6()
+{
+ [ -z "${1##*:*}" ]
+}
+
+do_ping()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 2"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+do_ping_long()
+{
+ local netns="$1"
+ local connect_addr="$2"
+ local ping_args="-q -c 10"
+
+ if is_v6 "${connect_addr}"; then
+ $ipv6 || return 0
+ ping_args="${ping_args} -6"
+ fi
+
+ OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
+ if [ $? -ne 0 ] ; then
+ echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
+ ret=1
+ return 1
+ fi
+
+ VAL="$(echo $OUT | cut -d' ' -f1-8)"
+ SED_VAL="$(echo ${VAL} | sed -r -e 's/([0-9]{2}).*([0-9]{2}).*[[:space:]]([0-9]+%).*/\1 transmitted \2 received \3 loss/')"
+ if [ "${SED_VAL}" != "10 transmitted 10 received 0% loss" ]
+ then
+ echo "$netns -> $connect_addr ping TEST [ FAIL ]"
+ echo "Expect to send and receive 10 packets and no duplicates."
+ echo "Full message: ${OUT}."
+ ret=1
+ return 1
+ fi
+
+ return 0
+}
+
+stop_if_error()
+{
+ local msg="$1"
+
+ if [ ${ret} -ne 0 ]; then
+ echo "FAIL: ${msg}" 1>&2
+ exit ${ret}
+ fi
+}
+
+check_prerequisites()
+{
+ ip -Version > /dev/null 2>&1
+ if [ $? -ne 0 ];then
+ echo "SKIP: Could not run test without ip tool"
+ exit $ksft_skip
+ fi
+}
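Review bot: The sed-based comparison in `do_ping_long` no longer rejects duplicate replies, although the failure message still promises "no duplicates". The regex is unanchored and its `.*` groups are greedy, so a summary such as `10 packets transmitted, 10 received, +10 duplicates, 0% packet loss` (the usual iputils wording; confirm against the ping in use) normalises to `10 transmitted 10 received 0% loss` and passes. The exact string compare removed from `hsr_ping.sh` would have failed on that line. An explicit guard before the sed step restores the check: `echo "${OUT}" | grep -q duplicates && { ret=1; return 1; }`. Anchoring the pattern with `^` would also stop it from matching in the middle of the line.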
diff --git a/tools/testing/selftests/net/hsr/hsr_ping.sh b/tools/testing/selftests/net/hsr/hsr_ping.sh
index 1c6457e54625..790294c8af83 100755
--- a/tools/testing/selftests/net/hsr/hsr_ping.sh
+++ b/tools/testing/selftests/net/hsr/hsr_ping.sh
@@ -1,10 +1,10 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-ret=0
-ksft_skip=4
ipv6=true
+source ./hsr_common.sh
+
optstring="h4"
usage() {
echo "Usage: $0 [OPTION]"
@@ -27,88 +27,6 @@ while getopts "$optstring" option;do
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-
-cleanup()
-{
- local netns
- for netns in "$ns1" "$ns2" "$ns3" ;do
- ip netns del $netns
- done
-}
-
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
-do_ping()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 2"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- ip netns exec ${netns} ping ${ping_args} $connect_addr >/dev/null
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- return 0
-}
-
-do_ping_long()
-{
- local netns="$1"
- local connect_addr="$2"
- local ping_args="-q -c 10"
-
- if is_v6 "${connect_addr}"; then
- $ipv6 || return 0
- ping_args="${ping_args} -6"
- fi
-
- OUT="$(LANG=C ip netns exec ${netns} ping ${ping_args} $connect_addr | grep received)"
- if [ $? -ne 0 ] ; then
- echo "$netns -> $connect_addr ping [ FAIL ]" 1>&2
- ret=1
- return 1
- fi
-
- VAL="$(echo $OUT | cut -d' ' -f1-8)"
- if [ "$VAL" != "10 packets transmitted, 10 received, 0% packet loss," ]
- then
- echo "$netns -> $connect_addr ping TEST [ FAIL ]"
- echo "Expect to send and receive 10 packets and no duplicates."
- echo "Full message: ${OUT}."
- ret=1
- return 1
- fi
-
- return 0
-}
-
-stop_if_error()
-{
- local msg="$1"
-
- if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
- exit ${ret}
- fi
-}
-
do_complete_ping_test()
{
echo "INFO: Initial validation ping."
@@ -248,27 +166,13 @@ setup_hsr_interfaces()
ip -net "$ns3" link set hsr3 up
}
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-trap cleanup EXIT
+check_prerequisites
+setup_ns ns1 ns2 ns3
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+trap cleanup_all_ns EXIT
setup_hsr_interfaces 0
do_complete_ping_test
-cleanup
-
-for i in "$ns1" "$ns2" "$ns3" ;do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
setup_hsr_interfaces 1
do_complete_ping_test
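Review bot: The second run, `setup_hsr_interfaces 1`, now executes in the same three namespaces the HSRv0 run used, because this hunk removes the `cleanup` call and the namespace re-creation loop between the two runs and puts nothing in their place. setup_hsr_interfaces is defined mostly outside this hunk, so this is inferred, but if it adds its veth and hsr links unconditionally the v1 pass will either fail on already-existing links or test leftover v0 state. Resetting the namespaces before `setup_hsr_interfaces 1` (for example a second `setup_ns ns1 ns2 ns3`, if lib.sh supports repeated calls) would restore the clean start. The removed loop also did `|| exit $ksft_skip` when a namespace could not be created, while the result of the new call is ignored; `setup_ns ns1 ns2 ns3 || exit $ksft_skip` keeps that behaviour.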
diff --git a/tools/testing/selftests/net/hsr/hsr_redbox.sh b/tools/testing/selftests/net/hsr/hsr_redbox.sh
new file mode 100755
index 000000000000..1f36785347c0
--- /dev/null
+++ b/tools/testing/selftests/net/hsr/hsr_redbox.sh
@@ -0,0 +1,121 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+ipv6=false
+
+source ./hsr_common.sh
+
+do_complete_ping_test()
+{
+ echo "INFO: Initial validation ping (HSR-SAN/RedBox)."
+ # Each node has to be able to reach each one.
+ do_ping "${ns1}" 100.64.0.2
+ do_ping "${ns2}" 100.64.0.1
+ # Ping between SANs (test bridge)
+ do_ping "${ns4}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.41
+ # Ping from SANs to hsr1 (via hsr2) (and opposite)
+ do_ping "${ns3}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.3
+ do_ping "${ns1}" 100.64.0.41
+ do_ping "${ns4}" 100.64.0.1
+ do_ping "${ns1}" 100.64.0.51
+ do_ping "${ns5}" 100.64.0.1
+ stop_if_error "Initial validation failed."
+
+ # Wait for MGNT HSR frames being received and nodes being
+ # merged.
+ sleep 5
+
+ echo "INFO: Longer ping test (HSR-SAN/RedBox)."
+ # Ping from SAN to hsr1 (via hsr2)
+ do_ping_long "${ns3}" 100.64.0.1
+ # Ping from hsr1 (via hsr2) to SANs (and opposite)
+ do_ping_long "${ns1}" 100.64.0.3
+ do_ping_long "${ns1}" 100.64.0.41
+ do_ping_long "${ns4}" 100.64.0.1
+ do_ping_long "${ns1}" 100.64.0.51
+ do_ping_long "${ns5}" 100.64.0.1
+ stop_if_error "Longer ping test failed."
+
+ echo "INFO: All good."
+}
+
+setup_hsr_interfaces()
+{
+ local HSRv="$1"
+
+ echo "INFO: preparing interfaces for HSRv${HSRv} (HSR-SAN/RedBox)."
+#
+# IPv4 addresses (100.64.X.Y/24), and [X.Y] is presented on below diagram:
+#
+#
+# |NS1 | |NS4 |
+# | [0.1] | | |
+# | /-- hsr1 --\ | | [0.41] |
+# | ns1eth1 ns1eth2 | | ns4eth1 (SAN) |
+# |------------------------| |-------------------|
+# | | |
+# | | |
+# | | |
+# |------------------------| |-------------------------------|
+# | ns2eth1 ns2eth2 | | ns3eth2 |
+# | \-- hsr2 --/ | | / |
+# | [0.2] \ | | / | |------------|
+# | ns2eth3 |---| ns3eth1 -- ns3br1 -- ns3eth3--|--| ns5eth1 |
+# | (interlink)| | [0.3] [0.11] | | [0.51] |
+# |NS2 (RedBOX) | |NS3 (BR) | | NS5 (SAN) |
+#
+#
+ # Check if iproute2 supports adding interlink port to hsrX device
+ ip link help hsr | grep -q INTERLINK
+ [ $? -ne 0 ] && { echo "iproute2: HSR interlink interface not supported!"; exit 0; }
+
+ # Create interfaces for name spaces
+ ip link add ns1eth1 netns "${ns1}" type veth peer name ns2eth1 netns "${ns2}"
+ ip link add ns1eth2 netns "${ns1}" type veth peer name ns2eth2 netns "${ns2}"
+ ip link add ns2eth3 netns "${ns2}" type veth peer name ns3eth1 netns "${ns3}"
+ ip link add ns3eth2 netns "${ns3}" type veth peer name ns4eth1 netns "${ns4}"
+ ip link add ns3eth3 netns "${ns3}" type veth peer name ns5eth1 netns "${ns5}"
+
+ sleep 1
+
+ ip -n "${ns1}" link set ns1eth1 up
+ ip -n "${ns1}" link set ns1eth2 up
+
+ ip -n "${ns2}" link set ns2eth1 up
+ ip -n "${ns2}" link set ns2eth2 up
+ ip -n "${ns2}" link set ns2eth3 up
+
+ ip -n "${ns3}" link add name ns3br1 type bridge
+ ip -n "${ns3}" link set ns3br1 up
+ ip -n "${ns3}" link set ns3eth1 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth2 master ns3br1 up
+ ip -n "${ns3}" link set ns3eth3 master ns3br1 up
+
+ ip -n "${ns4}" link set ns4eth1 up
+ ip -n "${ns5}" link set ns5eth1 up
+
+ ip -net "${ns1}" link add name hsr1 type hsr slave1 ns1eth1 slave2 ns1eth2 supervision 45 version ${HSRv} proto 0
+ ip -net "${ns2}" link add name hsr2 type hsr slave1 ns2eth1 slave2 ns2eth2 interlink ns2eth3 supervision 45 version ${HSRv} proto 0
+
+ ip -n "${ns1}" addr add 100.64.0.1/24 dev hsr1
+ ip -n "${ns2}" addr add 100.64.0.2/24 dev hsr2
+ ip -n "${ns3}" addr add 100.64.0.11/24 dev ns3br1
+ ip -n "${ns3}" addr add 100.64.0.3/24 dev ns3eth1
+ ip -n "${ns4}" addr add 100.64.0.41/24 dev ns4eth1
+ ip -n "${ns5}" addr add 100.64.0.51/24 dev ns5eth1
+
+ ip -n "${ns1}" link set hsr1 up
+ ip -n "${ns2}" link set hsr2 up
+}
+
+check_prerequisites
+setup_ns ns1 ns2 ns3 ns4 ns5
+
+trap cleanup_all_ns EXIT
+
+setup_hsr_interfaces 1
+do_complete_ping_test
+
+exit $ret
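Review bot: When iproute2 lacks interlink support, setup_hsr_interfaces ends the script with `exit 0`, which the kselftest harness records as a pass even though nothing was tested. `ksft_skip` is already defined by hsr_common.sh, so the check can report a skip instead: `ip link help hsr | grep -q INTERLINK || { echo "SKIP: iproute2: HSR interlink interface not supported!"; exit $ksft_skip; }`. The `ip link add` and `ip -n ... addr add` calls that follow are not checked either, so a failed setup only shows up later as ping failures; a one-line comment stating that this is intentional would help the next reader.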
diff --git a/tools/testing/selftests/net/icmp.sh b/tools/testing/selftests/net/icmp.sh
index e4b04cd1644a..824cb0e35eff 100755
--- a/tools/testing/selftests/net/icmp.sh
+++ b/tools/testing/selftests/net/icmp.sh
@@ -18,8 +18,8 @@
# that address space, so the kernel should substitute the dummy address
# 192.0.0.8 defined in RFC7600.
-NS1=ns1
-NS2=ns2
+source lib.sh
+
H1_IP=172.16.0.1/32
H1_IP6=2001:db8:1::1
RT1=172.16.1.0/24
@@ -32,15 +32,13 @@ TMPFILE=$(mktemp)
cleanup()
{
rm -f "$TMPFILE"
- ip netns del $NS1
- ip netns del $NS2
+ cleanup_ns $NS1 $NS2
}
trap cleanup EXIT
# Namespaces
-ip netns add $NS1
-ip netns add $NS2
+setup_ns NS1 NS2
# Connectivity
ip -netns $NS1 link add veth0 type veth peer name veth0 netns $NS2
diff --git a/tools/testing/selftests/net/icmp_redirect.sh b/tools/testing/selftests/net/icmp_redirect.sh
index 7b9d6e31b8e7..d6f0e449c029 100755
--- a/tools/testing/selftests/net/icmp_redirect.sh
+++ b/tools/testing/selftests/net/icmp_redirect.sh
@@ -19,6 +19,7 @@
# Route on r1 changed to go to r2 via eth0. This causes a redirect to be sent
# from r1 to h1 telling h1 to use r2 when talking to h2.
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -140,11 +141,7 @@ get_linklocal()
cleanup()
{
- local ns
-
- for ns in h1 h2 r1 r2; do
- ip netns del $ns 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $r1 $r2
}
create_vrf()
@@ -171,102 +168,99 @@ setup()
#
# create nodes as namespaces
- #
- for ns in h1 h2 r1 r2; do
- ip netns add $ns
- ip -netns $ns li set lo up
-
- case "${ns}" in
- h[12]) ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
- ;;
- r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
- ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
- ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
-
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
- ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
- esac
+ setup_ns h1 h2 r1 r2
+ for ns in $h1 $h2 $r1 $r2; do
+ if echo $ns | grep -q h[12]-; then
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.accept_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.accept_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ else
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.send_redirects=1
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ ip netns exec $ns sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ ip netns exec $ns sysctl -q -w net.ipv6.route.mtu_expires=10
+ fi
done
#
# create interconnects
#
- ip -netns h1 li add eth0 type veth peer name r1h1
- ip -netns h1 li set r1h1 netns r1 name eth0 up
+ ip -netns $h1 li add eth0 type veth peer name r1h1
+ ip -netns $h1 li set r1h1 netns $r1 name eth0 up
- ip -netns h1 li add eth1 type veth peer name r2h1
- ip -netns h1 li set r2h1 netns r2 name eth0 up
+ ip -netns $h1 li add eth1 type veth peer name r2h1
+ ip -netns $h1 li set r2h1 netns $r2 name eth0 up
- ip -netns h2 li add eth0 type veth peer name r2h2
- ip -netns h2 li set eth0 up
- ip -netns h2 li set r2h2 netns r2 name eth2 up
+ ip -netns $h2 li add eth0 type veth peer name r2h2
+ ip -netns $h2 li set eth0 up
+ ip -netns $h2 li set r2h2 netns $r2 name eth2 up
- ip -netns r1 li add eth1 type veth peer name r2r1
- ip -netns r1 li set eth1 up
- ip -netns r1 li set r2r1 netns r2 name eth1 up
+ ip -netns $r1 li add eth1 type veth peer name r2r1
+ ip -netns $r1 li set eth1 up
+ ip -netns $r1 li set r2r1 netns $r2 name eth1 up
#
# h1
#
if [ "${WITH_VRF}" = "yes" ]; then
- create_vrf "h1"
+ create_vrf "$h1"
H1_VRF_ARG="vrf ${VRF}"
H1_PING_ARG="-I ${VRF}"
else
H1_VRF_ARG=
H1_PING_ARG=
fi
- ip -netns h1 li add br0 type bridge
+ ip -netns $h1 li add br0 type bridge
if [ "${WITH_VRF}" = "yes" ]; then
- ip -netns h1 li set br0 vrf ${VRF} up
+ ip -netns $h1 li set br0 vrf ${VRF} up
else
- ip -netns h1 li set br0 up
+ ip -netns $h1 li set br0 up
fi
- ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
- ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
- ip -netns h1 li set eth0 master br0 up
- ip -netns h1 li set eth1 master br0 up
+ ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns $h1 li set eth0 master br0 up
+ ip -netns $h1 li set eth1 master br0 up
#
# h2
#
- ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
- ip -netns h2 ro add default via ${R2_N2_IP} dev eth0
- ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
- ip -netns h2 -6 ro add default via ${R2_N2_IP6} dev eth0
+ ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns $h2 ro add default via ${R2_N2_IP} dev eth0
+ ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns $h2 -6 ro add default via ${R2_N2_IP6} dev eth0
#
# r1
#
- ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
- ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
- ip -netns r1 addr add dev eth1 ${R1_R2_N1_IP}/30
- ip -netns r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
+ ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns $r1 addr add dev eth1 ${R1_R2_N1_IP}/30
+ ip -netns $r1 -6 addr add dev eth1 ${R1_R2_N1_IP6}/126 nodad
#
# r2
#
- ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
- ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
- ip -netns r2 addr add dev eth1 ${R2_R1_N1_IP}/30
- ip -netns r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
- ip -netns r2 addr add dev eth2 ${R2_N2_IP}/24
- ip -netns r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth1 ${R2_R1_N1_IP}/30
+ ip -netns $r2 -6 addr add dev eth1 ${R2_R1_N1_IP6}/126 nodad
+ ip -netns $r2 addr add dev eth2 ${R2_N2_IP}/24
+ ip -netns $r2 -6 addr add dev eth2 ${R2_N2_IP6}/64 nodad
sleep 2
- R1_LLADDR=$(get_linklocal r1 eth0)
+ R1_LLADDR=$(get_linklocal $r1 eth0)
if [ $? -ne 0 ]; then
echo "Error: Failed to get link-local address of r1's eth0"
exit 1
fi
log_debug "initial gateway is R1's lladdr = ${R1_LLADDR}"
- R2_LLADDR=$(get_linklocal r2 eth0)
+ R2_LLADDR=$(get_linklocal $r2 eth0)
if [ $? -ne 0 ]; then
echo "Error: Failed to get link-local address of r2's eth0"
exit 1
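Review bot: In the new loop, `if echo $ns | grep -q h[12]-; then` leaves the pattern unquoted, so the shell treats `h[12]-` as a glob and replaces it with a matching file name (`h1-` or `h2-`) if one exists in the working directory; the pattern is also unanchored. That branch decides which namespaces get `accept_redirects=1` and which get forwarding with `rp_filter=0`, so a misclassification silently changes what the test exercises. Assuming setup_ns names namespaces `<var>-<suffix>`, as the pattern implies, a `case` keeps the original structure without spawning grep: `case "$ns" in h[12]-*) ... ;; *) ... ;; esac`, or at minimum quote and anchor it as `grep -q '^h[12]-'`. setup() starts before and continues past this hunk.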
@@ -278,8 +272,8 @@ change_h2_mtu()
{
local mtu=$1
- run_cmd ip -netns h2 li set eth0 mtu ${mtu}
- run_cmd ip -netns r2 li set eth2 mtu ${mtu}
+ run_cmd ip -netns $h2 li set eth0 mtu ${mtu}
+ run_cmd ip -netns $r2 li set eth2 mtu ${mtu}
}
check_exception()
@@ -291,40 +285,40 @@ check_exception()
# From 172.16.1.101: icmp_seq=1 Redirect Host(New nexthop: 172.16.1.102)
if [ "$VERBOSE" = "1" ]; then
echo "Commands to check for exception:"
- run_cmd ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
- run_cmd ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
+ run_cmd ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP}
+ run_cmd ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6}
fi
if [ -n "${mtu}" ]; then
mtu=" mtu ${mtu}"
fi
if [ "$with_redirect" = "yes" ]; then
- ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -q "cache <redirected> expires [0-9]*sec${mtu}"
elif [ -n "${mtu}" ]; then
- ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -q "cache expires [0-9]*sec${mtu}"
else
# want to verify that neither mtu nor redirected appears in
# the route get output. The -v will wipe out the cache line
# if either are set so the last grep -q will not find a match
- ip -netns h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
+ ip -netns $h1 ro get ${H1_VRF_ARG} ${H2_N2_IP} | \
grep -E -v 'mtu|redirected' | grep -q "cache"
fi
log_test $? 0 "IPv4: ${desc}" 0
# No PMTU info for test "redirect" and "mtu exception plus redirect"
if [ "$with_redirect" = "yes" ] && [ "$desc" != "redirect exception plus mtu" ]; then
- ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -v "mtu" | grep -q "${H2_N2_IP6} .*via ${R2_LLADDR} dev br0"
elif [ -n "${mtu}" ]; then
- ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -q "${mtu}"
else
# IPv6 is a bit harder. First strip out the match if it
# contains an mtu exception and then look for the first
# gateway - R1's lladdr
- ip -netns h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
+ ip -netns $h1 -6 ro get ${H1_VRF_ARG} ${H2_N2_IP6} | \
grep -v "mtu" | grep -q "${R1_LLADDR}"
fi
log_test $? 0 "IPv6: ${desc}" 1
@@ -334,21 +328,21 @@ run_ping()
{
local sz=$1
- run_cmd ip netns exec h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
- run_cmd ip netns exec h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
+ run_cmd ip netns exec $h1 ping -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP}
+ run_cmd ip netns exec $h1 ${ping6} -q -M want -i 0.5 -c 10 -w 2 -s ${sz} ${H1_PING_ARG} ${H2_N2_IP6}
}
replace_route_new()
{
# r1 to h2 via r2 and eth0
- run_cmd ip -netns r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
- run_cmd ip -netns r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
+ run_cmd ip -netns $r1 nexthop replace id 1 via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns $r1 nexthop replace id 2 via ${R2_LLADDR} dev eth0
}
reset_route_new()
{
- run_cmd ip -netns r1 nexthop flush
- run_cmd ip -netns h1 nexthop flush
+ run_cmd ip -netns $r1 nexthop flush
+ run_cmd ip -netns $h1 nexthop flush
initial_route_new
}
@@ -356,34 +350,34 @@ reset_route_new()
initial_route_new()
{
# r1 to h2 via r2 and eth1
- run_cmd ip -netns r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
- run_cmd ip -netns r1 ro add ${H2_N2} nhid 1
+ run_cmd ip -netns $r1 nexthop add id 1 via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns $r1 ro add ${H2_N2} nhid 1
- run_cmd ip -netns r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
- run_cmd ip -netns r1 -6 ro add ${H2_N2_6} nhid 2
+ run_cmd ip -netns $r1 nexthop add id 2 via ${R2_R1_N1_IP6} dev eth1
+ run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} nhid 2
# h1 to h2 via r1
- run_cmd ip -netns h1 nexthop add id 1 via ${R1_N1_IP} dev br0
- run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
+ run_cmd ip -netns $h1 nexthop add id 1 via ${R1_N1_IP} dev br0
+ run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} nhid 1
- run_cmd ip -netns h1 nexthop add id 2 via ${R1_LLADDR} dev br0
- run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
+ run_cmd ip -netns $h1 nexthop add id 2 via ${R1_LLADDR} dev br0
+ run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} nhid 2
}
replace_route_legacy()
{
# r1 to h2 via r2 and eth0
- run_cmd ip -netns r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0
- run_cmd ip -netns r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
+ run_cmd ip -netns $r1 ro replace ${H2_N2} via ${R2_N1_IP} dev eth0
+ run_cmd ip -netns $r1 -6 ro replace ${H2_N2_6} via ${R2_LLADDR} dev eth0
}
reset_route_legacy()
{
- run_cmd ip -netns r1 ro del ${H2_N2}
- run_cmd ip -netns r1 -6 ro del ${H2_N2_6}
+ run_cmd ip -netns $r1 ro del ${H2_N2}
+ run_cmd ip -netns $r1 -6 ro del ${H2_N2_6}
- run_cmd ip -netns h1 ro del ${H1_VRF_ARG} ${H2_N2}
- run_cmd ip -netns h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
+ run_cmd ip -netns $h1 ro del ${H1_VRF_ARG} ${H2_N2}
+ run_cmd ip -netns $h1 -6 ro del ${H1_VRF_ARG} ${H2_N2_6}
initial_route_legacy
}
@@ -391,22 +385,22 @@ reset_route_legacy()
initial_route_legacy()
{
# r1 to h2 via r2 and eth1
- run_cmd ip -netns r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1
- run_cmd ip -netns r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
+ run_cmd ip -netns $r1 ro add ${H2_N2} via ${R2_R1_N1_IP} dev eth1
+ run_cmd ip -netns $r1 -6 ro add ${H2_N2_6} via ${R2_R1_N1_IP6} dev eth1
# h1 to h2 via r1
# - IPv6 redirect only works if gateway is the LLA
- run_cmd ip -netns h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
- run_cmd ip -netns h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
+ run_cmd ip -netns $h1 ro add ${H1_VRF_ARG} ${H2_N2} via ${R1_N1_IP} dev br0
+ run_cmd ip -netns $h1 -6 ro add ${H1_VRF_ARG} ${H2_N2_6} via ${R1_LLADDR} dev br0
}
check_connectivity()
{
local rc
- run_cmd ip netns exec h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
+ run_cmd ip netns exec $h1 ping -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP}
rc=$?
- run_cmd ip netns exec h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
+ run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6}
[ $? -ne 0 ] && rc=$?
return $rc
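Review bot: The context line `[ $? -ne 0 ] && rc=$?` in check_connectivity stores the exit status of the `[` test rather than that of the ping: when the IPv6 ping fails the test succeeds, so `rc` is overwritten with 0 and an IPv4 failure recorded just above is lost as well. The commit only swaps the namespace name on the two `run_cmd` lines and leaves this as it was. Capturing the status directly fixes it: `run_cmd ip netns exec $h1 ${ping6} -c1 -w1 ${H1_PING_ARG} ${H2_N2_IP6} || rc=$?`. The end of the function is outside this hunk.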
diff --git a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
index 9ac4456d48fc..123439545013 100755
--- a/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
+++ b/tools/testing/selftests/net/io_uring_zerocopy_tx.sh
@@ -76,23 +76,22 @@ case "${TXMODE}" in
esac
# Start of state changes: install cleanup handler
-save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
cleanup() {
ip netns del "${NS2}"
ip netns del "${NS1}"
- sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
}
trap cleanup EXIT
-# Configure system settings
-sysctl -w -q "${path_sysctl_mem}=1000000"
-
# Create virtual ethernet pair between network namespaces
ip netns add "${NS1}"
ip netns add "${NS2}"
+# Configure system settings
+ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000"
+ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
+
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
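Review bot: The two new `ip netns exec ... sysctl -w -q "${path_sysctl_mem}=1000000"` calls are not checked, so if that sysctl is not exposed inside a non-initial network namespace on the running kernel (`path_sysctl_mem` is set outside this hunk, so this is an assumption to confirm) the writes fail and the test continues with the default limit. Moving the setting into the namespaces is what lets cleanup() drop the save and restore of the host value, so the write succeeding is now the only thing that guarantees the test's precondition. Appending `|| exit 1` to each call (or the suite's skip code, if a skip is the preferred outcome) makes the failure visible where it occurs.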
diff --git a/tools/testing/selftests/net/ioam6.sh b/tools/testing/selftests/net/ioam6.sh
index 4ceb401da1bf..12491850ae98 100755
--- a/tools/testing/selftests/net/ioam6.sh
+++ b/tools/testing/selftests/net/ioam6.sh
@@ -117,8 +117,7 @@
# | Schema Data | |
# +-----------------------------------------------------------+
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
################################################################################
# #
@@ -195,32 +194,32 @@ TESTS_GLOBAL="
check_kernel_compatibility()
{
- ip netns add ioam-tmp-node
- ip link add name veth0 netns ioam-tmp-node type veth \
- peer name veth1 netns ioam-tmp-node
+ setup_ns ioam_tmp_node
+ ip link add name veth0 netns $ioam_tmp_node type veth \
+ peer name veth1 netns $ioam_tmp_node
- ip -netns ioam-tmp-node link set veth0 up
- ip -netns ioam-tmp-node link set veth1 up
+ ip -netns $ioam_tmp_node link set veth0 up
+ ip -netns $ioam_tmp_node link set veth1 up
- ip -netns ioam-tmp-node ioam namespace add 0
+ ip -netns $ioam_tmp_node ioam namespace add 0
ns_ad=$?
- ip -netns ioam-tmp-node ioam namespace show | grep -q "namespace 0"
+ ip -netns $ioam_tmp_node ioam namespace show | grep -q "namespace 0"
ns_sh=$?
if [[ $ns_ad != 0 || $ns_sh != 0 ]]
then
echo "SKIP: kernel version probably too old, missing ioam support"
ip link del veth0 2>/dev/null || true
- ip netns del ioam-tmp-node || true
+ cleanup_ns $ioam_tmp_node || true
exit $ksft_skip
fi
- ip -netns ioam-tmp-node route add db02::/64 encap ioam6 mode inline \
+ ip -netns $ioam_tmp_node route add db02::/64 encap ioam6 mode inline \
trace prealloc type 0x800000 ns 0 size 4 dev veth0
tr_ad=$?
- ip -netns ioam-tmp-node -6 route | grep -q "encap ioam6"
+ ip -netns $ioam_tmp_node -6 route | grep -q "encap ioam6"
tr_sh=$?
if [[ $tr_ad != 0 || $tr_sh != 0 ]]
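Review bot: Each exit path in check_kernel_compatibility still runs `ip link del veth0 2>/dev/null || true` with no namespace argument, so it acts on the caller's namespace, where a real interface named `veth0` would be removed, while the pair created here lives in `$ioam_tmp_node`. The same line appears twice more in the next hunk. Since `cleanup_ns $ioam_tmp_node` follows each time and presumably deletes the namespace together with its veth pair, the line can be dropped or scoped as `ip -netns $ioam_tmp_node link del veth0`. Separately, `setup_ns ioam_tmp_node` is unchecked; if it fails, the later `ip` calls fail too and the script prints the misleading "kernel version probably too old" skip message, so `setup_ns ioam_tmp_node || exit $ksft_skip` is worth adding. The function continues past this hunk.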
@@ -228,12 +227,12 @@ check_kernel_compatibility()
echo "SKIP: cannot attach an ioam trace to a route, did you compile" \
"without CONFIG_IPV6_IOAM6_LWTUNNEL?"
ip link del veth0 2>/dev/null || true
- ip netns del ioam-tmp-node || true
+ cleanup_ns $ioam_tmp_node || true
exit $ksft_skip
fi
ip link del veth0 2>/dev/null || true
- ip netns del ioam-tmp-node || true
+ cleanup_ns $ioam_tmp_node || true
lsmod | grep -q "ip6_tunnel"
ip6tnl_loaded=$?
@@ -265,9 +264,7 @@ cleanup()
ip link del ioam-veth-alpha 2>/dev/null || true
ip link del ioam-veth-gamma 2>/dev/null || true
- ip netns del ioam-node-alpha || true
- ip netns del ioam-node-beta || true
- ip netns del ioam-node-gamma || true
+ cleanup_ns $ioam_node_alpha $ioam_node_beta $ioam_node_gamma || true
if [ $ip6tnl_loaded != 0 ]
then
@@ -277,69 +274,67 @@ cleanup()
setup()
{
- ip netns add ioam-node-alpha
- ip netns add ioam-node-beta
- ip netns add ioam-node-gamma
-
- ip link add name ioam-veth-alpha netns ioam-node-alpha type veth \
- peer name ioam-veth-betaL netns ioam-node-beta
- ip link add name ioam-veth-betaR netns ioam-node-beta type veth \
- peer name ioam-veth-gamma netns ioam-node-gamma
-
- ip -netns ioam-node-alpha link set ioam-veth-alpha name veth0
- ip -netns ioam-node-beta link set ioam-veth-betaL name veth0
- ip -netns ioam-node-beta link set ioam-veth-betaR name veth1
- ip -netns ioam-node-gamma link set ioam-veth-gamma name veth0
-
- ip -netns ioam-node-alpha addr add db01::2/64 dev veth0
- ip -netns ioam-node-alpha link set veth0 up
- ip -netns ioam-node-alpha link set lo up
- ip -netns ioam-node-alpha route add db02::/64 via db01::1 dev veth0
- ip -netns ioam-node-alpha route del db01::/64
- ip -netns ioam-node-alpha route add db01::/64 dev veth0
-
- ip -netns ioam-node-beta addr add db01::1/64 dev veth0
- ip -netns ioam-node-beta addr add db02::1/64 dev veth1
- ip -netns ioam-node-beta link set veth0 up
- ip -netns ioam-node-beta link set veth1 up
- ip -netns ioam-node-beta link set lo up
-
- ip -netns ioam-node-gamma addr add db02::2/64 dev veth0
- ip -netns ioam-node-gamma link set veth0 up
- ip -netns ioam-node-gamma link set lo up
- ip -netns ioam-node-gamma route add db01::/64 via db02::1 dev veth0
+ setup_ns ioam_node_alpha ioam_node_beta ioam_node_gamma
+
+ ip link add name ioam-veth-alpha netns $ioam_node_alpha type veth \
+ peer name ioam-veth-betaL netns $ioam_node_beta
+ ip link add name ioam-veth-betaR netns $ioam_node_beta type veth \
+ peer name ioam-veth-gamma netns $ioam_node_gamma
+
+ ip -netns $ioam_node_alpha link set ioam-veth-alpha name veth0
+ ip -netns $ioam_node_beta link set ioam-veth-betaL name veth0
+ ip -netns $ioam_node_beta link set ioam-veth-betaR name veth1
+ ip -netns $ioam_node_gamma link set ioam-veth-gamma name veth0
+
+ ip -netns $ioam_node_alpha addr add db01::2/64 dev veth0
+ ip -netns $ioam_node_alpha link set veth0 up
+ ip -netns $ioam_node_alpha link set lo up
+ ip -netns $ioam_node_alpha route add db02::/64 via db01::1 dev veth0
+ ip -netns $ioam_node_alpha route del db01::/64
+ ip -netns $ioam_node_alpha route add db01::/64 dev veth0
+
+ ip -netns $ioam_node_beta addr add db01::1/64 dev veth0
+ ip -netns $ioam_node_beta addr add db02::1/64 dev veth1
+ ip -netns $ioam_node_beta link set veth0 up
+ ip -netns $ioam_node_beta link set veth1 up
+ ip -netns $ioam_node_beta link set lo up
+
+ ip -netns $ioam_node_gamma addr add db02::2/64 dev veth0
+ ip -netns $ioam_node_gamma link set veth0 up
+ ip -netns $ioam_node_gamma link set lo up
+ ip -netns $ioam_node_gamma route add db01::/64 via db02::1 dev veth0
# - IOAM config -
- ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
- ip netns exec ioam-node-alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
- ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
- ip netns exec ioam-node-alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
- ip -netns ioam-node-alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
- ip -netns ioam-node-alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
- ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
-
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.all.forwarding=1
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
- ip -netns ioam-node-beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
- ip -netns ioam-node-beta ioam schema add ${BETA[8]} "${BETA[9]}"
- ip -netns ioam-node-beta ioam namespace set 123 schema ${BETA[8]}
-
- ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
- ip netns exec ioam-node-gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
- ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
- ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
- ip netns exec ioam-node-gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
- ip -netns ioam-node-gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id=${ALPHA[0]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.ioam6_id_wide=${ALPHA[1]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id=${ALPHA[4]}
+ ip netns exec $ioam_node_alpha sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${ALPHA[5]}
+ ip -netns $ioam_node_alpha ioam namespace add 123 data ${ALPHA[6]} wide ${ALPHA[7]}
+ ip -netns $ioam_node_alpha ioam schema add ${ALPHA[8]} "${ALPHA[9]}"
+ ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
+
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.all.forwarding=1
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id=${BETA[0]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.ioam6_id_wide=${BETA[1]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id=${BETA[2]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${BETA[3]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id=${BETA[4]}
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth1.ioam6_id_wide=${BETA[5]}
+ ip -netns $ioam_node_beta ioam namespace add 123 data ${BETA[6]} wide ${BETA[7]}
+ ip -netns $ioam_node_beta ioam schema add ${BETA[8]} "${BETA[9]}"
+ ip -netns $ioam_node_beta ioam namespace set 123 schema ${BETA[8]}
+
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id=${GAMMA[0]}
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.ioam6_id_wide=${GAMMA[1]}
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id=${GAMMA[2]}
+ ip netns exec $ioam_node_gamma sysctl -wq net.ipv6.conf.veth0.ioam6_id_wide=${GAMMA[3]}
+ ip -netns $ioam_node_gamma ioam namespace add 123 data ${GAMMA[6]} wide ${GAMMA[7]}
sleep 1
- ip netns exec ioam-node-alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
+ ip netns exec $ioam_node_alpha ping6 -c 5 -W 1 db02::2 &>/dev/null
if [ $? != 0 ]
then
echo "Setup FAILED"
@@ -372,14 +367,12 @@ run_test()
local desc=$2
local node_src=$3
local node_dst=$4
- local ip6_src=$5
- local ip6_dst=$6
- local if_dst=$7
- local trace_type=$8
- local ioam_ns=$9
-
- ip netns exec $node_dst ./ioam6_parser $if_dst $name $ip6_src $ip6_dst \
- $trace_type $ioam_ns &
+ local ip6_dst=$5
+ local trace_type=$6
+ local ioam_ns=$7
+ local type=$8
+
+ ip netns exec $node_dst ./ioam6_parser $name $trace_type $ioam_ns $type &
local spid=$!
sleep 0.1
@@ -412,7 +405,7 @@ run()
echo
# set OUTPUT settings
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=0
for t in $TESTS_OUTPUT
do
@@ -421,8 +414,8 @@ run()
done
# clean OUTPUT settings
- ip netns exec ioam-node-beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
- ip -netns ioam-node-alpha route change db01::/64 dev veth0
+ ip netns exec $ioam_node_beta sysctl -wq net.ipv6.conf.veth0.ioam6_enabled=1
+ ip -netns $ioam_node_alpha route change db01::/64 dev veth0
echo
@@ -433,7 +426,7 @@ run()
echo
# set INPUT settings
- ip -netns ioam-node-alpha ioam namespace del 123
+ ip -netns $ioam_node_alpha ioam namespace del 123
for t in $TESTS_INPUT
do
@@ -442,10 +435,10 @@ run()
done
# clean INPUT settings
- ip -netns ioam-node-alpha ioam namespace add 123 \
+ ip -netns $ioam_node_alpha ioam namespace add 123 \
data ${ALPHA[6]} wide ${ALPHA[7]}
- ip -netns ioam-node-alpha ioam namespace set 123 schema ${ALPHA[8]}
- ip -netns ioam-node-alpha route change db01::/64 dev veth0
+ ip -netns $ioam_node_alpha ioam namespace set 123 schema ${ALPHA[8]}
+ ip -netns $ioam_node_alpha route change db01::/64 dev veth0
echo
printf "%0.s-" {1..74}
@@ -488,15 +481,15 @@ out_undef_ns()
local desc="Unknown IOAM namespace"
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0x800000 ns 0 size 4 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0x800000 0
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0x800000 0 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
out_no_room()
@@ -508,15 +501,15 @@ out_no_room()
local desc="Missing trace room"
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0xc00000 ns 123 size 4 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0xc00000 123
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xc00000 123 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
out_bits()
@@ -532,11 +525,11 @@ out_bits()
bit2size[22]=$(( $tmp + ${#ALPHA[9]} + ((4 - (${#ALPHA[9]} % 4)) % 4) ))
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
for i in {0..22}
do
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
dev veth0 &>/dev/null
@@ -548,18 +541,18 @@ out_bits()
if [ $cmd_res != 0 ]
then
npassed=$((npassed+1))
- log_test_passed "$descr"
+ log_test_passed "$descr ($1 mode)"
else
nfailed=$((nfailed+1))
- log_test_failed "$descr"
+ log_test_failed "$descr ($1 mode)"
fi
else
- run_test "out_bit$i" "$descr ($1 mode)" ioam-node-alpha \
- ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ run_test "out_bit$i" "$descr ($1 mode)" $ioam_node_alpha \
+ $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
fi
done
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
bit2size[22]=$tmp
}
@@ -573,15 +566,15 @@ out_full_supp_trace()
local desc="Full supported trace"
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0xfff002 ns 123 size 100 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0xfff002 123
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xfff002 123 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -603,15 +596,15 @@ in_undef_ns()
local desc="Unknown IOAM namespace"
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0x800000 ns 0 size 4 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0x800000 0
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0x800000 0 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
in_no_room()
@@ -623,15 +616,15 @@ in_no_room()
local desc="Missing trace room"
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0xc00000 ns 123 size 4 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0xc00000 123
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xc00000 123 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
in_bits()
@@ -647,19 +640,19 @@ in_bits()
bit2size[22]=$(( $tmp + ${#BETA[9]} + ((4 - (${#BETA[9]} % 4)) % 4) ))
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
for i in {0..11} {22..22}
do
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type ${bit2type[$i]} ns 123 size ${bit2size[$i]} \
dev veth0
- run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" ioam-node-alpha \
- ioam-node-beta db01::2 db01::1 veth0 ${bit2type[$i]} 123
+ run_test "in_bit$i" "${desc/<n>/$i} ($1 mode)" $ioam_node_alpha \
+ $ioam_node_beta db01::1 ${bit2type[$i]} 123 $1
done
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
bit2size[22]=$tmp
}
@@ -675,22 +668,22 @@ in_oflag()
# Exception:
# Here, we need the sender to set the Overflow flag. For that, we will add
# back the IOAM namespace that was previously configured on the sender.
- ip -netns ioam-node-alpha ioam namespace add 123
+ ip -netns $ioam_node_alpha ioam namespace add 123
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0xc00000 ns 123 size 4 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0xc00000 123
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xc00000 123 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
# And we clean the exception for this test to get things back to normal for
# other INPUT tests
- ip -netns ioam-node-alpha ioam namespace del 123
+ ip -netns $ioam_node_alpha ioam namespace del 123
}
in_full_supp_trace()
@@ -702,15 +695,15 @@ in_full_supp_trace()
local desc="Full supported trace"
[ "$1" = "encap" ] && mode="$1 tundst db01::1" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db01::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db01::/64 encap ioam6 mode $mode \
trace prealloc type 0xfff002 ns 123 size 80 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-beta \
- db01::2 db01::1 veth0 0xfff002 123
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_beta \
+ db01::1 0xfff002 123 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-beta link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_beta link set ip6tnl0 down
}
@@ -730,15 +723,15 @@ fwd_full_supp_trace()
local desc="Forward - Full supported trace"
[ "$1" = "encap" ] && mode="$1 tundst db02::2" || mode="$1"
- [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 up
+ [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 up
- ip -netns ioam-node-alpha route change db02::/64 encap ioam6 mode $mode \
+ ip -netns $ioam_node_alpha route change db02::/64 encap ioam6 mode $mode \
trace prealloc type 0xfff002 ns 123 size 244 via db01::1 dev veth0
- run_test ${FUNCNAME[0]} "${desc} ($1 mode)" ioam-node-alpha ioam-node-gamma \
- db01::2 db02::2 veth0 0xfff002 123
+ run_test ${FUNCNAME[0]} "${desc} ($1 mode)" $ioam_node_alpha $ioam_node_gamma \
+ db02::2 0xfff002 123 $1
- [ "$1" = "encap" ] && ip -netns ioam-node-gamma link set ip6tnl0 down
+ [ "$1" = "encap" ] && ip -netns $ioam_node_gamma link set ip6tnl0 down
}
diff --git a/tools/testing/selftests/net/ioam6_parser.c b/tools/testing/selftests/net/ioam6_parser.c
index d9d1d4190126..895e5bb5044b 100644
--- a/tools/testing/selftests/net/ioam6_parser.c
+++ b/tools/testing/selftests/net/ioam6_parser.c
@@ -8,7 +8,6 @@
#include <errno.h>
#include <limits.h>
#include <linux/const.h>
-#include <linux/if_ether.h>
#include <linux/ioam6.h>
#include <linux/ipv6.h>
#include <stdlib.h>
@@ -512,14 +511,6 @@ static int str2id(const char *tname)
return -1;
}
-static int ipv6_addr_equal(const struct in6_addr *a1, const struct in6_addr *a2)
-{
- return ((a1->s6_addr32[0] ^ a2->s6_addr32[0]) |
- (a1->s6_addr32[1] ^ a2->s6_addr32[1]) |
- (a1->s6_addr32[2] ^ a2->s6_addr32[2]) |
- (a1->s6_addr32[3] ^ a2->s6_addr32[3])) == 0;
-}
-
static int get_u32(__u32 *val, const char *arg, int base)
{
unsigned long res;
@@ -603,70 +594,80 @@ static int (*func[__TEST_MAX])(int, struct ioam6_trace_hdr *, __u32, __u16) = {
int main(int argc, char **argv)
{
- int fd, size, hoplen, tid, ret = 1;
- struct in6_addr src, dst;
+ int fd, size, hoplen, tid, ret = 1, on = 1;
struct ioam6_hdr *opt;
- struct ipv6hdr *ip6h;
- __u8 buffer[400], *p;
- __u16 ioam_ns;
+ struct cmsghdr *cmsg;
+ struct msghdr msg;
+ struct iovec iov;
+ __u8 buffer[512];
__u32 tr_type;
+ __u16 ioam_ns;
+ __u8 *ptr;
- if (argc != 7)
+ if (argc != 5)
goto out;
- tid = str2id(argv[2]);
+ tid = str2id(argv[1]);
if (tid < 0 || !func[tid])
goto out;
- if (inet_pton(AF_INET6, argv[3], &src) != 1 ||
- inet_pton(AF_INET6, argv[4], &dst) != 1)
+ if (get_u32(&tr_type, argv[2], 16) ||
+ get_u16(&ioam_ns, argv[3], 0))
goto out;
- if (get_u32(&tr_type, argv[5], 16) ||
- get_u16(&ioam_ns, argv[6], 0))
+ fd = socket(PF_INET6, SOCK_RAW,
+ !strcmp(argv[4], "encap") ? IPPROTO_IPV6 : IPPROTO_ICMPV6);
+ if (fd < 0)
goto out;
- fd = socket(AF_PACKET, SOCK_DGRAM, __cpu_to_be16(ETH_P_IPV6));
- if (!fd)
- goto out;
+ setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, &on, sizeof(on));
- if (setsockopt(fd, SOL_SOCKET, SO_BINDTODEVICE,
- argv[1], strlen(argv[1])))
+ iov.iov_len = 1;
+ iov.iov_base = malloc(CMSG_SPACE(sizeof(buffer)));
+ if (!iov.iov_base)
goto close;
-
recv:
- size = recv(fd, buffer, sizeof(buffer), 0);
+ memset(&msg, 0, sizeof(msg));
+ msg.msg_iov = &iov;
+ msg.msg_iovlen = 1;
+ msg.msg_control = buffer;
+ msg.msg_controllen = CMSG_SPACE(sizeof(buffer));
+
+ size = recvmsg(fd, &msg, 0);
if (size <= 0)
goto close;
- ip6h = (struct ipv6hdr *)buffer;
+ for (cmsg = CMSG_FIRSTHDR(&msg); cmsg; cmsg = CMSG_NXTHDR(&msg, cmsg)) {
+ if (cmsg->cmsg_level != IPPROTO_IPV6 ||
+ cmsg->cmsg_type != IPV6_HOPOPTS ||
+ cmsg->cmsg_len < sizeof(struct ipv6_hopopt_hdr))
+ continue;
- if (!ipv6_addr_equal(&ip6h->saddr, &src) ||
- !ipv6_addr_equal(&ip6h->daddr, &dst))
- goto recv;
+ ptr = (__u8 *)CMSG_DATA(cmsg);
- if (ip6h->nexthdr != IPPROTO_HOPOPTS)
- goto close;
+ hoplen = (ptr[1] + 1) << 3;
+ ptr += sizeof(struct ipv6_hopopt_hdr);
- p = buffer + sizeof(*ip6h);
- hoplen = (p[1] + 1) << 3;
- p += sizeof(struct ipv6_hopopt_hdr);
+ while (hoplen > 0) {
+ opt = (struct ioam6_hdr *)ptr;
- while (hoplen > 0) {
- opt = (struct ioam6_hdr *)p;
+ if (opt->opt_type == IPV6_TLV_IOAM &&
+ opt->type == IOAM6_TYPE_PREALLOC) {
+ ptr += sizeof(*opt);
+ ret = func[tid](tid,
+ (struct ioam6_trace_hdr *)ptr,
+ tr_type, ioam_ns);
+ goto close;
+ }
- if (opt->opt_type == IPV6_TLV_IOAM &&
- opt->type == IOAM6_TYPE_PREALLOC) {
- p += sizeof(*opt);
- ret = func[tid](tid, (struct ioam6_trace_hdr *)p,
- tr_type, ioam_ns);
- break;
+ ptr += opt->opt_len + 2;
+ hoplen -= opt->opt_len + 2;
}
-
- p += opt->opt_len + 2;
- hoplen -= opt->opt_len + 2;
}
+
+ goto recv;
close:
+ free(iov.iov_base);
close(fd);
out:
return ret;
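Review bot: `msg.msg_controllen = CMSG_SPACE(sizeof(buffer))` declares a control buffer larger than the 512-byte `buffer` it points at, so enough ancillary data could be written past the end of the stack array; `msg.msg_controllen = sizeof(buffer);` keeps the declared length equal to the storage. The option walk also trusts `hoplen` from `ptr[1]` and every `opt->opt_len` without comparing them to `cmsg->cmsg_len`. The `cmsg_len < sizeof(struct ipv6_hopopt_hdr)` test compares a length that already includes the cmsg header, so it does not guarantee the two payload bytes that are read; bounding the loop by `cmsg->cmsg_len - CMSG_LEN(0)` would cover both. The result of `setsockopt(fd, IPPROTO_IPV6, IPV6_RECVHOPOPTS, ...)` is ignored, and if it fails the `goto recv` loop would presumably never see a hop-by-hop cmsg. The closing brace of main is outside the hunk.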
diff --git a/tools/testing/selftests/net/ip_local_port_range.c b/tools/testing/selftests/net/ip_local_port_range.c
index 75e3fdacdf73..29451d2244b7 100644
--- a/tools/testing/selftests/net/ip_local_port_range.c
+++ b/tools/testing/selftests/net/ip_local_port_range.c
@@ -16,6 +16,10 @@
#define IP_LOCAL_PORT_RANGE 51
#endif
+#ifndef IPPROTO_MPTCP
+#define IPPROTO_MPTCP 262
+#endif
+
static __u32 pack_port_range(__u16 lo, __u16 hi)
{
return (hi << 16) | (lo << 0);
@@ -146,6 +150,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_stcp) {
.so_protocol = IPPROTO_SCTP,
};
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip4_mptcp) {
+ .so_domain = AF_INET,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_MPTCP,
+};
+
FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_tcp) {
.so_domain = AF_INET6,
.so_type = SOCK_STREAM,
@@ -164,6 +174,12 @@ FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_stcp) {
.so_protocol = IPPROTO_SCTP,
};
+FIXTURE_VARIANT_ADD(ip_local_port_range, ip6_mptcp) {
+ .so_domain = AF_INET6,
+ .so_type = SOCK_STREAM,
+ .so_protocol = IPPROTO_MPTCP,
+};
+
TEST_F(ip_local_port_range, invalid_option_value)
{
__u16 val16;
@@ -343,15 +359,12 @@ TEST_F(ip_local_port_range, late_bind)
struct sockaddr_in v4;
struct sockaddr_in6 v6;
} addr;
- socklen_t addr_len;
+ socklen_t addr_len = 0;
const int one = 1;
int fd, err;
__u32 range;
__u16 port;
- if (variant->so_protocol == IPPROTO_SCTP)
- SKIP(return, "SCTP doesn't support IP_BIND_ADDRESS_NO_PORT");
-
fd = socket(variant->so_domain, variant->so_type, 0);
ASSERT_GE(fd, 0) TH_LOG("socket failed");
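Review bot: late_bind still opens its socket with protocol 0 (`socket(variant->so_domain, variant->so_type, 0)`), and with the SCTP skip removed nothing in this hunk consults `variant->so_protocol` any more. For the new ip4_mptcp and ip6_mptcp variants, which are SOCK_STREAM, this test would exercise the default stream protocol rather than MPTCP. The sctp variants marked with XFAIL_ADD further down may be affected the same way. If that is not intended, `fd = socket(variant->so_domain, variant->so_type, variant->so_protocol);` matches the fixture. The rest of late_bind lies outside the hunk, so this should be confirmed against the full body.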
@@ -398,6 +411,9 @@ TEST_F(ip_local_port_range, late_bind)
ASSERT_TRUE(!err) TH_LOG("close failed");
}
+XFAIL_ADD(ip_local_port_range, ip4_stcp, late_bind);
+XFAIL_ADD(ip_local_port_range, ip6_stcp, late_bind);
+
TEST_F(ip_local_port_range, get_port_range)
{
__u16 lo, hi;
diff --git a/tools/testing/selftests/net/ipsec.c b/tools/testing/selftests/net/ipsec.c
index 9a8229abfa02..be4a30a0d02a 100644
--- a/tools/testing/selftests/net/ipsec.c
+++ b/tools/testing/selftests/net/ipsec.c
@@ -2263,7 +2263,7 @@ static int check_results(void)
int main(int argc, char **argv)
{
- unsigned int nr_process = 1;
+ long nr_process = 1;
int route_sock = -1, ret = KSFT_SKIP;
int test_desc_fd[2];
uint32_t route_seq;
@@ -2284,7 +2284,7 @@ int main(int argc, char **argv)
exit_usage(argv);
}
- if (nr_process > MAX_PROCESSES || !nr_process) {
+ if (nr_process > MAX_PROCESSES || nr_process < 1) {
printk("nr_process should be between [1; %u]",
MAX_PROCESSES);
exit_usage(argv);
diff --git a/tools/testing/selftests/net/l2tp.sh b/tools/testing/selftests/net/l2tp.sh
index 5782433886fc..88de7166c8ae 100755
--- a/tools/testing/selftests/net/l2tp.sh
+++ b/tools/testing/selftests/net/l2tp.sh
@@ -13,6 +13,7 @@
# 10.1.1.1 | | 10.1.2.1
# 2001:db8:1::1 | | 2001:db8:2::1
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
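Review bot: `source lib.sh` has no path component, so (assuming the script runs under bash) the name is looked up along PATH before the current directory. The test therefore only works when launched from its own directory, and a different `lib.sh` found earlier on PATH would be executed with the privileges the test runs with. Resolving the helper relative to the script avoids both: `source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"`. It would also help to fail early with a clear message if the file is missing, since every later `$host_1`/`$host_2`/`$router` expansion depends on setup_ns from that library.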
@@ -80,9 +81,6 @@ create_ns()
[ -z "${addr}" ] && addr="-"
[ -z "${addr6}" ] && addr6="-"
- ip netns add ${ns}
-
- ip -netns ${ns} link set lo up
if [ "${addr}" != "-" ]; then
ip -netns ${ns} addr add dev lo ${addr}
fi
@@ -133,12 +131,7 @@ connect_ns()
cleanup()
{
- local ns
-
- for ns in host-1 host-2 router
- do
- ip netns del ${ns} 2>/dev/null
- done
+ cleanup_ns $host_1 $host_2 $router
}
setup_l2tp_ipv4()
@@ -146,28 +139,28 @@ setup_l2tp_ipv4()
#
# configure l2tpv3 tunnel on host-1
#
- ip -netns host-1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
+ ip -netns $host_1 l2tp add tunnel tunnel_id 1041 peer_tunnel_id 1042 \
encap ip local 10.1.1.1 remote 10.1.2.1
- ip -netns host-1 l2tp add session name l2tp4 tunnel_id 1041 \
+ ip -netns $host_1 l2tp add session name l2tp4 tunnel_id 1041 \
session_id 1041 peer_session_id 1042
- ip -netns host-1 link set dev l2tp4 up
- ip -netns host-1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
+ ip -netns $host_1 link set dev l2tp4 up
+ ip -netns $host_1 addr add dev l2tp4 172.16.1.1 peer 172.16.1.2
#
# configure l2tpv3 tunnel on host-2
#
- ip -netns host-2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
+ ip -netns $host_2 l2tp add tunnel tunnel_id 1042 peer_tunnel_id 1041 \
encap ip local 10.1.2.1 remote 10.1.1.1
- ip -netns host-2 l2tp add session name l2tp4 tunnel_id 1042 \
+ ip -netns $host_2 l2tp add session name l2tp4 tunnel_id 1042 \
session_id 1042 peer_session_id 1041
- ip -netns host-2 link set dev l2tp4 up
- ip -netns host-2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
+ ip -netns $host_2 link set dev l2tp4 up
+ ip -netns $host_2 addr add dev l2tp4 172.16.1.2 peer 172.16.1.1
#
# add routes to loopback addresses
#
- ip -netns host-1 ro add 172.16.101.2/32 via 172.16.1.2
- ip -netns host-2 ro add 172.16.101.1/32 via 172.16.1.1
+ ip -netns $host_1 ro add 172.16.101.2/32 via 172.16.1.2
+ ip -netns $host_2 ro add 172.16.101.1/32 via 172.16.1.1
}
setup_l2tp_ipv6()
@@ -175,28 +168,28 @@ setup_l2tp_ipv6()
#
# configure l2tpv3 tunnel on host-1
#
- ip -netns host-1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
+ ip -netns $host_1 l2tp add tunnel tunnel_id 1061 peer_tunnel_id 1062 \
encap ip local 2001:db8:1::1 remote 2001:db8:2::1
- ip -netns host-1 l2tp add session name l2tp6 tunnel_id 1061 \
+ ip -netns $host_1 l2tp add session name l2tp6 tunnel_id 1061 \
session_id 1061 peer_session_id 1062
- ip -netns host-1 link set dev l2tp6 up
- ip -netns host-1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
+ ip -netns $host_1 link set dev l2tp6 up
+ ip -netns $host_1 addr add dev l2tp6 fc00:1::1 peer fc00:1::2
#
# configure l2tpv3 tunnel on host-2
#
- ip -netns host-2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
+ ip -netns $host_2 l2tp add tunnel tunnel_id 1062 peer_tunnel_id 1061 \
encap ip local 2001:db8:2::1 remote 2001:db8:1::1
- ip -netns host-2 l2tp add session name l2tp6 tunnel_id 1062 \
+ ip -netns $host_2 l2tp add session name l2tp6 tunnel_id 1062 \
session_id 1062 peer_session_id 1061
- ip -netns host-2 link set dev l2tp6 up
- ip -netns host-2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
+ ip -netns $host_2 link set dev l2tp6 up
+ ip -netns $host_2 addr add dev l2tp6 fc00:1::2 peer fc00:1::1
#
# add routes to loopback addresses
#
- ip -netns host-1 -6 ro add fc00:101::2/128 via fc00:1::2
- ip -netns host-2 -6 ro add fc00:101::1/128 via fc00:1::1
+ ip -netns $host_1 -6 ro add fc00:101::2/128 via fc00:1::2
+ ip -netns $host_2 -6 ro add fc00:101::1/128 via fc00:1::1
}
setup()
@@ -205,21 +198,22 @@ setup()
cleanup
set -e
- create_ns host-1 172.16.101.1/32 fc00:101::1/128
- create_ns host-2 172.16.101.2/32 fc00:101::2/128
- create_ns router
+ setup_ns host_1 host_2 router
+ create_ns $host_1 172.16.101.1/32 fc00:101::1/128
+ create_ns $host_2 172.16.101.2/32 fc00:101::2/128
+ create_ns $router
- connect_ns host-1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
- router eth1 10.1.1.2/24 2001:db8:1::2/64
+ connect_ns $host_1 eth0 10.1.1.1/24 2001:db8:1::1/64 \
+ $router eth1 10.1.1.2/24 2001:db8:1::2/64
- connect_ns host-2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
- router eth2 10.1.2.2/24 2001:db8:2::2/64
+ connect_ns $host_2 eth0 10.1.2.1/24 2001:db8:2::1/64 \
+ $router eth2 10.1.2.2/24 2001:db8:2::2/64
- ip -netns host-1 ro add 10.1.2.0/24 via 10.1.1.2
- ip -netns host-1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
+ ip -netns $host_1 ro add 10.1.2.0/24 via 10.1.1.2
+ ip -netns $host_1 -6 ro add 2001:db8:2::/64 via 2001:db8:1::2
- ip -netns host-2 ro add 10.1.1.0/24 via 10.1.2.2
- ip -netns host-2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
+ ip -netns $host_2 ro add 10.1.1.0/24 via 10.1.2.2
+ ip -netns $host_2 -6 ro add 2001:db8:1::/64 via 2001:db8:2::2
setup_l2tp_ipv4
setup_l2tp_ipv6
@@ -231,38 +225,38 @@ setup_ipsec()
#
# IPv4
#
- run_cmd host-1 ip xfrm policy add \
+ run_cmd $host_1 ip xfrm policy add \
src 10.1.1.1 dst 10.1.2.1 dir out \
tmpl proto esp mode transport
- run_cmd host-1 ip xfrm policy add \
+ run_cmd $host_1 ip xfrm policy add \
src 10.1.2.1 dst 10.1.1.1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip xfrm policy add \
+ run_cmd $host_2 ip xfrm policy add \
src 10.1.1.1 dst 10.1.2.1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip xfrm policy add \
+ run_cmd $host_2 ip xfrm policy add \
src 10.1.2.1 dst 10.1.1.1 dir out \
tmpl proto esp mode transport
- ip -netns host-1 xfrm state add \
+ ip -netns $host_1 xfrm state add \
src 10.1.1.1 dst 10.1.2.1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-1 xfrm state add \
+ ip -netns $host_1 xfrm state add \
src 10.1.2.1 dst 10.1.1.1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 xfrm state add \
+ ip -netns $host_2 xfrm state add \
src 10.1.1.1 dst 10.1.2.1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 xfrm state add \
+ ip -netns $host_2 xfrm state add \
src 10.1.2.1 dst 10.1.1.1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
@@ -270,38 +264,38 @@ setup_ipsec()
#
# IPV6
#
- run_cmd host-1 ip -6 xfrm policy add \
+ run_cmd $host_1 ip -6 xfrm policy add \
src 2001:db8:1::1 dst 2001:db8:2::1 dir out \
tmpl proto esp mode transport
- run_cmd host-1 ip -6 xfrm policy add \
+ run_cmd $host_1 ip -6 xfrm policy add \
src 2001:db8:2::1 dst 2001:db8:1::1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip -6 xfrm policy add \
+ run_cmd $host_2 ip -6 xfrm policy add \
src 2001:db8:1::1 dst 2001:db8:2::1 dir in \
tmpl proto esp mode transport
- run_cmd host-2 ip -6 xfrm policy add \
+ run_cmd $host_2 ip -6 xfrm policy add \
src 2001:db8:2::1 dst 2001:db8:1::1 dir out \
tmpl proto esp mode transport
- ip -netns host-1 -6 xfrm state add \
+ ip -netns $host_1 -6 xfrm state add \
src 2001:db8:1::1 dst 2001:db8:2::1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-1 -6 xfrm state add \
+ ip -netns $host_1 -6 xfrm state add \
src 2001:db8:2::1 dst 2001:db8:1::1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 -6 xfrm state add \
+ ip -netns $host_2 -6 xfrm state add \
src 2001:db8:1::1 dst 2001:db8:2::1 \
spi 0x1000 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
- ip -netns host-2 -6 xfrm state add \
+ ip -netns $host_2 -6 xfrm state add \
src 2001:db8:2::1 dst 2001:db8:1::1 \
spi 0x1001 proto esp aead 'rfc4106(gcm(aes))' \
0x0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f0f 128 mode transport
@@ -309,10 +303,10 @@ setup_ipsec()
teardown_ipsec()
{
- run_cmd host-1 ip xfrm state flush
- run_cmd host-1 ip xfrm policy flush
- run_cmd host-2 ip xfrm state flush
- run_cmd host-2 ip xfrm policy flush
+ run_cmd $host_1 ip xfrm state flush
+ run_cmd $host_1 ip xfrm policy flush
+ run_cmd $host_2 ip xfrm state flush
+ run_cmd $host_2 ip xfrm policy flush
}
################################################################################
@@ -322,16 +316,16 @@ run_ping()
{
local desc="$1"
- run_cmd host-1 ping -c1 -w1 172.16.1.2
+ run_cmd $host_1 ping -c1 -w1 172.16.1.2
log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
- run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+ run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
- run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+ run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2
log_test $? 0 "IPv6 basic L2TP tunnel ${desc}"
- run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+ run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
log_test $? 0 "IPv6 route through L2TP tunnel ${desc}"
}
@@ -344,16 +338,16 @@ run_tests()
setup_ipsec
run_ping "- with IPsec"
- run_cmd host-1 ping -c1 -w1 172.16.1.2
+ run_cmd $host_1 ping -c1 -w1 172.16.1.2
log_test $? 0 "IPv4 basic L2TP tunnel ${desc}"
- run_cmd host-1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
+ run_cmd $host_1 ping -c1 -w1 -I 172.16.101.1 172.16.101.2
log_test $? 0 "IPv4 route through L2TP tunnel ${desc}"
- run_cmd host-1 ${ping6} -c1 -w1 fc00:1::2
+ run_cmd $host_1 ${ping6} -c1 -w1 fc00:1::2
log_test $? 0 "IPv6 basic L2TP tunnel - with IPsec"
- run_cmd host-1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
+ run_cmd $host_1 ${ping6} -c1 -w1 -I fc00:101::1 fc00:101::2
log_test $? 0 "IPv6 route through L2TP tunnel - with IPsec"
teardown_ipsec
diff --git a/tools/testing/selftests/net/lib.sh b/tools/testing/selftests/net/lib.sh
new file mode 100644
index 000000000000..c868c0aec121
--- /dev/null
+++ b/tools/testing/selftests/net/lib.sh
@@ -0,0 +1,150 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+##############################################################################
+# Defines
+
+: "${WAIT_TIMEOUT:=20}"
+
+BUSYWAIT_TIMEOUT=$((WAIT_TIMEOUT * 1000)) # ms
+
+# Kselftest framework constants.
+ksft_pass=0
+ksft_fail=1
+ksft_xfail=2
+ksft_skip=4
+
+# namespace list created by setup_ns
+NS_LIST=""
+
+##############################################################################
+# Helpers
+
+__ksft_status_merge()
+{
+ local a=$1; shift
+ local b=$1; shift
+ local -A weights
+ local weight=0
+
+ for i in "$@"; do
+ weights[$i]=$((weight++))
+ done
+
+ if [[ ${weights[$a]} > ${weights[$b]} ]]; then
+ echo "$a"
+ return 0
+ else
+ echo "$b"
+ return 1
+ fi
+}
+
+ksft_status_merge()
+{
+ local a=$1; shift
+ local b=$1; shift
+
+ __ksft_status_merge "$a" "$b" \
+ $ksft_pass $ksft_xfail $ksft_skip $ksft_fail
+}
+
+ksft_exit_status_merge()
+{
+ local a=$1; shift
+ local b=$1; shift
+
+ __ksft_status_merge "$a" "$b" \
+ $ksft_xfail $ksft_pass $ksft_skip $ksft_fail
+}
+
+loopy_wait()
+{
+ local sleep_cmd=$1; shift
+ local timeout_ms=$1; shift
+
+ local start_time="$(date -u +%s%3N)"
+ while true
+ do
+ local out
+ out=$("$@")
+ local ret=$?
+ if ((!ret)); then
+ echo -n "$out"
+ return 0
+ fi
+
+ local current_time="$(date -u +%s%3N)"
+ if ((current_time - start_time > timeout_ms)); then
+ echo -n "$out"
+ return 1
+ fi
+
+ $sleep_cmd
+ done
+}
+
+busywait()
+{
+ local timeout_ms=$1; shift
+
+ loopy_wait : "$timeout_ms" "$@"
+}
+
+cleanup_ns()
+{
+ local ns=""
+ local errexit=0
+ local ret=0
+
+ # disable errexit temporary
+ if [[ $- =~ "e" ]]; then
+ errexit=1
+ set +e
+ fi
+
+ for ns in "$@"; do
+ ip netns delete "${ns}" &> /dev/null
+ if ! busywait $BUSYWAIT_TIMEOUT ip netns list \| grep -vq "^$ns$" &> /dev/null; then
+ echo "Warn: Failed to remove namespace $ns"
+ ret=1
+ fi
+ done
+
+ [ $errexit -eq 1 ] && set -e
+ return $ret
+}
+
+cleanup_all_ns()
+{
+ cleanup_ns $NS_LIST
+}
+
+# setup netns with given names as prefix. e.g
+# setup_ns local remote
+setup_ns()
+{
+ local ns=""
+ local ns_name=""
+ local ns_list=""
+ for ns_name in "$@"; do
+ # Some test may setup/remove same netns multi times
+ if unset ${ns_name} 2> /dev/null; then
+ ns="${ns_name,,}-$(mktemp -u XXXXXX)"
+ eval readonly ${ns_name}="$ns"
+ else
+ eval ns='$'${ns_name}
+ cleanup_ns "$ns"
+
+ fi
+
+ if ! ip netns add "$ns"; then
+ echo "Failed to create namespace $ns_name"
+ cleanup_ns "$ns_list"
+ return $ksft_skip
+ fi
+ ip -n "$ns" link set lo up
+ ns_list="$ns_list $ns"
+ done
+ NS_LIST="$NS_LIST $ns_list"
+}
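Review bot: In cleanup_ns the removal check never runs a pipeline: `\|` reaches loopy_wait as a literal argument, so `"$@"` executes `ip netns list` with `|`, `grep` and the pattern as extra words, and `&> /dev/null` hides whatever that prints. Even as a real pipeline, `grep -vq "^$ns$"` succeeds whenever any other namespace is listed, so it would not detect one that is still present. Wrapping the test in a function that busywait can call, for example `ns_gone() { ! ip netns list | grep -q "^$1\b"; }` with `busywait "$BUSYWAIT_TIMEOUT" ns_gone "$ns"`, would make the warning reachable. In setup_ns the failure path calls `cleanup_ns "$ns_list"`, which passes the space-joined list as a single name; `cleanup_ns $ns_list`, as cleanup_all_ns does with NS_LIST, deletes each namespace. The two `eval` lines expand caller-supplied names as shell code and could be `declare -gr "$ns_name=$ns"` and `ns=${!ns_name}` instead.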
diff --git a/tools/testing/selftests/net/lib/.gitignore b/tools/testing/selftests/net/lib/.gitignore
new file mode 100644
index 000000000000..1ebc6187f421
--- /dev/null
+++ b/tools/testing/selftests/net/lib/.gitignore
@@ -0,0 +1,2 @@
+# SPDX-License-Identifier: GPL-2.0-only
+csum
diff --git a/tools/testing/selftests/net/lib/Makefile b/tools/testing/selftests/net/lib/Makefile
new file mode 100644
index 000000000000..82c3264b115e
--- /dev/null
+++ b/tools/testing/selftests/net/lib/Makefile
@@ -0,0 +1,15 @@
+# SPDX-License-Identifier: GPL-2.0
+
+CFLAGS = -Wall -Wl,--no-as-needed -O2 -g
+CFLAGS += -I../../../../../usr/include/ $(KHDR_INCLUDES)
+# Additional include paths needed by kselftest.h
+CFLAGS += -I../../
+
+TEST_FILES := ../../../../../Documentation/netlink/specs
+TEST_FILES += ../../../../net/ynl
+
+TEST_GEN_FILES += csum
+
+TEST_INCLUDES := $(wildcard py/*.py)
+
+include ../../lib.mk
diff --git a/tools/testing/selftests/net/csum.c b/tools/testing/selftests/net/lib/csum.c
index 90eb06fefa59..b9f3fc3c3426 100644
--- a/tools/testing/selftests/net/csum.c
+++ b/tools/testing/selftests/net/lib/csum.c
@@ -682,7 +682,7 @@ static int recv_verify_packet_ipv6(void *nh, int len)
}
/* return whether auxdata includes TP_STATUS_CSUM_VALID */
-static bool recv_verify_packet_csum(struct msghdr *msg)
+static uint32_t recv_get_packet_csum_status(struct msghdr *msg)
{
struct tpacket_auxdata *aux = NULL;
struct cmsghdr *cm;
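Review bot: The comment above the renamed function still reads "return whether auxdata includes TP_STATUS_CSUM_VALID", but recv_get_packet_csum_status() now returns the whole `aux->tp_status` word and leaves the flag tests to the caller. The comment should be updated to match, for example `/* return tp_status from the auxdata cmsg; caller tests TP_STATUS_* bits */`. The function body continues outside this hunk.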
@@ -706,7 +706,7 @@ static bool recv_verify_packet_csum(struct msghdr *msg)
if (!aux)
error(1, 0, "cmsg: no auxdata");
- return aux->tp_status & TP_STATUS_CSUM_VALID;
+ return aux->tp_status;
}
static int recv_packet(int fd)
@@ -716,6 +716,7 @@ static int recv_packet(int fd)
char ctrl[CMSG_SPACE(sizeof(struct tpacket_auxdata))];
struct pkt *buf = (void *)_buf;
struct msghdr msg = {0};
+ uint32_t tp_status;
struct iovec iov;
int len, ret;
@@ -737,6 +738,17 @@ static int recv_packet(int fd)
if (len == -1)
error(1, errno, "recv p");
+ tp_status = recv_get_packet_csum_status(&msg);
+
+ /* GRO might coalesce randomized packets. Such GSO packets are
+ * then reinitialized for csum offload (CHECKSUM_PARTIAL), with
+ * a pseudo csum. Do not try to validate these checksums.
+ */
+ if (tp_status & TP_STATUS_CSUMNOTREADY) {
+ fprintf(stderr, "cmsg: GSO packet has partial csum: skip\n");
+ continue;
+ }
+
if (cfg_family == PF_INET6)
ret = recv_verify_packet_ipv6(buf, len);
else
@@ -753,7 +765,7 @@ static int recv_packet(int fd)
* Do not fail if kernel does not validate a good csum:
* Absence of validation does not imply invalid.
*/
- if (recv_verify_packet_csum(&msg) && cfg_bad_csum) {
+ if (tp_status & TP_STATUS_CSUM_VALID && cfg_bad_csum) {
fprintf(stderr, "cmsg: expected bad csum, pf_packet returns valid\n");
bad_validations++;
}
diff --git a/tools/testing/selftests/net/lib/py/__init__.py b/tools/testing/selftests/net/lib/py/__init__.py
new file mode 100644
index 000000000000..b6d498d125fe
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/__init__.py
@@ -0,0 +1,8 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .consts import KSRC
+from .ksft import *
+from .netns import NetNS
+from .nsim import *
+from .utils import *
+from .ynl import NlError, YnlFamily, EthtoolFamily, NetdevFamily, RtnlFamily
diff --git a/tools/testing/selftests/net/lib/py/consts.py b/tools/testing/selftests/net/lib/py/consts.py
new file mode 100644
index 000000000000..f518ce79d82c
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/consts.py
@@ -0,0 +1,9 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+
+KSFT_DIR = (Path(__file__).parent / "../../..").resolve()
+KSRC = (Path(__file__).parent / "../../../../../..").resolve()
+
+KSFT_MAIN_NAME = Path(sys.argv[0]).with_suffix("").name
diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py
new file mode 100644
index 000000000000..4769b4eb1ea1
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ksft.py
@@ -0,0 +1,159 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import builtins
+import inspect
+import sys
+import time
+import traceback
+from .consts import KSFT_MAIN_NAME
+
+KSFT_RESULT = None
+KSFT_RESULT_ALL = True
+
+
+class KsftFailEx(Exception):
+ pass
+
+
+class KsftSkipEx(Exception):
+ pass
+
+
+class KsftXfailEx(Exception):
+ pass
+
+
+def ksft_pr(*objs, **kwargs):
+ print("#", *objs, **kwargs)
+
+
+def _fail(*args):
+ global KSFT_RESULT
+ KSFT_RESULT = False
+
+ frame = inspect.stack()[2]
+ ksft_pr("At " + frame.filename + " line " + str(frame.lineno) + ":")
+ ksft_pr(*args)
+
+
+def ksft_eq(a, b, comment=""):
+ global KSFT_RESULT
+ if a != b:
+ _fail("Check failed", a, "!=", b, comment)
+
+
+def ksft_true(a, comment=""):
+ if not a:
+ _fail("Check failed", a, "does not eval to True", comment)
+
+
+def ksft_in(a, b, comment=""):
+ if a not in b:
+ _fail("Check failed", a, "not in", b, comment)
+
+
+def ksft_ge(a, b, comment=""):
+ if a < b:
+ _fail("Check failed", a, "<", b, comment)
+
+
+class ksft_raises:
+ def __init__(self, expected_type):
+ self.exception = None
+ self.expected_type = expected_type
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, exc_type, exc_val, exc_tb):
+ if exc_type is None:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, none raised")
+ elif self.expected_type != exc_type:
+ _fail(f"Expected exception {str(self.expected_type.__name__)}, raised {str(exc_type.__name__)}")
+ self.exception = exc_val
+ # Suppress the exception if its the expected one
+ return self.expected_type == exc_type
+
+
+def ksft_busy_wait(cond, sleep=0.005, deadline=1, comment=""):
+ end = time.monotonic() + deadline
+ while True:
+ if cond():
+ return
+ if time.monotonic() > end:
+ _fail("Waiting for condition timed out", comment)
+ return
+ time.sleep(sleep)
+
+
+def ktap_result(ok, cnt=1, case="", comment=""):
+ global KSFT_RESULT_ALL
+ KSFT_RESULT_ALL = KSFT_RESULT_ALL and ok
+
+ res = ""
+ if not ok:
+ res += "not "
+ res += "ok "
+ res += str(cnt) + " "
+ res += KSFT_MAIN_NAME
+ if case:
+ res += "." + str(case.__name__)
+ if comment:
+ res += " # " + comment
+ print(res)
+
+
+def ksft_run(cases=None, globs=None, case_pfx=None, args=()):
+ cases = cases or []
+
+ if globs and case_pfx:
+ for key, value in globs.items():
+ if not callable(value):
+ continue
+ for prefix in case_pfx:
+ if key.startswith(prefix):
+ cases.append(value)
+ break
+
+ totals = {"pass": 0, "fail": 0, "skip": 0, "xfail": 0}
+
+ print("KTAP version 1")
+ print("1.." + str(len(cases)))
+
+ global KSFT_RESULT
+ cnt = 0
+ for case in cases:
+ KSFT_RESULT = True
+ cnt += 1
+ try:
+ case(*args)
+ except KsftSkipEx as e:
+ ktap_result(True, cnt, case, comment="SKIP " + str(e))
+ totals['skip'] += 1
+ continue
+ except KsftXfailEx as e:
+ ktap_result(True, cnt, case, comment="XFAIL " + str(e))
+ totals['xfail'] += 1
+ continue
+ except Exception as e:
+ tb = traceback.format_exc()
+ for line in tb.strip().split('\n'):
+ ksft_pr("Exception|", line)
+ ktap_result(False, cnt, case)
+ totals['fail'] += 1
+ continue
+
+ ktap_result(KSFT_RESULT, cnt, case)
+ if KSFT_RESULT:
+ totals['pass'] += 1
+ else:
+ totals['fail'] += 1
+
+ print(
+ f"# Totals: pass:{totals['pass']} fail:{totals['fail']} xfail:{totals['xfail']} xpass:0 skip:{totals['skip']} error:0"
+ )
+
+
+def ksft_exit():
+ global KSFT_RESULT_ALL
+ sys.exit(0 if KSFT_RESULT_ALL else 1)
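Review bot: `ksft_raises.__exit__` compares exception types with `!=` and `==`, so a test that expects a base class fails, and the exception propagates, when the code under test raises a subclass. Matching the way `except` does would need `elif not issubclass(exc_type, self.expected_type):` for the check and `return exc_type is not None and issubclass(exc_type, self.expected_type)` for the suppression. Separately, `ksft_run` appends discovered cases to the list object passed in as `cases`, which mutates the caller's list; `cases = list(cases or [])` avoids that.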
diff --git a/tools/testing/selftests/net/lib/py/netns.py b/tools/testing/selftests/net/lib/py/netns.py
new file mode 100644
index 000000000000..ecff85f9074f
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/netns.py
@@ -0,0 +1,31 @@
+# SPDX-License-Identifier: GPL-2.0
+
+from .utils import ip
+import random
+import string
+
+
+class NetNS:
+ def __init__(self, name=None):
+ if name:
+ self.name = name
+ else:
+ self.name = ''.join(random.choice(string.ascii_lowercase) for _ in range(8))
+ ip('netns add ' + self.name)
+
+ def __del__(self):
+ if self.name:
+ ip('netns del ' + self.name)
+ self.name = None
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ self.__del__()
+
+ def __str__(self):
+ return self.name
+
+ def __repr__(self):
+ return f"NetNS({self.name})"
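Review bot: If `ip('netns add ' + self.name)` raises in `__init__`, `self.name` is already set, so `__del__` later runs `ip netns del` on a namespace this object never created. With a caller-supplied `name` that already exists, that removes a namespace owned by something else. Build the name in a local, call `ip('netns add ' + name)`, and assign `self.name = name` only afterwards, with `__del__` reading `getattr(self, 'name', None)`. The name is also concatenated into a command string that `cmd` runs with `shell=True`, so passing it through `shlex.quote()` would be a cheap guard for caller-supplied names.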
diff --git a/tools/testing/selftests/net/lib/py/nsim.py b/tools/testing/selftests/net/lib/py/nsim.py
new file mode 100644
index 000000000000..f571a8b3139b
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/nsim.py
@@ -0,0 +1,134 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json
+import os
+import random
+import re
+import time
+from .utils import cmd, ip
+
+
+class NetdevSim:
+ """
+ Class for netdevsim netdevice and its attributes.
+ """
+
+ def __init__(self, nsimdev, port_index, ifname, ns=None):
+ # In case udev renamed the netdev to according to new schema,
+ # check if the name matches the port_index.
+ nsimnamere = re.compile(r"eni\d+np(\d+)")
+ match = nsimnamere.match(ifname)
+ if match and int(match.groups()[0]) != port_index + 1:
+ raise Exception("netdevice name mismatches the expected one")
+
+ self.ifname = ifname
+ self.nsimdev = nsimdev
+ self.port_index = port_index
+ self.ns = ns
+ self.dfs_dir = "%s/ports/%u/" % (nsimdev.dfs_dir, port_index)
+ ret = ip("-j link show dev %s" % ifname, ns=ns)
+ self.dev = json.loads(ret.stdout)[0]
+ self.ifindex = self.dev["ifindex"]
+
+ def dfs_write(self, path, val):
+ self.nsimdev.dfs_write(f'ports/{self.port_index}/' + path, val)
+
+
+class NetdevSimDev:
+ """
+ Class for netdevsim bus device and its attributes.
+ """
+ @staticmethod
+ def ctrl_write(path, val):
+ fullpath = os.path.join("/sys/bus/netdevsim/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def dfs_write(self, path, val):
+ fullpath = os.path.join(f"/sys/kernel/debug/netdevsim/netdevsim{self.addr}/", path)
+ with open(fullpath, "w") as f:
+ f.write(val)
+
+ def __init__(self, port_count=1, queue_count=1, ns=None):
+ # nsim will spawn in init_net, we'll set to actual ns once we switch it there
+ self.ns = None
+
+ if not os.path.exists("/sys/bus/netdevsim"):
+ cmd("modprobe netdevsim")
+
+ addr = random.randrange(1 << 15)
+ while True:
+ try:
+ self.ctrl_write("new_device", "%u %u %u" % (addr, port_count, queue_count))
+ except OSError as e:
+ if e.errno == errno.ENOSPC:
+ addr = random.randrange(1 << 15)
+ continue
+ raise e
+ break
+ self.addr = addr
+
+ # As probe of netdevsim device might happen from a workqueue,
+ # so wait here until all netdevs appear.
+ self.wait_for_netdevs(port_count)
+
+ if ns:
+ cmd(f"devlink dev reload netdevsim/netdevsim{addr} netns {ns.name}")
+ self.ns = ns
+
+ cmd("udevadm settle", ns=self.ns)
+ ifnames = self.get_ifnames()
+
+ self.dfs_dir = "/sys/kernel/debug/netdevsim/netdevsim%u/" % addr
+
+ self.nsims = []
+ for port_index in range(port_count):
+ self.nsims.append(self._make_port(port_index, ifnames[port_index]))
+
+ self.removed = False
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ """
+ __exit__ gets called at the end of a "with" block.
+ """
+ self.remove()
+
+ def _make_port(self, port_index, ifname):
+ return NetdevSim(self, port_index, ifname, self.ns)
+
+ def get_ifnames(self):
+ ifnames = []
+ listdir = cmd(f"ls /sys/bus/netdevsim/devices/netdevsim{self.addr}/net/",
+ ns=self.ns).stdout.split()
+ for ifname in listdir:
+ ifnames.append(ifname)
+ ifnames.sort()
+ return ifnames
+
+ def wait_for_netdevs(self, port_count):
+ timeout = 5
+ timeout_start = time.time()
+
+ while True:
+ try:
+ ifnames = self.get_ifnames()
+ except FileNotFoundError as e:
+ ifnames = []
+ if len(ifnames) == port_count:
+ break
+ if time.time() < timeout_start + timeout:
+ continue
+ raise Exception("netdevices did not appear within timeout")
+
+ def remove(self):
+ if not self.removed:
+ self.ctrl_write("del_device", "%u" % (self.addr, ))
+ self.removed = True
+
+ def remove_nsim(self, nsim):
+ self.nsims.remove(nsim)
+ self.ctrl_write("devices/netdevsim%u/del_port" % (self.addr, ),
+ "%u" % (nsim.port_index, ))
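Review bot: `NetdevSimDev.__init__` tests `e.errno == errno.ENOSPC`, but this file imports only json, os, random, re and time, so the first `OSError` from `ctrl_write` surfaces as a `NameError` instead of retrying with a new address; add `import errno`. In `wait_for_netdevs` the loop hits `continue` without sleeping, so it spins for up to five seconds. Its `except FileNotFoundError` also looks unreachable, because `get_ifnames` runs `ls` through `cmd`, which raises a plain `Exception` on a non-zero exit. A short `time.sleep()` per iteration and catching the exception `cmd` actually raises would make the wait behave as its comment describes.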
diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py
new file mode 100644
index 000000000000..0540ea24921d
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/utils.py
@@ -0,0 +1,102 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import json as _json
+import random
+import re
+import subprocess
+import time
+
+
+class cmd:
+ def __init__(self, comm, shell=True, fail=True, ns=None, background=False, host=None, timeout=5):
+ if ns:
+ comm = f'ip netns exec {ns} ' + comm
+
+ self.stdout = None
+ self.stderr = None
+ self.ret = None
+
+ self.comm = comm
+ if host:
+ self.proc = host.cmd(comm)
+ else:
+ self.proc = subprocess.Popen(comm, shell=shell, stdout=subprocess.PIPE,
+ stderr=subprocess.PIPE)
+ if not background:
+ self.process(terminate=False, fail=fail, timeout=timeout)
+
+ def process(self, terminate=True, fail=None, timeout=5):
+ if fail is None:
+ fail = not terminate
+
+ if terminate:
+ self.proc.terminate()
+ stdout, stderr = self.proc.communicate(timeout)
+ self.stdout = stdout.decode("utf-8")
+ self.stderr = stderr.decode("utf-8")
+ self.proc.stdout.close()
+ self.proc.stderr.close()
+ self.ret = self.proc.returncode
+
+ if self.proc.returncode != 0 and fail:
+ if len(stderr) > 0 and stderr[-1] == "\n":
+ stderr = stderr[:-1]
+ raise Exception("Command failed: %s\nSTDOUT: %s\nSTDERR: %s" %
+ (self.proc.args, stdout, stderr))
+
+
+class bkg(cmd):
+ def __init__(self, comm, shell=True, fail=None, ns=None, host=None,
+ exit_wait=False):
+ super().__init__(comm, background=True,
+ shell=shell, fail=fail, ns=ns, host=host)
+ self.terminate = not exit_wait
+ self.check_fail = fail
+
+ def __enter__(self):
+ return self
+
+ def __exit__(self, ex_type, ex_value, ex_tb):
+ return self.process(terminate=self.terminate, fail=self.check_fail)
+
+
+def tool(name, args, json=None, ns=None, host=None):
+ cmd_str = name + ' '
+ if json:
+ cmd_str += '--json '
+ cmd_str += args
+ cmd_obj = cmd(cmd_str, ns=ns, host=host)
+ if json:
+ return _json.loads(cmd_obj.stdout)
+ return cmd_obj
+
+
+def ip(args, json=None, ns=None, host=None):
+ if ns:
+ args = f'-netns {ns} ' + args
+ return tool('ip', args, json=json, host=host)
+
+
+def rand_port():
+ """
+ Get unprivileged port, for now just random, one day we may decide to check if used.
+ """
+ return random.randint(10000, 65535)
+
+
+def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5):
+ end = time.monotonic() + deadline
+
+ pattern = f":{port:04X} .* "
+ if proto == "tcp": # for tcp protocol additionally check the socket state
+ pattern += "0A"
+ pattern = re.compile(pattern)
+
+ while True:
+ data = cmd(f'cat /proc/net/{proto}*', ns=ns, host=host, shell=True).stdout
+ for row in data.split("\n"):
+ if pattern.search(row):
+ return
+ if time.monotonic() > end:
+ raise Exception("Waiting for port listen timed out")
+ time.sleep(sleep)
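Review bot: In `cmd.process` the failure path works on the raw `stdout`/`stderr` bytes instead of the decoded `self.stdout`/`self.stderr`: `stderr[-1] == "\n"` compares an int with a str and is never true, and the exception text ends up showing `b'...'` reprs. Use the decoded strings, for example `stderr = self.stderr.rstrip("\n")`, and format `self.stdout` in the message. Also, `self.proc.communicate(timeout)` raises `subprocess.TimeoutExpired` without stopping the child, so a hung command outlives the test; killing the process in that case before re-raising would keep runs clean.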
diff --git a/tools/testing/selftests/net/lib/py/ynl.py b/tools/testing/selftests/net/lib/py/ynl.py
new file mode 100644
index 000000000000..1ace58370c06
--- /dev/null
+++ b/tools/testing/selftests/net/lib/py/ynl.py
@@ -0,0 +1,49 @@
+# SPDX-License-Identifier: GPL-2.0
+
+import sys
+from pathlib import Path
+from .consts import KSRC, KSFT_DIR
+from .ksft import ksft_pr, ktap_result
+
+# Resolve paths
+try:
+ if (KSFT_DIR / "kselftest-list.txt").exists():
+ # Running in "installed" selftests
+ tools_full_path = KSFT_DIR
+ SPEC_PATH = KSFT_DIR / "net/lib/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.lib.ynl.lib import YnlFamily, NlError
+ else:
+ # Running in tree
+ tools_full_path = KSRC / "tools"
+ SPEC_PATH = KSRC / "Documentation/netlink/specs"
+
+ sys.path.append(tools_full_path.as_posix())
+ from net.ynl.lib import YnlFamily, NlError
+except ModuleNotFoundError as e:
+ ksft_pr("Failed importing `ynl` library from kernel sources")
+ ksft_pr(str(e))
+ ktap_result(True, comment="SKIP")
+ sys.exit(4)
+
+#
+# Wrapper classes, loading the right specs
+# Set schema='' to avoid jsonschema validation, it's slow
+#
+class EthtoolFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('ethtool.yaml')).as_posix(),
+ schema='')
+
+
+class RtnlFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('rt_link.yaml')).as_posix(),
+ schema='')
+
+
+class NetdevFamily(YnlFamily):
+ def __init__(self):
+ super().__init__((SPEC_PATH / Path('netdev.yaml')).as_posix(),
+ schema='')
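Review bot: The `except ModuleNotFoundError` handler reports SKIP for any missing module, which can hide a broken environment. Because the tools directory is added with `sys.path.append`, an unrelated top-level `net` package earlier on the path would presumably make `from net.ynl.lib import ...` fail the same way and turn into a quiet skip. Including the resolved `tools_full_path` in the `ksft_pr` output, or using `sys.path.insert(0, ...)`, would make that case diagnosable.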
diff --git a/tools/testing/selftests/net/mptcp/config b/tools/testing/selftests/net/mptcp/config
index e317c2e44dae..4f80014cae49 100644
--- a/tools/testing/selftests/net/mptcp/config
+++ b/tools/testing/selftests/net/mptcp/config
@@ -22,8 +22,11 @@ CONFIG_NFT_TPROXY=m
CONFIG_NFT_SOCKET=m
CONFIG_IP_ADVANCED_ROUTER=y
CONFIG_IP_MULTIPLE_TABLES=y
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP_NF_MANGLE=m
CONFIG_IP_NF_TARGET_REJECT=m
CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IP6_NF_FILTER=m
CONFIG_NET_ACT_CSUM=m
CONFIG_NET_ACT_PEDIT=m
CONFIG_NET_CLS_ACT=y
diff --git a/tools/testing/selftests/net/mptcp/diag.sh b/tools/testing/selftests/net/mptcp/diag.sh
index 85a8ee9395b3..776d43a6922d 100755
--- a/tools/testing/selftests/net/mptcp/diag.sh
+++ b/tools/testing/selftests/net/mptcp/diag.sh
@@ -1,14 +1,15 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns="ns1-$rndh"
-ksft_skip=4
-test_cnt=1
-timeout_poll=100
+ns=""
+timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
ret=0
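Review bot: The `#shellcheck disable=SC2086` directive sits before the first command, so it applies to the whole file and also hides unquoted expansions in code added later, which is exactly the case the comment says should be quoted; the same header is added to mptcp_connect.sh. Scoping the directive to the functions that still rely on word splitting would keep the check active for new code. The comment's last clause is hard to parse; something like `# code, but existing unquoted uses are accepted: there were too many to fix while addressing the other shellcheck findings.` reads more clearly.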
@@ -20,31 +21,23 @@ flush_pids()
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGUSR1 &>/dev/null
- for _ in $(seq 10); do
+ for _ in $(seq $((timeout_poll * 10))); do
[ -z "$(ip netns pids "${ns}")" ] && break
sleep 0.1
done
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
ip netns pids "${ns}" | xargs --no-run-if-empty kill -SIGKILL &>/dev/null
- ip netns del $ns
+ mptcp_lib_ns_exit "${ns}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-ss -h | grep -q MPTCP
-if [ $? -ne 0 ];then
- echo "SKIP: ss tool does not support MPTCP"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip ss
get_msk_inuse()
{
@@ -56,26 +49,25 @@ __chk_nr()
local command="$1"
local expected=$2
local msg="$3"
- local skip="${4:-SKIP}"
+ local skip="${4-SKIP}"
local nr
nr=$(eval $command)
- printf "%-50s" "$msg"
- if [ $nr != $expected ]; then
- if [ $nr = "$skip" ] && ! mptcp_lib_expect_all_features; then
- echo "[ skip ] Feature probably not supported"
+ mptcp_lib_print_title "$msg"
+ if [ "$nr" != "$expected" ]; then
+ if [ "$nr" = "$skip" ] && ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
mptcp_lib_result_skip "${msg}"
else
- echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_pr_fail "expected $expected found $nr"
mptcp_lib_result_fail "${msg}"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
fi
else
- echo "[ ok ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${msg}"
fi
- test_cnt=$((test_cnt+1))
}
__chk_msk_nr()
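Review bot: The switch from `${4:-SKIP}` to `${4-SKIP}` makes an empty fourth argument meaningful, and `chk_msk_cestab` later in this diff passes `""` so that an empty counter value is treated as "not supported", but nothing at the function says so. A comment above the `local skip` line such as `# $4: value meaning "feature not supported"; "" is valid and matches an empty result` would stop someone restoring the `:-` form. `nr=$(eval $command)` is still an `eval` of a caller-built string, so callers must keep passing fixed commands only. __chk_nr starts before this hunk.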
@@ -91,6 +83,15 @@ chk_msk_nr()
__chk_msk_nr "grep -c token:" "$@"
}
+chk_listener_nr()
+{
+ local expected=$1
+ local msg="$2"
+
+ __chk_nr "ss -nlHMON $ns | wc -l" "$expected" "$msg - mptcp" 0
+ __chk_nr "ss -nlHtON $ns | wc -l" "$expected" "$msg - subflows"
+}
+
wait_msk_nr()
{
local condition="grep -c token:"
@@ -111,20 +112,19 @@ wait_msk_nr()
sleep 1
done
- printf "%-50s" "$msg"
+ mptcp_lib_print_title "$msg"
if [ $i -ge $timeout ]; then
- echo "[ fail ] timeout while expecting $expected max $max last $nr"
+ mptcp_lib_pr_fail "timeout while expecting $expected max $max last $nr"
mptcp_lib_result_fail "${msg} # timeout"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
elif [ $nr != $expected ]; then
- echo "[ fail ] expected $expected found $nr"
+ mptcp_lib_pr_fail "expected $expected found $nr"
mptcp_lib_result_fail "${msg} # unexpected result"
- ret=$test_cnt
+ ret=${KSFT_FAIL}
else
- echo "[ ok ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${msg}"
fi
- test_cnt=$((test_cnt+1))
}
chk_msk_fallback_nr()
@@ -166,37 +166,90 @@ chk_msk_listen()
chk_msk_inuse()
{
local expected=$1
- local msg="$2"
+ local msg="....chk ${2:-${expected}} msk in use"
local listen_nr
+ if [ "${expected}" -eq 0 ]; then
+ msg+=" after flush"
+ fi
+
listen_nr=$(ss -N "${ns}" -Ml | grep -c LISTEN)
expected=$((expected + listen_nr))
for _ in $(seq 10); do
- if [ $(get_msk_inuse) -eq $expected ];then
+ if [ "$(get_msk_inuse)" -eq $expected ]; then
break
fi
sleep 0.1
done
- __chk_nr get_msk_inuse $expected "$msg" 0
+ __chk_nr get_msk_inuse $expected "${msg}" 0
}
-# $1: ns, $2: port
-wait_local_port_listen()
+# $1: cestab nr
+chk_msk_cestab()
{
- local listener_ns="${1}"
- local port="${2}"
+ local expected=$1
+ local msg="....chk ${2:-${expected}} cestab"
- local port_hex i
+ if [ "${expected}" -eq 0 ]; then
+ msg+=" after flush"
+ fi
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
+ __chk_nr "mptcp_lib_get_counter ${ns} MPTcpExtMPCurrEstab" \
+ "${expected}" "${msg}" ""
+}
+
+msk_info_get_value()
+{
+ local port="${1}"
+ local info="${2}"
+
+ ss -N "${ns}" -inHM dport "${port}" | \
+ mptcp_lib_get_info_value "${info}" "${info}"
+}
+
+chk_msk_info()
+{
+ local port="${1}"
+ local info="${2}"
+ local cnt="${3}"
+ local msg="....chk ${info}"
+ local delta_ms=250 # half what we waited before, just to be sure
+ local now
+
+ now=$(msk_info_get_value "${port}" "${info}")
+
+ mptcp_lib_print_title "${msg}"
+ if { [ -z "${cnt}" ] || [ -z "${now}" ]; } &&
+ ! mptcp_lib_expect_all_features; then
+ mptcp_lib_pr_skip "Feature probably not supported"
+ mptcp_lib_result_skip "${msg}"
+ elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then
+ mptcp_lib_pr_ok
+ mptcp_lib_result_pass "${msg}"
+ else
+ mptcp_lib_pr_fail "value of ${info} changed by $((now - cnt))ms," \
+ "expected at least ${delta_ms}ms"
+ mptcp_lib_result_fail "${msg}"
+ ret=${KSFT_FAIL}
+ fi
+}
+
+chk_last_time_info()
+{
+ local port="${1}"
+ local data_sent data_recv ack_recv
+
+ data_sent=$(msk_info_get_value "${port}" "last_data_sent")
+ data_recv=$(msk_info_get_value "${port}" "last_data_recv")
+ ack_recv=$(msk_info_get_value "${port}" "last_ack_recv")
+
+ sleep 0.5 # wait to check after if the timestamps difference
+
+ chk_msk_info "${port}" "last_data_sent" "${data_sent}"
+ chk_msk_info "${port}" "last_data_recv" "${data_recv}"
+ chk_msk_info "${port}" "last_ack_recv" "${ack_recv}"
}
wait_connected()
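Review bot: In `chk_msk_info`, when `cnt` or `now` is empty and `mptcp_lib_expect_all_features` succeeds, control falls through to `[ "$((cnt + delta_ms))" -lt "${now}" ]`. An empty `cnt` is evaluated as 0 there, so the check can pass with no baseline at all, and an empty `now` makes `[` print an "integer expression expected" error before the generic failure message. An explicit branch for the missing value, as sketched below, reports that case as a failure with a clear reason.

```shell
	# … unchanged: skip branch when the feature is not expected …
	elif [ -z "${cnt}" ] || [ -z "${now}" ]; then
		mptcp_lib_pr_fail "no value for ${info}"
		mptcp_lib_result_fail "${msg}"
		ret=${KSFT_FAIL}
	elif [ "$((cnt + delta_ms))" -lt "${now}" ]; then
	# … unchanged: ok and fail branches …
```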
@@ -214,15 +267,14 @@ wait_connected()
}
trap cleanup EXIT
-ip netns add $ns
-ip -n $ns link set dev lo up
+mptcp_lib_ns_init ns
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
./mptcp_connect -p 10000 -l -t ${timeout_poll} -w 20 \
0.0.0.0 >/dev/null &
-wait_local_port_listen $ns 10000
+mptcp_lib_wait_local_port_listen $ns 10000
chk_msk_nr 0 "no msk on netns creation"
chk_msk_listen 10000
@@ -233,19 +285,22 @@ echo "b" | \
127.0.0.1 >/dev/null &
wait_connected $ns 10000
chk_msk_nr 2 "after MPC handshake "
+chk_last_time_info 10000
chk_msk_remote_key_nr 2 "....chk remote_key"
chk_msk_fallback_nr 0 "....chk no fallback"
-chk_msk_inuse 2 "....chk 2 msk in use"
+chk_msk_inuse 2
+chk_msk_cestab 2
flush_pids
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
+chk_msk_inuse 0 "2->0"
+chk_msk_cestab 0 "2->0"
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
./mptcp_connect -p 10001 -l -s TCP -t ${timeout_poll} -w 20 \
0.0.0.0 >/dev/null &
-wait_local_port_listen $ns 10001
+mptcp_lib_wait_local_port_listen $ns 10001
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
@@ -253,22 +308,24 @@ echo "b" | \
127.0.0.1 >/dev/null &
wait_connected $ns 10001
chk_msk_fallback_nr 1 "check fallback"
-chk_msk_inuse 1 "....chk 1 msk in use"
+chk_msk_inuse 1
+chk_msk_cestab 1
flush_pids
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
+chk_msk_inuse 0 "1->0"
+chk_msk_cestab 0 "1->0"
NR_CLIENTS=100
-for I in `seq 1 $NR_CLIENTS`; do
+for I in $(seq 1 $NR_CLIENTS); do
echo "a" | \
timeout ${timeout_test} \
ip netns exec $ns \
./mptcp_connect -p $((I+10001)) -l -w 20 \
-t ${timeout_poll} 0.0.0.0 >/dev/null &
done
-wait_local_port_listen $ns $((NR_CLIENTS + 10001))
+mptcp_lib_wait_local_port_listen $ns $((NR_CLIENTS + 10001))
-for I in `seq 1 $NR_CLIENTS`; do
+for I in $(seq 1 $NR_CLIENTS); do
echo "b" | \
timeout ${timeout_test} \
ip netns exec $ns \
@@ -277,10 +334,28 @@ for I in `seq 1 $NR_CLIENTS`; do
done
wait_msk_nr $((NR_CLIENTS*2)) "many msk socket present"
-chk_msk_inuse $((NR_CLIENTS*2)) "....chk many msk in use"
+chk_msk_inuse $((NR_CLIENTS*2)) "many"
+chk_msk_cestab $((NR_CLIENTS*2)) "many"
flush_pids
-chk_msk_inuse 0 "....chk 0 msk in use after flush"
+chk_msk_inuse 0 "many->0"
+chk_msk_cestab 0 "many->0"
+
+chk_listener_nr 0 "no listener sockets"
+NR_SERVERS=100
+for I in $(seq 1 $NR_SERVERS); do
+ ip netns exec $ns ./mptcp_connect -p $((I + 20001)) \
+ -t ${timeout_poll} -l 0.0.0.0 >/dev/null 2>&1 &
+done
+mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001))
+
+chk_listener_nr $NR_SERVERS "many listener sockets"
+
+# graceful termination
+for I in $(seq 1 $NR_SERVERS); do
+ echo a | ip netns exec $ns ./mptcp_connect -p $((I + 20001)) 127.0.0.1 >/dev/null 2>&1 &
+done
+flush_pids
mptcp_lib_result_print_all_tap
exit $ret
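Review bot: `mptcp_lib_wait_local_port_listen $ns $((NR_SERVERS + 20001))` waits only for the last port, yet the 100 servers are started in the background and nothing guarantees they reach listen() in start order. `chk_listener_nr $NR_SERVERS "many listener sockets"` can therefore presumably run while some servers are still starting and report a spurious failure. Polling the listener count until it reaches `$NR_SERVERS`, or waiting on each port, before the check would remove that race.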
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.c b/tools/testing/selftests/net/mptcp/mptcp_connect.c
index c7f9ebeebc2c..d2043ec3bf6d 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.c
@@ -18,6 +18,7 @@
#include <sys/ioctl.h>
#include <sys/poll.h>
+#include <sys/random.h>
#include <sys/sendfile.h>
#include <sys/stat.h>
#include <sys/socket.h>
@@ -1125,15 +1126,11 @@ again:
static void init_rng(void)
{
- int fd = open("/dev/urandom", O_RDONLY);
unsigned int foo;
- if (fd > 0) {
- int ret = read(fd, &foo, sizeof(foo));
-
- if (ret < 0)
- srand(fd + foo);
- close(fd);
+ if (getrandom(&foo, sizeof(foo), 0) == -1) {
+ perror("getrandom");
+ exit(1);
}
srand(foo);
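Review bot: `getrandom(&foo, sizeof(foo), 0) == -1` treats every non-error return as a full read; comparing against the requested size, `if (getrandom(&foo, sizeof(foo), 0) != (ssize_t)sizeof(foo)) {`, also covers a short return at no cost. Since the value only feeds `srand()`, a short comment stating that the seed is for test-data variation and not for anything security-sensitive would help readers who see getrandom() and assume otherwise. The closing brace of init_rng() is outside this hunk.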
diff --git a/tools/testing/selftests/net/mptcp/mptcp_connect.sh b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
index b1fc8afd072d..b77fb7065bfb 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_connect.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_connect.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
time_start=$(date +%s)
@@ -13,7 +18,6 @@ sout=""
cin_disconnect=""
cin=""
cout=""
-ksft_skip=4
capture=false
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
@@ -29,6 +33,7 @@ do_tcp=0
checksum=false
filesize=0
connect_per_transfer=1
+port=$((10000 - 1))
if [ $tc_loss -eq 100 ];then
tc_loss=1%
@@ -60,14 +65,14 @@ while getopts "$optstring" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"d")
if [ $OPTARG -ge 0 ];then
tc_delay="$OPTARG"
else
echo "-d requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"e")
@@ -91,7 +96,7 @@ while getopts "$optstring" option;do
sndbuf="$OPTARG"
else
echo "-S requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"R")
@@ -99,7 +104,7 @@ while getopts "$optstring" option;do
rcvbuf="$OPTARG"
else
echo "-R requires numeric argument, got \"$OPTARG\"" 1>&2
- exit 1
+ exit ${KSFT_FAIL}
fi
;;
"m")
@@ -116,21 +121,20 @@ while getopts "$optstring" option;do
;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
-ns4="ns4-$rndh"
+ns1=""
+ns2=""
+ns3=""
+ns4=""
-TEST_COUNT=0
TEST_GROUP=""
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cin_disconnect" "$cout_disconnect"
@@ -138,21 +142,12 @@ cleanup()
rm -f "$sin" "$sout"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns del $netns
- rm -f /tmp/$netns.{nstat,out}
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}" "${ns4}"
}
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip tc
sin=$(mktemp)
sout=$(mktemp)
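Review bot: cleanup() no longer removes `/tmp/$netns.{nstat,out}`, while do_transfer still reads `/tmp/${listener_ns}.out` further down. Whether `mptcp_lib_ns_exit` deletes those files is not visible in this diff and should be confirmed, otherwise they accumulate across runs. They are also fixed names in a world-writable directory, used by a script that presumably runs as root; creating them under a `mktemp -d` directory that cleanup() removes would avoid clashes with pre-existing files. cleanup() starts before this hunk.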
@@ -163,10 +158,7 @@ cin_disconnect="$cin".disconnect
cout_disconnect="$cout".disconnect
trap cleanup EXIT
-for i in "$ns1" "$ns2" "$ns3" "$ns4";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
-done
+mptcp_lib_ns_init ns1 ns2 ns3 ns4
# "$ns1" ns2 ns3 ns4
# ns1eth2 ns2eth1 ns2eth3 ns3eth2 ns3eth4 ns4eth3
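Review bot: The removed loop ended each `ip netns add` with `|| exit $ksft_skip`, whereas the status of `mptcp_lib_ns_init ns1 ns2 ns3 ns4` is not checked. If the helper returns non-zero instead of exiting (its body is not shown in this diff), the script carries on with empty namespace names. Writing `mptcp_lib_ns_init ns1 ns2 ns3 ns4 || exit ${KSFT_SKIP}` would keep the old behaviour explicit at the call site.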
@@ -225,8 +217,9 @@ set_ethtool_flags() {
local dev="$2"
local flags="$3"
- ip netns exec $ns ethtool -K $dev $flags 2>/dev/null
- [ $? -eq 0 ] && echo "INFO: set $ns dev $dev: ethtool -K $flags"
+ if ip netns exec $ns ethtool -K $dev $flags 2>/dev/null; then
+ mptcp_lib_pr_info "set $ns dev $dev: ethtool -K $flags"
+ fi
}
set_random_ethtool_flags() {
@@ -254,41 +247,23 @@ else
set_ethtool_flags "$ns4" ns4eth3 "$ethtool_args"
fi
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
-check_transfer()
-{
- local in=$1
- local out=$2
- local what=$3
-
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
- echo "[ FAIL ] $what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
-
- return 1
- fi
-
- return 0
+print_larger_title() {
+ # here we don't have the time, a bit longer for the alignment
+ MPTCP_LIB_TEST_FORMAT="%02u %-69s" \
+ mptcp_lib_print_title "${@}"
}
check_mptcp_disabled()
{
- local disabled_ns="ns_disabled-$rndh"
- ip netns add ${disabled_ns} || exit $ksft_skip
+ local disabled_ns
+ mptcp_lib_ns_init disabled_ns
+ print_larger_title "New MPTCP socket can be blocked via sysctl"
# net.mptcp.enabled should be enabled by default
if [ "$(ip netns exec ${disabled_ns} sysctl net.mptcp.enabled | awk '{ print $3 }')" -ne 1 ]; then
- echo -e "net.mptcp.enabled sysctl is not 1 by default\t\t[ FAIL ]"
+ mptcp_lib_pr_fail "net.mptcp.enabled sysctl is not 1 by default"
mptcp_lib_result_fail "net.mptcp.enabled sysctl is not 1 by default"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
ip netns exec ${disabled_ns} sysctl -q net.mptcp.enabled=0
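Review bot: The early `return 1` taken when `net.mptcp.enabled` is not 1 leaves the namespace created by `mptcp_lib_ns_init disabled_ns` behind. `mptcp_lib_ns_exit "${disabled_ns}"` is only reached later in the function, and as far as this diff shows cleanup() handles ns1 to ns4 only. Add `mptcp_lib_ns_exit "${disabled_ns}"` before that `return 1`. check_mptcp_disabled continues past the end of this hunk.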
@@ -296,26 +271,20 @@ check_mptcp_disabled()
local err=0
LC_ALL=C ip netns exec ${disabled_ns} ./mptcp_connect -p 10000 -s MPTCP 127.0.0.1 < "$cin" 2>&1 | \
grep -q "^socket: Protocol not available$" && err=1
- ip netns delete ${disabled_ns}
+ mptcp_lib_ns_exit "${disabled_ns}"
if [ ${err} -eq 0 ]; then
- echo -e "New MPTCP socket cannot be blocked via sysctl\t\t[ FAIL ]"
+ mptcp_lib_pr_fail "New MPTCP socket cannot be blocked via sysctl"
mptcp_lib_result_fail "New MPTCP socket cannot be blocked via sysctl"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
- echo -e "New MPTCP socket can be blocked via sysctl\t\t[ OK ]"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "New MPTCP socket can be blocked via sysctl"
return 0
}
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
do_ping()
{
local listener_ns="$1"
@@ -324,7 +293,7 @@ do_ping()
local ping_args="-q -c 1"
local rc=0
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
$ipv6 || return 0
ping_args="${ping_args} -6"
fi
@@ -332,8 +301,8 @@ do_ping()
ip netns exec ${connector_ns} ping ${ping_args} $connect_addr >/dev/null || rc=1
if [ $rc -ne 0 ] ; then
- echo "$listener_ns -> $connect_addr connectivity [ FAIL ]" 1>&2
- ret=1
+ mptcp_lib_pr_fail "$listener_ns -> $connect_addr connectivity"
+ ret=${KSFT_FAIL}
return 1
fi
@@ -341,38 +310,6 @@ do_ping()
return 0
}
-# $1: ns, $2: MIB counter
-get_mib_counter()
-{
- local listener_ns="${1}"
- local mib="${2}"
-
- # strip the header
- ip netns exec "${listener_ns}" \
- nstat -z -a "${mib}" | \
- tail -n+2 | \
- while read a count c rest; do
- echo $count
- done
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex i
-
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
do_transfer()
{
local listener_ns="$1"
@@ -383,24 +320,22 @@ do_transfer()
local local_addr="$6"
local extra_args="$7"
- local port
- port=$((10000+$TEST_COUNT))
- TEST_COUNT=$((TEST_COUNT+1))
+ port=$((port + 1))
if [ "$rcvbuf" -gt 0 ]; then
- extra_args="$extra_args -R $rcvbuf"
+ extra_args+=" -R $rcvbuf"
fi
if [ "$sndbuf" -gt 0 ]; then
- extra_args="$extra_args -S $sndbuf"
+ extra_args+=" -S $sndbuf"
fi
if [ -n "$testmode" ]; then
- extra_args="$extra_args -m $testmode"
+ extra_args+=" -m $testmode"
fi
if [ -n "$extra_args" ] && $options_log; then
- echo "INFO: extra options: $extra_args"
+ mptcp_lib_pr_info "extra options: $extra_args"
fi
options_log=false
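Review bot: With `local port` removed, `port=$((port + 1))` now advances the script-level `port` that is initialised to `$((10000 - 1))`, and nothing states that do_transfer owns that counter. A comment at the increment, for example `# global: port used by this transfer, bumped once per call`, would make the side effect visible to anyone reusing `port` elsewhere. do_transfer starts before and ends after this hunk.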
@@ -412,10 +347,11 @@ do_transfer()
addr_port=$(printf "%s:%d" ${connect_addr} ${port})
local result_msg
result_msg="$(printf "%.3s %-5s -> %.3s (%-20s) %-5s" ${connector_ns} ${cl_proto} ${listener_ns} ${addr_port} ${srv_proto})"
- printf "%s\t" "${result_msg}"
+ mptcp_lib_print_title "${result_msg}"
if $capture; then
local capuser
+ local rndh="${connector_ns:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
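Review bot: `local rndh="${connector_ns:4}"` recovers the random suffix by dropping the first four characters, which silently depends on the name format that `mptcp_lib_ns_init` generates (not visible in this diff). A comment on that line, such as `# ns names are "<prefix>-<suffix>"; keep only the suffix for the capture file name`, adjusted to the real format, would document the coupling. If the names do contain a dash, `${connector_ns#*-}` would be less tied to the prefix length. do_transfer starts before and ends after this hunk.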
@@ -441,12 +377,20 @@ do_transfer()
nstat -n
fi
- local stat_synrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_last_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_last=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local stat_csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local stat_synrx_last_l
+ local stat_ackrx_last_l
+ local stat_cookietx_last
+ local stat_cookierx_last
+ local stat_csum_err_s
+ local stat_csum_err_c
+ local stat_tcpfb_last_l
+ stat_synrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_last=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ stat_csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ stat_tcpfb_last_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
timeout ${timeout_test} \
ip netns exec ${listener_ns} \
@@ -454,7 +398,7 @@ do_transfer()
$extra_args $local_addr < "$sin" > "$sout" &
local spid=$!
- wait_local_port_listen "${listener_ns}" "${port}"
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
local start
start=$(date +%s%3N)
@@ -490,7 +434,7 @@ do_transfer()
result_msg+=" # time=${duration}ms"
printf "(duration %05sms) " "${duration}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo "[ FAIL ] client exit code $retc, server $rets" 1>&2
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
cat /tmp/${listener_ns}.out
@@ -504,16 +448,24 @@ do_transfer()
return 1
fi
- check_transfer $sin $cout "file received by client"
+ mptcp_lib_check_transfer $sin $cout "file received by client"
retc=$?
- check_transfer $cin $sout "file received by server"
+ mptcp_lib_check_transfer $cin $sout "file received by server"
rets=$?
- local stat_synrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
- local stat_ackrx_now_l=$(get_mib_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
- local stat_cookietx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesSent")
- local stat_cookierx_now=$(get_mib_counter "${listener_ns}" "TcpExtSyncookiesRecv")
- local stat_ooo_now=$(get_mib_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ local extra=""
+ local stat_synrx_now_l
+ local stat_ackrx_now_l
+ local stat_cookietx_now
+ local stat_cookierx_now
+ local stat_ooo_now
+ local stat_tcpfb_now_l
+ stat_synrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableSYNRX")
+ stat_ackrx_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableACKRX")
+ stat_cookietx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesSent")
+ stat_cookierx_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtSyncookiesRecv")
+ stat_ooo_now=$(mptcp_lib_get_counter "${listener_ns}" "TcpExtTCPOFOQueue")
+ stat_tcpfb_now_l=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtMPCapableFallbackACK")
expect_synrx=$((stat_synrx_last_l))
expect_ackrx=$((stat_ackrx_last_l))
@@ -522,75 +474,84 @@ do_transfer()
cookies=${cookies##*=}
if [ ${cl_proto} = "MPTCP" ] && [ ${srv_proto} = "MPTCP" ]; then
- expect_synrx=$((stat_synrx_last_l+$connect_per_transfer))
- expect_ackrx=$((stat_ackrx_last_l+$connect_per_transfer))
+ expect_synrx=$((stat_synrx_last_l+connect_per_transfer))
+ expect_ackrx=$((stat_ackrx_last_l+connect_per_transfer))
fi
if [ ${stat_synrx_now_l} -lt ${expect_synrx} ]; then
- printf "[ FAIL ] lower MPC SYN rx (%d) than expected (%d)\n" \
- "${stat_synrx_now_l}" "${expect_synrx}" 1>&2
+ mptcp_lib_pr_fail "lower MPC SYN rx (${stat_synrx_now_l})" \
+ "than expected (${expect_synrx})"
retc=1
fi
- if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} -a ${stat_ooo_now} -eq 0 ]; then
+ if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]; then
if [ ${stat_ooo_now} -eq 0 ]; then
- printf "[ FAIL ] lower MPC ACK rx (%d) than expected (%d)\n" \
- "${stat_ackrx_now_l}" "${expect_ackrx}" 1>&2
+ mptcp_lib_pr_fail "lower MPC ACK rx (${stat_ackrx_now_l})" \
+ "than expected (${expect_ackrx})"
rets=1
else
- printf "[ Note ] fallback due to TCP OoO"
+ extra+=" [ Note ] fallback due to TCP OoO"
fi
fi
if $checksum; then
- local csum_err_s=$(get_mib_counter "${listener_ns}" "MPTcpExtDataCsumErr")
- local csum_err_c=$(get_mib_counter "${connector_ns}" "MPTcpExtDataCsumErr")
+ local csum_err_s
+ local csum_err_c
+ csum_err_s=$(mptcp_lib_get_counter "${listener_ns}" "MPTcpExtDataCsumErr")
+ csum_err_c=$(mptcp_lib_get_counter "${connector_ns}" "MPTcpExtDataCsumErr")
local csum_err_s_nr=$((csum_err_s - stat_csum_err_s))
if [ $csum_err_s_nr -gt 0 ]; then
- printf "[ FAIL ]\nserver got $csum_err_s_nr data checksum error[s]"
+ mptcp_lib_pr_fail "server got ${csum_err_s_nr} data checksum error[s]"
rets=1
fi
local csum_err_c_nr=$((csum_err_c - stat_csum_err_c))
if [ $csum_err_c_nr -gt 0 ]; then
- printf "[ FAIL ]\nclient got $csum_err_c_nr data checksum error[s]"
+ mptcp_lib_pr_fail "client got ${csum_err_c_nr} data checksum error[s]"
retc=1
fi
fi
- if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
- printf "[ OK ]"
- mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
- else
- mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
+ if [ ${stat_ooo_now} -eq 0 ] && [ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]; then
+ mptcp_lib_pr_fail "unexpected fallback to TCP"
+ rets=1
fi
if [ $cookies -eq 2 ];then
if [ $stat_cookietx_last -ge $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: did not advance"
+ extra+=" WARN: CookieSent: did not advance"
fi
if [ $stat_cookierx_last -ge $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: did not advance"
+ extra+=" WARN: CookieRecv: did not advance"
fi
else
if [ $stat_cookietx_last -ne $stat_cookietx_now ] ;then
- printf " WARN: CookieSent: changed"
+ extra+=" WARN: CookieSent: changed"
fi
if [ $stat_cookierx_last -ne $stat_cookierx_now ] ;then
- printf " WARN: CookieRecv: changed"
+ extra+=" WARN: CookieRecv: changed"
fi
fi
if [ ${stat_synrx_now_l} -gt ${expect_synrx} ]; then
- printf " WARN: SYNRX: expect %d, got %d (probably retransmissions)" \
- "${expect_synrx}" "${stat_synrx_now_l}"
+ extra+=" WARN: SYNRX: expect ${expect_synrx},"
+ extra+=" got ${stat_synrx_now_l} (probably retransmissions)"
fi
if [ ${stat_ackrx_now_l} -gt ${expect_ackrx} ]; then
- printf " WARN: ACKRX: expect %d, got %d (probably retransmissions)" \
- "${expect_ackrx}" "${stat_ackrx_now_l}"
+ extra+=" WARN: ACKRX: expect ${expect_ackrx},"
+ extra+=" got ${stat_ackrx_now_l} (probably retransmissions)"
+ fi
+
+ if [ $retc -eq 0 ] && [ $rets -eq 0 ]; then
+ mptcp_lib_pr_ok "${extra:1}"
+ mptcp_lib_result_pass "${TEST_GROUP}: ${result_msg}"
+ else
+ if [ -n "${extra}" ]; then
+ mptcp_lib_print_warn "${extra:1}"
+ fi
+ mptcp_lib_result_fail "${TEST_GROUP}: ${result_msg}"
fi
- echo
cat "$capout"
[ $retc -eq 0 ] && [ $rets -eq 0 ]
}
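Review bot: The `else` branch that appends `[ Note ] fallback due to TCP OoO` to `extra` can never run, because the outer test `[ ${stat_ackrx_now_l} -lt ${expect_ackrx} ] && [ ${stat_ooo_now} -eq 0 ]` already requires `stat_ooo_now` to be zero before the inner `if [ ${stat_ooo_now} -eq 0 ]` is reached. Reducing the outer condition to `if [ ${stat_ackrx_now_l} -lt ${expect_ackrx} ]; then` would make the note reachable and leave the failure case unchanged. The counter comparisons are also unquoted, so an empty value from `mptcp_lib_get_counter` would turn a test such as `[ ${stat_tcpfb_last_l} -ne ${stat_tcpfb_now_l} ]` into a syntax error instead of a clean skip; whether the helper can return empty for `MPTcpExtMPCapableFallbackACK` is not visible here. do_transfer starts outside this hunk.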
@@ -613,9 +574,8 @@ make_file()
ksize=$((SIZE / 1024))
rem=$((SIZE - (ksize * 1024)))
- dd if=/dev/urandom of="$name" bs=1024 count=$ksize 2> /dev/null
- dd if=/dev/urandom conv=notrunc of="$name" bs=1 count=$rem 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $ksize
+ dd if=/dev/urandom conv=notrunc of="$name" oflag=append bs=1 count=$rem 2> /dev/null
echo "Created $name (size $(du -b "$name")) containing data sent by $who"
}
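Review bot: The remainder bytes are now appended after whatever `mptcp_lib_make_file` wrote, so if that helper ends the file with `MPTCP_TEST_FILE_END_MARKER` as the removed `echo -e` line did (the helper is not visible here), the marker is no longer the last thing in the file whenever `rem` is non-zero. Either write the `rem` bytes before the marker, or, if this is intended, add a comment such as `# remainder lands after the end marker; only the total size matters here`. `$name` is also unquoted in `mptcp_lib_make_file $name 1024 $ksize` while the `dd` line quotes it; `mptcp_lib_make_file "$name" 1024 "$ksize"` would be consistent. make_file starts outside this hunk.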
@@ -635,12 +595,12 @@ run_tests_lo()
fi
# skip if we don't want v6
- if ! $ipv6 && is_v6 "${connect_addr}"; then
+ if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
return 0
fi
local local_addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
else
local_addr="0.0.0.0"
@@ -708,7 +668,7 @@ run_test_transparent()
TEST_GROUP="${msg}"
# skip if we don't want v6
- if ! $ipv6 && is_v6 "${connect_addr}"; then
+ if ! $ipv6 && mptcp_lib_is_v6 "${connect_addr}"; then
return 0
fi
@@ -717,12 +677,12 @@ run_test_transparent()
# following function has been exported (T). Not great but better than
# checking for a specific kernel version.
if ! mptcp_lib_kallsyms_has "T __ip_sock_set_tos$"; then
- echo "INFO: ${msg} not supported by the kernel: SKIP"
+ mptcp_lib_pr_skip "${msg} not supported by the kernel"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
-ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
+ if ! ip netns exec "$listener_ns" nft -f /dev/stdin <<"EOF"
flush ruleset
table inet mangle {
chain divert {
@@ -733,43 +693,41 @@ table inet mangle {
}
}
EOF
- if [ $? -ne 0 ]; then
- echo "SKIP: $msg, could not load nft ruleset"
+ then
+ mptcp_lib_pr_skip "$msg, could not load nft ruleset"
mptcp_lib_fail_if_expected_feature "nft rules"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
local local_addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
r6flag="-6"
else
local_addr="0.0.0.0"
fi
- ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" $r6flag rule add fwmark 1 lookup 100; then
ip netns exec "$listener_ns" nft flush ruleset
- echo "SKIP: $msg, ip $r6flag rule failed"
+ mptcp_lib_pr_skip "$msg, ip $r6flag rule failed"
mptcp_lib_fail_if_expected_feature "ip rule"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100
- if [ $? -ne 0 ]; then
+ if ! ip -net "$listener_ns" route add local $local_addr/0 dev lo table 100; then
ip netns exec "$listener_ns" nft flush ruleset
ip -net "$listener_ns" $r6flag rule del fwmark 1 lookup 100
- echo "SKIP: $msg, ip route add local $local_addr failed"
+ mptcp_lib_pr_skip "$msg, ip route add local $local_addr failed"
mptcp_lib_fail_if_expected_feature "ip route"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- echo "INFO: test $msg"
+ mptcp_lib_pr_info "test $msg"
- TEST_COUNT=10000
+ port=$((20000 - 1))
local extra_args="-o TRANSPARENT"
do_transfer ${listener_ns} ${connector_ns} MPTCP MPTCP \
${connect_addr} ${local_addr} "${extra_args}"
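Review bot: `port=$((20000 - 1))` replaces the old `TEST_COUNT=10000` without saying why the value is one below 20000, or that it sets the `port` variable that is not declared `local` in this function. Presumably do_transfer advances `port` before using it, but that is not visible in this hunk. A one-line comment would help, e.g. `# do_transfer increments port first, so the transparent test uses 20000`, adjusted to the real behaviour. run_test_transparent starts and ends outside this hunk.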
@@ -780,12 +738,12 @@ EOF
ip -net "$listener_ns" route del local $local_addr/0 dev lo table 100
if [ $lret -ne 0 ]; then
- echo "FAIL: $msg, mptcp connection error" 1>&2
+ mptcp_lib_pr_fail "$msg, mptcp connection error"
ret=$lret
return 1
fi
- echo "PASS: $msg"
+ mptcp_lib_pr_info "$msg pass"
return 0
}
@@ -794,7 +752,7 @@ run_tests_peekmode()
local peekmode="$1"
TEST_GROUP="peek mode: ${peekmode}"
- echo "INFO: with peek mode: ${peekmode}"
+ mptcp_lib_pr_info "with peek mode: ${peekmode}"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-P ${peekmode}"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-P ${peekmode}"
}
@@ -804,12 +762,12 @@ run_tests_mptfo()
TEST_GROUP="MPTFO"
if ! mptcp_lib_kallsyms_has "mptcp_fastopen_"; then
- echo "INFO: TFO not supported by the kernel: SKIP"
+ mptcp_lib_pr_skip "TFO not supported by the kernel"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
- echo "INFO: with MPTFO start"
+ mptcp_lib_pr_info "with MPTFO start"
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=2
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=1
@@ -821,7 +779,7 @@ run_tests_mptfo()
ip netns exec "$ns1" sysctl -q net.ipv4.tcp_fastopen=0
ip netns exec "$ns2" sysctl -q net.ipv4.tcp_fastopen=0
- echo "INFO: with MPTFO end"
+ mptcp_lib_pr_info "with MPTFO end"
}
run_tests_disconnect()
@@ -832,7 +790,7 @@ run_tests_disconnect()
TEST_GROUP="full disconnect"
if ! mptcp_lib_kallsyms_has "mptcp_pm_data_reset$"; then
- echo "INFO: Full disconnect not supported: SKIP"
+ mptcp_lib_pr_skip "Full disconnect not supported"
mptcp_lib_result_skip "${TEST_GROUP}"
return
fi
@@ -845,7 +803,7 @@ run_tests_disconnect()
cin_disconnect="$old_cin"
connect_per_transfer=3
- echo "INFO: disconnect"
+ mptcp_lib_pr_info "disconnect"
run_tests_lo "$ns1" "$ns1" 10.0.1.1 1 "-I 3 -i $old_cin"
run_tests_lo "$ns1" "$ns1" dead:beef:1::1 1 "-I 3 -i $old_cin"
@@ -869,10 +827,10 @@ log_if_error()
local msg="$1"
if [ ${ret} -ne 0 ]; then
- echo "FAIL: ${msg}" 1>&2
+ mptcp_lib_pr_fail "${msg}"
final_ret=${ret}
- ret=0
+ ret=${KSFT_PASS}
return ${final_ret}
fi
@@ -894,7 +852,7 @@ check_mptcp_disabled
stop_if_error "The kernel configuration is not valid for MPTCP"
-echo "INFO: validating network environment with pings"
+print_larger_title "Validating network environment with pings"
for sender in "$ns1" "$ns2" "$ns3" "$ns4";do
do_ping "$ns1" $sender 10.0.1.1
do_ping "$ns1" $sender dead:beef:1::1
@@ -916,12 +874,13 @@ done
mptcp_lib_result_code "${ret}" "ping tests"
stop_if_error "Could not even run ping tests"
+mptcp_lib_pr_ok
[ -n "$tc_loss" ] && tc -net "$ns2" qdisc add dev ns2eth3 root netem loss random $tc_loss delay ${tc_delay}ms
-echo -n "INFO: Using loss of $tc_loss "
-test "$tc_delay" -gt 0 && echo -n "delay $tc_delay ms "
+tc_info="loss of $tc_loss "
+test "$tc_delay" -gt 0 && tc_info+="delay $tc_delay ms "
-reorder_delay=$(($tc_delay / 4))
+reorder_delay=$((tc_delay / 4))
if [ -z "${tc_reorder}" ]; then
reorder1=$((RANDOM%10))
@@ -930,17 +889,17 @@ if [ -z "${tc_reorder}" ]; then
if [ $reorder_delay -gt 0 ] && [ $reorder1 -lt 100 ] && [ $reorder2 -gt 0 ]; then
tc_reorder="reorder ${reorder1}% ${reorder2}%"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
elif [ "$tc_reorder" = "0" ];then
tc_reorder=""
elif [ "$reorder_delay" -gt 0 ];then
# reordering requires some delay
tc_reorder="reorder $tc_reorder"
- echo -n "$tc_reorder with delay ${reorder_delay}ms "
+ tc_info+="$tc_reorder with delay ${reorder_delay}ms "
fi
-echo "on ns3eth4"
+mptcp_lib_pr_info "Using ${tc_info}on ns3eth4"
tc -net "$ns3" qdisc add dev ns3eth4 root netem delay ${reorder_delay}ms $tc_reorder
diff --git a/tools/testing/selftests/net/mptcp/mptcp_inq.c b/tools/testing/selftests/net/mptcp/mptcp_inq.c
index 8672d898f8cd..218aac467321 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_inq.c
+++ b/tools/testing/selftests/net/mptcp/mptcp_inq.c
@@ -18,6 +18,7 @@
#include <time.h>
#include <sys/ioctl.h>
+#include <sys/random.h>
#include <sys/socket.h>
#include <sys/types.h>
#include <sys/wait.h>
@@ -519,15 +520,11 @@ static int client(int unixfd)
static void init_rng(void)
{
- int fd = open("/dev/urandom", O_RDONLY);
unsigned int foo;
- if (fd > 0) {
- int ret = read(fd, &foo, sizeof(foo));
-
- if (ret < 0)
- srand(fd + foo);
- close(fd);
+ if (getrandom(&foo, sizeof(foo), 0) == -1) {
+ perror("getrandom");
+ exit(1);
}
srand(foo);
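Review bot: `getrandom(&foo, sizeof(foo), 0)` blocks until the kernel random pool is initialised, whereas the removed read of `/dev/urandom` did not block, so init_rng could stall on a freshly booted test VM with little entropy. Since `foo` only seeds `srand()`, either pass `GRND_NONBLOCK` and fall back to a time-based seed on `EAGAIN`, or record the choice in a comment such as `/* seed for test payloads only; blocking at early boot is acceptable */`. Comparing the return value only with -1 looks sufficient, because a four-byte request is not expected to come back short. init_rng's body continues outside the hunk.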
diff --git a/tools/testing/selftests/net/mptcp/mptcp_join.sh b/tools/testing/selftests/net/mptcp/mptcp_join.sh
index 75a2438efdf3..fefa9173bdaa 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_join.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_join.sh
@@ -21,19 +21,18 @@ cinfail=""
cinsent=""
tmpfile=""
cout=""
+err=""
capout=""
ns1=""
ns2=""
-ksft_skip=4
iptables="iptables"
ip6tables="ip6tables"
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-capture=0
-checksum=0
-ip_mptcp=0
+capture=false
+checksum=false
check_invert=0
-validate_checksum=0
+validate_checksum=false
init=0
evts_ns1=""
evts_ns2=""
@@ -47,7 +46,7 @@ declare -A all_tests
declare -a only_tests_ids
declare -a only_tests_names
declare -A failed_tests
-TEST_COUNT=0
+MPTCP_LIB_TEST_FORMAT="%03u %s\n"
TEST_NAME=""
nr_blank=6
@@ -56,6 +55,8 @@ unset FAILING_LINKS
unset test_linkfail
unset addr_nr_ns1
unset addr_nr_ns2
+unset cestab_ns1
+unset cestab_ns2
unset sflags
unset fastclose
unset fullmesh
@@ -83,22 +84,12 @@ init_partial()
{
capout=$(mktemp)
- local sec rndh
- sec=$(date +%s)
- rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-
- ns1="ns1-$rndh"
- ns2="ns2-$rndh"
+ mptcp_lib_ns_init ns1 ns2
local netns
for netns in "$ns1" "$ns2"; do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
ip netns exec $netns sysctl -q net.mptcp.pm_type=0 2>/dev/null || true
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
- if [ $checksum -eq 1 ]; then
+ if $checksum; then
ip netns exec $netns sysctl -q net.mptcp.checksum_enabled=1
fi
done
@@ -133,8 +124,8 @@ init_shapers()
{
local i
for i in $(seq 1 4); do
- tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1
- tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1
+ tc -n $ns1 qdisc add dev ns1eth$i root netem rate 20mbit delay 1ms
+ tc -n $ns2 qdisc add dev ns2eth$i root netem rate 20mbit delay 1ms
done
}
@@ -142,46 +133,22 @@ cleanup_partial()
{
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2"; do
- ip netns del $netns
- rm -f /tmp/$netns.{nstat,out}
- done
-}
-
-check_tools()
-{
- mptcp_lib_check_mptcp
- mptcp_lib_check_kallsyms
-
- if ! ip -Version &> /dev/null; then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
- fi
-
- # Use the legacy version if available to support old kernel versions
- if iptables-legacy -V &> /dev/null; then
- iptables="iptables-legacy"
- ip6tables="ip6tables-legacy"
- elif ! iptables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without iptables tool"
- exit $ksft_skip
- elif ! ip6tables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without ip6tables tool"
- exit $ksft_skip
- fi
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
}
init() {
init=1
- check_tools
+ mptcp_lib_check_mptcp
+ mptcp_lib_check_kallsyms
+ mptcp_lib_check_tools ip tc ss "${iptables}" "${ip6tables}"
sin=$(mktemp)
sout=$(mktemp)
cin=$(mktemp)
cinsent=$(mktemp)
cout=$(mktemp)
+ err=$(mktemp)
evts_ns1=$(mktemp)
evts_ns2=$(mktemp)
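Review bot: init() now passes `"${iptables}"` and `"${ip6tables}"` to `mptcp_lib_check_tools`, but the removed check_tools was also where those variables were switched to `iptables-legacy` / `ip6tables-legacy` when available, and none of this file's visible hunks re-adds that selection. If the helper does not do it (it is not shown here), the rule setup would use the default `iptables` binary, which the removed comment says matters on old kernels. Please confirm where the selection now lives, or keep `if iptables-legacy -V &> /dev/null; then iptables="iptables-legacy"; ip6tables="ip6tables-legacy"; fi` ahead of the `mptcp_lib_check_tools` call. init() continues outside the hunk.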
@@ -197,14 +164,10 @@ cleanup()
rm -f "$sin" "$sout" "$cinsent" "$cinfail"
rm -f "$tmpfile"
rm -rf $evts_ns1 $evts_ns2
+ rm -f "$err"
cleanup_partial
}
-print_title()
-{
- printf "%03u %s\n" "${TEST_COUNT}" "${TEST_NAME}"
-}
-
print_check()
{
printf "%-${nr_blank}s%-36s" " " "${*}"
@@ -220,17 +183,17 @@ print_info()
print_ok()
{
- mptcp_lib_print_ok "[ ok ]${1:+ ${*}}"
+ mptcp_lib_pr_ok "${@}"
}
print_fail()
{
- mptcp_lib_print_err "[fail]${1:+ ${*}}"
+ mptcp_lib_pr_fail "${@}"
}
print_skip()
{
- mptcp_lib_print_warn "[skip]${1:+ ${*}}"
+ mptcp_lib_pr_skip "${@}"
}
# [ $1: fail msg ]
@@ -263,7 +226,7 @@ skip_test()
local i
for i in "${only_tests_ids[@]}"; do
- if [ "${TEST_COUNT}" -eq "${i}" ]; then
+ if [ "$((MPTCP_LIB_TEST_COUNTER+1))" -eq "${i}" ]; then
return 1
fi
done
@@ -298,14 +261,13 @@ reset()
TEST_NAME="${1}"
- TEST_COUNT=$((TEST_COUNT+1))
-
if skip_test; then
+ MPTCP_LIB_TEST_COUNTER=$((MPTCP_LIB_TEST_COUNTER+1))
last_test_ignored=1
return 1
fi
- print_title
+ mptcp_lib_print_title "${TEST_NAME}"
if [ "${init}" != "1" ]; then
init
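Review bot: The skip path now increments `MPTCP_LIB_TEST_COUNTER` by hand while the run path calls `mptcp_lib_print_title`, and nothing here says that the helper is what advances the counter (it is not visible in this hunk). A comment on the added line, e.g. `# mptcp_lib_print_title is not called for skipped tests: keep the numbering in sync`, would stop a later cleanup from removing it. The `$((MPTCP_LIB_TEST_COUNTER+1))` comparison in skip_test depends on the same convention. reset() continues outside the hunk.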
@@ -378,7 +340,7 @@ reset_with_checksum()
ip netns exec $ns1 sysctl -q net.mptcp.checksum_enabled=$ns1_enable
ip netns exec $ns2 sysctl -q net.mptcp.checksum_enabled=$ns2_enable
- validate_checksum=1
+ validate_checksum=true
}
reset_with_allow_join_id0()
@@ -411,7 +373,7 @@ reset_with_allow_join_id0()
setup_fail_rules()
{
check_invert=1
- validate_checksum=1
+ validate_checksum=true
local i="$1"
local ip="${2:-4}"
local tables
@@ -428,15 +390,15 @@ setup_fail_rules()
-p tcp \
-m length --length 150:9999 \
-m statistic --mode nth --packet 1 --every 99999 \
- -j MARK --set-mark 42 || return ${ksft_skip}
+ -j MARK --set-mark 42 || return ${KSFT_SKIP}
- tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${ksft_skip}
+ tc -n $ns2 qdisc add dev ns2eth$i clsact || return ${KSFT_SKIP}
tc -n $ns2 filter add dev ns2eth$i egress \
protocol ip prio 1000 \
handle 42 fw \
action pedit munge offset 148 u8 invert \
pipe csum tcp \
- index 100 || return ${ksft_skip}
+ index 100 || return ${KSFT_SKIP}
}
reset_with_fail()
@@ -450,7 +412,7 @@ reset_with_fail()
local rc=0
setup_fail_rules "${@}" || rc=$?
- if [ ${rc} -eq ${ksft_skip} ]; then
+ if [ ${rc} -eq ${KSFT_SKIP} ]; then
mark_as_skipped "unable to set the 'fail' rules"
return 1
fi
@@ -460,12 +422,8 @@ reset_with_events()
{
reset "${1}" || return 1
- :> "$evts_ns1"
- :> "$evts_ns2"
- ip netns exec $ns1 ./pm_nl_ctl events >> "$evts_ns1" 2>&1 &
- evts_ns1_pid=$!
- ip netns exec $ns2 ./pm_nl_ctl events >> "$evts_ns2" 2>&1 &
- evts_ns2_pid=$!
+ mptcp_lib_events "${ns1}" "${evts_ns1}" evts_ns1_pid
+ mptcp_lib_events "${ns2}" "${evts_ns2}" evts_ns2_pid
}
reset_with_tcp_filter()
@@ -490,13 +448,15 @@ reset_with_tcp_filter()
# $1: err msg
fail_test()
{
- ret=1
+ ret=${KSFT_FAIL}
- print_fail "${@}"
+ if [ ${#} -gt 0 ]; then
+ print_fail "${@}"
+ fi
# just in case a test is marked twice as failed
if [ ${last_test_failed} -eq 0 ]; then
- failed_tests[${TEST_COUNT}]="${TEST_NAME}"
+ failed_tests[${MPTCP_LIB_TEST_COUNTER}]="${TEST_NAME}"
dump_stats
last_test_failed=1
fi
@@ -511,13 +471,6 @@ get_failed_tests_ids()
done | sort -n
}
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo -n "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
check_transfer()
{
local in=$1
@@ -548,8 +501,8 @@ check_transfer()
local sum=$((0${a} + 0${b}))
if [ $check_invert -eq 0 ] || [ $sum -ne $((0xff)) ]; then
fail_test "$what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
+ mptcp_lib_print_file_err "$in"
+ mptcp_lib_print_file_err "$out"
return 1
else
@@ -587,49 +540,9 @@ link_failure()
done
}
-# $1: IP address
-is_v6()
-{
- [ -z "${1##*:*}" ]
-}
-
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex
- port_hex="$(printf "%04X" "${port}")"
-
- local i
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
-# $1: ns ; $2: counter
-get_counter()
-{
- local ns="${1}"
- local counter="${2}"
- local count
-
- count=$(ip netns exec ${ns} nstat -asz "${counter}" | awk 'NR==1 {next} {print $2}')
- if [ -z "${count}" ]; then
- mptcp_lib_fail_if_expected_feature "${counter} counter"
- return 1
- fi
-
- echo "${count}"
-}
-
rm_addr_count()
{
- get_counter "${1}" "MPTcpExtRmAddr"
+ mptcp_lib_get_counter "${1}" "MPTcpExtRmAddr"
}
# $1: ns, $2: old rm_addr counter in $ns
@@ -649,7 +562,7 @@ wait_rm_addr()
rm_sf_count()
{
- get_counter "${1}" "MPTcpExtRmSubflow"
+ mptcp_lib_get_counter "${1}" "MPTcpExtRmSubflow"
}
# $1: ns, $2: old rm_sf counter in $ns
@@ -672,204 +585,85 @@ wait_mpj()
local ns="${1}"
local cnt old_cnt
- old_cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ old_cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
local i
for i in $(seq 10); do
- cnt=$(get_counter ${ns} "MPTcpExtMPJoinAckRx")
+ cnt=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPJoinAckRx")
[ "$cnt" = "${old_cnt}" ] || break
sleep 0.1
done
}
-kill_wait()
-{
- kill $1 > /dev/null 2>&1
- wait $1 2>/dev/null
-}
-
kill_events_pids()
{
- kill_wait $evts_ns1_pid
- kill_wait $evts_ns2_pid
-}
-
-kill_tests_wait()
-{
- #shellcheck disable=SC2046
- kill -SIGUSR1 $(ip netns pids $ns2) $(ip netns pids $ns1)
- wait
+ mptcp_lib_kill_wait $evts_ns1_pid
+ evts_ns1_pid=0
+ mptcp_lib_kill_wait $evts_ns2_pid
+ evts_ns2_pid=0
}
pm_nl_set_limits()
{
- local ns=$1
- local addrs=$2
- local subflows=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp limits set add_addr_accepted $addrs subflows $subflows
- else
- ip netns exec $ns ./pm_nl_ctl limits $addrs $subflows
- fi
+ mptcp_lib_pm_nl_set_limits "${@}"
}
pm_nl_add_endpoint()
{
- local ns=$1
- local addr=$2
- local flags _flags
- local port _port
- local dev _dev
- local id _id
- local nr=2
-
- local p
- for p in "${@}"
- do
- if [ $p = "flags" ]; then
- eval _flags=\$"$nr"
- [ -n "$_flags" ]; flags="flags $_flags"
- fi
- if [ $p = "dev" ]; then
- eval _dev=\$"$nr"
- [ -n "$_dev" ]; dev="dev $_dev"
- fi
- if [ $p = "id" ]; then
- eval _id=\$"$nr"
- [ -n "$_id" ]; id="id $_id"
- fi
- if [ $p = "port" ]; then
- eval _port=\$"$nr"
- [ -n "$_port" ]; port="port $_port"
- fi
-
- nr=$((nr + 1))
- done
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint add $addr ${_flags//","/" "} $dev $id $port
- else
- ip netns exec $ns ./pm_nl_ctl add $addr $flags $dev $id $port
- fi
+ mptcp_lib_pm_nl_add_endpoint "${@}"
}
pm_nl_del_endpoint()
{
- local ns=$1
- local id=$2
- local addr=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- [ $id -ne 0 ] && addr=''
- ip -n $ns mptcp endpoint delete id $id $addr
- else
- ip netns exec $ns ./pm_nl_ctl del $id $addr
- fi
+ mptcp_lib_pm_nl_del_endpoint "${@}"
}
pm_nl_flush_endpoint()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint flush
- else
- ip netns exec $ns ./pm_nl_ctl flush
- fi
+ mptcp_lib_pm_nl_flush_endpoint "${@}"
}
pm_nl_show_endpoints()
{
- local ns=$1
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint show
- else
- ip netns exec $ns ./pm_nl_ctl dump
- fi
+ mptcp_lib_pm_nl_show_endpoints "${@}"
}
pm_nl_change_endpoint()
{
- local ns=$1
- local id=$2
- local flags=$3
-
- if [ $ip_mptcp -eq 1 ]; then
- ip -n $ns mptcp endpoint change id $id ${flags//","/" "}
- else
- ip netns exec $ns ./pm_nl_ctl set id $id flags $flags
- fi
+ mptcp_lib_pm_nl_change_endpoint "${@}"
}
pm_nl_check_endpoint()
{
- local line expected_line
local msg="$1"
local ns=$2
local addr=$3
- local _flags=""
- local flags
- local _port
- local port
- local dev
- local _id
- local id
+ local flags dev id port
print_check "${msg}"
shift 3
while [ -n "$1" ]; do
- if [ $1 = "flags" ]; then
- _flags=$2
- [ -n "$_flags" ]; flags="flags $_flags"
- shift
- elif [ $1 = "dev" ]; then
- [ -n "$2" ]; dev="dev $1"
+ case "${1}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${1}"="${2}"
shift
- elif [ $1 = "id" ]; then
- _id=$2
- [ -n "$_id" ]; id="id $_id"
- shift
- elif [ $1 = "port" ]; then
- _port=$2
- [ -n "$_port" ]; port=" port $_port"
- shift
- fi
+ ;;
+ *)
+ ;;
+ esac
shift
done
- if [ -z "$id" ]; then
+ if [ -z "${id}" ]; then
test_fail "bad test - missing endpoint id"
return
fi
- if [ $ip_mptcp -eq 1 ]; then
- # get line and trim trailing whitespace
- line=$(ip -n $ns mptcp endpoint show $id)
- line="${line% }"
- # the dump order is: address id flags port dev
- [ -n "$addr" ] && expected_line="$addr"
- expected_line="$expected_line $id"
- [ -n "$_flags" ] && expected_line="$expected_line ${_flags//","/" "}"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$port" ] && expected_line="$expected_line $port"
- else
- line=$(ip netns exec $ns ./pm_nl_ctl get $_id)
- # the dump order is: id flags dev address port
- expected_line="$id"
- [ -n "$flags" ] && expected_line="$expected_line $flags"
- [ -n "$dev" ] && expected_line="$expected_line $dev"
- [ -n "$addr" ] && expected_line="$expected_line $addr"
- [ -n "$_port" ] && expected_line="$expected_line $_port"
- fi
- if [ "$line" = "$expected_line" ]; then
- print_ok
- else
- fail_test "expected '$expected_line' found '$line'"
- fi
+ check_output "mptcp_lib_pm_nl_get_endpoint ${ns} ${id}" \
+ "$(mptcp_lib_pm_nl_format_endpoints \
+ "${id},${addr},${flags//","/" "},${dev},${port}")"
}
pm_nl_set_endpoint()
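Review bot: In pm_nl_check_endpoint, `eval "${1}"="${2}"` re-parses the value as shell code. The variable name is safe because the `case` only admits `flags`, `dev`, `id` and `port`, but `${2}` is expanded before `eval` runs, so a value containing spaces, `;` or `$(...)` is split or executed instead of stored. `printf -v "${1}" '%s' "${2}"` assigns the same local variable without a second parse. The `while [ -n "$1" ]` loop with its extra `shift` in the matching arm still assumes every keyword is followed by a value, as the removed code did; a short comment stating that would be enough.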
@@ -901,7 +695,7 @@ pm_nl_set_endpoint()
local id=10
while [ $add_nr_ns1 -gt 0 ]; do
local addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
addr="dead:beef:$counter::1"
else
addr="10.0.$counter.1"
@@ -953,7 +747,7 @@ pm_nl_set_endpoint()
local id=20
while [ $add_nr_ns2 -gt 0 ]; do
local addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
addr="dead:beef:$counter::2"
else
addr="10.0.$counter.2"
@@ -995,7 +789,7 @@ pm_nl_set_endpoint()
pm_nl_flush_endpoint ${connector_ns}
elif [ $rm_nr_ns2 -eq 9 ]; then
local addr
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
addr="dead:beef:1::2"
else
addr="10.0.1.2"
@@ -1029,6 +823,34 @@ pm_nl_set_endpoint()
fi
}
+chk_cestab_nr()
+{
+ local ns=$1
+ local cestab=$2
+ local count
+
+ print_check "cestab $cestab"
+ count=$(mptcp_lib_get_counter ${ns} "MPTcpExtMPCurrEstab")
+ if [ -z "$count" ]; then
+ print_skip
+ elif [ "$count" != "$cestab" ]; then
+ fail_test "got $count current establish[s] expected $cestab"
+ else
+ print_ok
+ fi
+}
+
+# $1 namespace 1, $2 namespace 2
+check_cestab()
+{
+ if [ -n "${cestab_ns1}" ]; then
+ chk_cestab_nr ${1} ${cestab_ns1}
+ fi
+ if [ -n "${cestab_ns2}" ]; then
+ chk_cestab_nr ${2} ${cestab_ns2}
+ fi
+}
+
do_transfer()
{
local listener_ns="$1"
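Review bot: chk_cestab_nr has no argument comment while its neighbour check_cestab does, and check_cestab passes its arguments unquoted. Adding `# $1: ns, $2: expected MPTcpExtMPCurrEstab value` above chk_cestab_nr and calling it as `chk_cestab_nr "${1}" "${cestab_ns1}"` (likewise for ns2) would match the documented helpers around it. It is also worth a line noting that the counter is sampled while the transfer is still running, since a later hunk calls check_cestab from do_transfer before `wait $cpid`.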
@@ -1037,7 +859,7 @@ do_transfer()
local srv_proto="$4"
local connect_addr="$5"
- local port=$((10000 + TEST_COUNT - 1))
+ local port=$((10000 + MPTCP_LIB_TEST_COUNTER - 1))
local cappid
local FAILING_LINKS=${FAILING_LINKS:-""}
local fastclose=${fastclose:-""}
@@ -1047,7 +869,7 @@ do_transfer()
:> "$sout"
:> "$capout"
- if [ $capture -eq 1 ]; then
+ if $capture; then
local capuser
if [ -z $SUDO_USER ] ; then
capuser=""
@@ -1055,9 +877,9 @@ do_transfer()
capuser="-Z $SUDO_USER"
fi
- capfile=$(printf "mp_join-%02u-%s.pcap" "$TEST_COUNT" "${listener_ns}")
+ capfile=$(printf "mp_join-%02u-%s.pcap" "$MPTCP_LIB_TEST_COUNTER" "${listener_ns}")
- echo "Capturing traffic for test $TEST_COUNT into $capfile"
+ echo "Capturing traffic for test $MPTCP_LIB_TEST_COUNTER into $capfile"
ip netns exec ${listener_ns} tcpdump -i any -s 65535 -B 32768 $capuser -w $capfile > "$capout" 2>&1 &
cappid=$!
@@ -1117,7 +939,7 @@ do_transfer()
fi
local spid=$!
- wait_local_port_listen "${listener_ns}" "${port}"
+ mptcp_lib_wait_local_port_listen "${listener_ns}" "${port}"
extra_cl_args="$extra_args $extra_cl_args"
if [ "$test_linkfail" -eq 0 ];then
@@ -1142,13 +964,14 @@ do_transfer()
local cpid=$!
pm_nl_set_endpoint $listener_ns $connector_ns $connect_addr
+ check_cestab $listener_ns $connector_ns
wait $cpid
local retc=$?
wait $spid
local rets=$?
- if [ $capture -eq 1 ]; then
+ if $capture; then
sleep 1
kill $cappid
fi
@@ -1199,8 +1022,7 @@ make_file()
local who=$2
local size=$3
- dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $size
print_info "Test file (size $size KB) for $who"
}
@@ -1284,9 +1106,9 @@ chk_csum_nr()
fi
print_check "sum"
- count=$(get_counter ${ns1} "MPTcpExtDataCsumErr")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns1" ]; then
- extra_msg="$extra_msg ns1=$count"
+ extra_msg+=" ns1=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1297,9 +1119,9 @@ chk_csum_nr()
print_ok
fi
print_check "csum"
- count=$(get_counter ${ns2} "MPTcpExtDataCsumErr")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtDataCsumErr")
if [ "$count" != "$csum_ns2" ]; then
- extra_msg="$extra_msg ns2=$count"
+ extra_msg+=" ns2=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1341,9 +1163,9 @@ chk_fail_nr()
fi
print_check "ftx"
- count=$(get_counter ${ns_tx} "MPTcpExtMPFailTx")
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFailTx")
if [ "$count" != "$fail_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1355,9 +1177,9 @@ chk_fail_nr()
fi
print_check "failrx"
- count=$(get_counter ${ns_rx} "MPTcpExtMPFailRx")
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFailRx")
if [ "$count" != "$fail_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fi
if [ -z "$count" ]; then
print_skip
@@ -1388,22 +1210,22 @@ chk_fclose_nr()
fi
print_check "ctx"
- count=$(get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPFastcloseTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_tx" ]; then
- extra_msg="$extra_msg,tx=$count"
+ extra_msg+=",tx=$count"
fail_test "got $count MP_FASTCLOSE[s] TX expected $fclose_tx"
else
print_ok
fi
print_check "fclzrx"
- count=$(get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPFastcloseRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$fclose_rx" ]; then
- extra_msg="$extra_msg,rx=$count"
+ extra_msg+=",rx=$count"
fail_test "got $count MP_FASTCLOSE[s] RX expected $fclose_rx"
else
print_ok
@@ -1429,7 +1251,7 @@ chk_rst_nr()
fi
print_check "rtx"
- count=$(get_counter ${ns_tx} "MPTcpExtMPRstTx")
+ count=$(mptcp_lib_get_counter ${ns_tx} "MPTcpExtMPRstTx")
if [ -z "$count" ]; then
print_skip
# accept more rst than expected except if we don't expect any
@@ -1441,7 +1263,7 @@ chk_rst_nr()
fi
print_check "rstrx"
- count=$(get_counter ${ns_rx} "MPTcpExtMPRstRx")
+ count=$(mptcp_lib_get_counter ${ns_rx} "MPTcpExtMPRstRx")
if [ -z "$count" ]; then
print_skip
# accept more rst than expected except if we don't expect any
@@ -1462,7 +1284,7 @@ chk_infi_nr()
local count
print_check "itx"
- count=$(get_counter ${ns2} "MPTcpExtInfiniteMapTx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtInfiniteMapTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$infi_tx" ]; then
@@ -1472,7 +1294,7 @@ chk_infi_nr()
fi
print_check "infirx"
- count=$(get_counter ${ns1} "MPTcpExtInfiniteMapRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtInfiniteMapRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$infi_rx" ]; then
@@ -1501,7 +1323,7 @@ chk_join_nr()
fi
print_check "syn"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinSynRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
@@ -1512,7 +1334,7 @@ chk_join_nr()
print_check "synack"
with_cookie=$(ip netns exec $ns2 sysctl -n net.ipv4.tcp_syncookies)
- count=$(get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1529,7 +1351,7 @@ chk_join_nr()
fi
print_check "ack"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinAckRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
@@ -1537,7 +1359,7 @@ chk_join_nr()
else
print_ok
fi
- if [ $validate_checksum -eq 1 ]; then
+ if $validate_checksum; then
chk_csum_nr $csum_ns1 $csum_ns2
chk_fail_nr $fail_nr $fail_nr
chk_rst_nr $rst_nr $rst_nr
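Review bot: `if $validate_checksum; then` runs the variable's content as a command, so an unset or empty `validate_checksum` makes the condition true (an empty command exits 0) and any other value is executed as a program name. The assignment is outside this hunk, so it is not visible whether the variable always holds `true` or `false`; comparing the string removes that dependency: `if [ "$validate_checksum" = true ]; then`. The same boolean-as-command convention is introduced by `capture=true` and `checksum=true` in the getopts hunk further down. chk_join_nr starts and ends outside the hunk.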
@@ -1562,8 +1384,8 @@ chk_stale_nr()
print_check "stale"
- stale_nr=$(get_counter ${ns} "MPTcpExtSubflowStale")
- recover_nr=$(get_counter ${ns} "MPTcpExtSubflowRecover")
+ stale_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowStale")
+ recover_nr=$(mptcp_lib_get_counter ${ns} "MPTcpExtSubflowRecover")
if [ -z "$stale_nr" ] || [ -z "$recover_nr" ]; then
print_skip
elif [ $stale_nr -lt $stale_min ] ||
@@ -1600,7 +1422,7 @@ chk_add_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
print_check "add"
- count=$(get_counter ${ns2} "MPTcpExtAddAddr")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtAddAddr")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
@@ -1612,7 +1434,7 @@ chk_add_nr()
fi
print_check "echo"
- count=$(get_counter ${ns1} "MPTcpExtEchoAdd")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtEchoAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_nr" ]; then
@@ -1623,7 +1445,7 @@ chk_add_nr()
if [ $port_nr -gt 0 ]; then
print_check "pt"
- count=$(get_counter ${ns2} "MPTcpExtPortAdd")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtPortAdd")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$port_nr" ]; then
@@ -1633,7 +1455,7 @@ chk_add_nr()
fi
print_check "syn"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_nr" ]; then
@@ -1644,7 +1466,7 @@ chk_add_nr()
fi
print_check "synack"
- count=$(get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtMPJoinPortSynAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$syn_ack_nr" ]; then
@@ -1655,7 +1477,7 @@ chk_add_nr()
fi
print_check "ack"
- count=$(get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPJoinPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$ack_nr" ]; then
@@ -1666,7 +1488,7 @@ chk_add_nr()
fi
print_check "syn"
- count=$(get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortSynRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_syn_nr" ]; then
@@ -1677,7 +1499,7 @@ chk_add_nr()
fi
print_check "ack"
- count=$(get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMismatchPortAckRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mis_ack_nr" ]; then
@@ -1699,7 +1521,7 @@ chk_add_tx_nr()
timeout=$(ip netns exec $ns1 sysctl -n net.mptcp.add_addr_timeout)
print_check "add TX"
- count=$(get_counter ${ns1} "MPTcpExtAddAddrTx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtAddAddrTx")
if [ -z "$count" ]; then
print_skip
# if the test configured a short timeout tolerate greater then expected
@@ -1711,7 +1533,7 @@ chk_add_tx_nr()
fi
print_check "echo TX"
- count=$(get_counter ${ns2} "MPTcpExtEchoAddTx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtEchoAddTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$echo_tx_nr" ]; then
@@ -1749,7 +1571,7 @@ chk_rm_nr()
fi
print_check "rm"
- count=$(get_counter ${addr_ns} "MPTcpExtRmAddr")
+ count=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmAddr")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$rm_addr_nr" ]; then
@@ -1759,20 +1581,20 @@ chk_rm_nr()
fi
print_check "rmsf"
- count=$(get_counter ${subflow_ns} "MPTcpExtRmSubflow")
+ count=$(mptcp_lib_get_counter ${subflow_ns} "MPTcpExtRmSubflow")
if [ -z "$count" ]; then
print_skip
elif [ -n "$simult" ]; then
local cnt suffix
- cnt=$(get_counter ${addr_ns} "MPTcpExtRmSubflow")
+ cnt=$(mptcp_lib_get_counter ${addr_ns} "MPTcpExtRmSubflow")
# in case of simult flush, the subflow removal count on each side is
# unreliable
count=$((count + cnt))
if [ "$count" != "$rm_subflow_nr" ]; then
suffix="$count in [$rm_subflow_nr:$((rm_subflow_nr*2))]"
- extra_msg="$extra_msg simult"
+ extra_msg+=" simult"
fi
if [ $count -ge "$rm_subflow_nr" ] && \
[ "$count" -le "$((rm_subflow_nr *2 ))" ]; then
@@ -1794,7 +1616,7 @@ chk_rm_tx_nr()
local rm_addr_tx_nr=$1
print_check "rm TX"
- count=$(get_counter ${ns2} "MPTcpExtRmAddrTx")
+ count=$(mptcp_lib_get_counter ${ns2} "MPTcpExtRmAddrTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$rm_addr_tx_nr" ]; then
@@ -1811,7 +1633,7 @@ chk_prio_nr()
local count
print_check "ptx"
- count=$(get_counter ${ns1} "MPTcpExtMPPrioTx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioTx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mp_prio_nr_tx" ]; then
@@ -1821,7 +1643,7 @@ chk_prio_nr()
fi
print_check "prx"
- count=$(get_counter ${ns1} "MPTcpExtMPPrioRx")
+ count=$(mptcp_lib_get_counter ${ns1} "MPTcpExtMPPrioRx")
if [ -z "$count" ]; then
print_skip
elif [ "$count" != "$mp_prio_nr_rx" ]; then
@@ -1867,12 +1689,10 @@ chk_mptcp_info()
local cnt2
local dump_stats
- print_check "mptcp_info ${info1:0:8}=$exp1:$exp2"
+ print_check "mptcp_info ${info1:0:15}=$exp1:$exp2"
- cnt1=$(ss -N $ns1 -inmHM | grep "$info1:" |
- sed -n 's/.*\('"$info1"':\)\([[:digit:]]*\).*$/\2/p;q')
- cnt2=$(ss -N $ns2 -inmHM | grep "$info2:" |
- sed -n 's/.*\('"$info2"':\)\([[:digit:]]*\).*$/\2/p;q')
+ cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1")
+ cnt2=$(ss -N $ns2 -inmHM | mptcp_lib_get_info_value "$info2" "$info2")
# 'ss' only display active connections and counters that are not 0.
[ -z "$cnt1" ] && cnt1=0
[ -z "$cnt2" ] && cnt2=0
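Review bot: The line filter lost its colon here: the removed code ran `grep "$info1:"`, while `mptcp_lib_get_info_value "$info1" "$info1"` greps for the bare name, which also matches longer keys that begin with it (`subflows` matches `subflows_total`). The helper's sed ends in `;q`, so if the first line that passes grep holds only the longer key, nothing is printed and the count silently becomes 0. Whether `ss` can emit such a line ahead of the real one is not visible from the hunk, but passing the colon keeps the old behaviour: `cnt1=$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value "$info1" "$info1:")`, and likewise for `cnt2`. chk_mptcp_info starts before the hunk.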
@@ -1890,6 +1710,42 @@ chk_mptcp_info()
fi
}
+# $1: subflows in ns1 ; $2: subflows in ns2
+# number of all subflows, including the initial subflow.
+chk_subflows_total()
+{
+ local cnt1
+ local cnt2
+ local info="subflows_total"
+ local dump_stats
+
+ # if subflows_total counter is supported, use it:
+ if [ -n "$(ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info)" ]; then
+ chk_mptcp_info $info $1 $info $2
+ return
+ fi
+
+ print_check "$info $1:$2"
+
+ # if not, count the TCP connections that are in fact MPTCP subflows
+ cnt1=$(ss -N $ns1 -ti state established state syn-sent state syn-recv |
+ grep -c tcp-ulp-mptcp)
+ cnt2=$(ss -N $ns2 -ti state established state syn-sent state syn-recv |
+ grep -c tcp-ulp-mptcp)
+
+ if [ "$1" != "$cnt1" ] || [ "$2" != "$cnt2" ]; then
+ fail_test "got subflows $cnt1:$cnt2 expected $1:$2"
+ dump_stats=1
+ else
+ print_ok
+ fi
+
+ if [ "$dump_stats" = 1 ]; then
+ ss -N $ns1 -ti
+ ss -N $ns2 -ti
+ fi
+}
+
chk_link_usage()
{
local ns=$1
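Review bot: The support probe in chk_subflows_total cannot tell "counter not supported" from "no data in ns1": any empty result from `ss -N $ns1 -inmHM | mptcp_lib_get_info_value $info $info` is taken to mean an older kernel, yet the comment in chk_mptcp_info says `ss` only displays active connections and non-zero counters. In that case the function silently switches to counting `tcp-ulp-mptcp` sockets, which is a different measurement. A comment above the probe would make the assumption explicit, for example `# empty output also happens when ns1 has no active MPTCP connection; the fallback below is used then`. `local dump_stats` could also be initialised to 0 so the `= 1` test at the end reads against a defined value.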
@@ -1921,7 +1777,7 @@ wait_attempt_fail()
while [ $time -lt $timeout_ms ]; do
local cnt
- cnt=$(get_counter ${ns} "TcpAttemptFails")
+ cnt=$(mptcp_lib_get_counter ${ns} "TcpAttemptFails")
[ "$cnt" = 1 ] && return 1
time=$((time + 100))
@@ -2497,47 +2353,52 @@ add_tests()
if reset "add single subflow"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
- addr_nr_ns2=1 speed=slow \
+ addr_nr_ns2=1 speed=slow cestab_ns2=1 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
+ chk_cestab_nr $ns2 0
fi
# add signal address
if reset "add signal address"; then
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 1 1
- addr_nr_ns1=1 speed=slow \
+ addr_nr_ns1=1 speed=slow cestab_ns1=1 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 1 1 1
chk_add_nr 1 1
+ chk_cestab_nr $ns1 0
fi
# add multiple subflows
if reset "add multiple subflows"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- addr_nr_ns2=2 speed=slow \
+ addr_nr_ns2=2 speed=slow cestab_ns2=1 \
run_tests $ns1 $ns2 10.0.1.1
chk_join_nr 2 2 2
+ chk_cestab_nr $ns2 0
fi
# add multiple subflows IPv6
if reset "add multiple subflows IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 0 2
- addr_nr_ns2=2 speed=slow \
+ addr_nr_ns2=2 speed=slow cestab_ns2=1 \
run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
+ chk_cestab_nr $ns2 0
fi
# add multiple addresses IPv6
if reset "add multiple addresses IPv6"; then
pm_nl_set_limits $ns1 0 2
pm_nl_set_limits $ns2 2 2
- addr_nr_ns1=2 speed=slow \
+ addr_nr_ns1=2 speed=slow cestab_ns1=1 \
run_tests $ns1 $ns2 dead:beef:1::1
chk_join_nr 2 2 2
chk_add_nr 2 2
+ chk_cestab_nr $ns1 0
fi
}
@@ -2776,7 +2637,7 @@ backup_tests()
fi
if reset "mpc backup" &&
- continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2785,7 +2646,7 @@ backup_tests()
fi
if reset "mpc backup both sides" &&
- continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow,backup
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow,backup
speed=slow \
@@ -2795,7 +2656,7 @@ backup_tests()
fi
if reset "mpc switch to backup" &&
- continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
sflags=backup speed=slow \
run_tests $ns1 $ns2 10.0.1.1
@@ -2804,7 +2665,7 @@ backup_tests()
fi
if reset "mpc switch to backup both sides" &&
- continue_if mptcp_lib_kallsyms_doesnt_have "mptcp_subflow_send_ack$"; then
+ continue_if mptcp_lib_kallsyms_doesnt_have "T mptcp_subflow_send_ack$"; then
pm_nl_add_endpoint $ns1 10.0.1.1 flags subflow
pm_nl_add_endpoint $ns2 10.0.1.2 flags subflow
sflags=backup speed=slow \
@@ -2814,28 +2675,16 @@ backup_tests()
fi
}
-LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
-LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
-
-AF_INET=2
-AF_INET6=10
-
verify_listener_events()
{
- local evt=$1
local e_type=$2
- local e_family=$3
local e_saddr=$4
local e_sport=$5
- local type
- local family
- local saddr
- local sport
local name
- if [ $e_type = $LISTENER_CREATED ]; then
+ if [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CREATED ]; then
name="LISTENER_CREATED"
- elif [ $e_type = $LISTENER_CLOSED ]; then
+ elif [ $e_type = $MPTCP_LIB_EVENT_LISTENER_CLOSED ]; then
name="LISTENER_CLOSED "
else
name="$e_type"
@@ -2848,23 +2697,11 @@ verify_listener_events()
return
fi
- type=$(grep "type:$e_type," $evt | sed -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
- family=$(grep "type:$e_type," $evt | sed -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
- sport=$(grep "type:$e_type," $evt | sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
- else
- saddr=$(grep "type:$e_type," $evt | sed -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
- fi
-
- if [ $type ] && [ $type = $e_type ] &&
- [ $family ] && [ $family = $e_family ] &&
- [ $saddr ] && [ $saddr = $e_saddr ] &&
- [ $sport ] && [ $sport = $e_sport ]; then
+ if mptcp_lib_verify_listener_events "${@}"; then
print_ok
return 0
fi
- fail_test "$e_type:$type $e_family:$family $e_saddr:$saddr $e_sport:$sport"
+ fail_test
}
add_addr_ports_tests()
@@ -2902,8 +2739,10 @@ add_addr_ports_tests()
chk_add_nr 1 1 1
chk_rm_nr 1 1 invert
- verify_listener_events $evts_ns1 $LISTENER_CREATED $AF_INET 10.0.2.1 10100
- verify_listener_events $evts_ns1 $LISTENER_CLOSED $AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CREATED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
+ verify_listener_events $evts_ns1 $MPTCP_LIB_EVENT_LISTENER_CLOSED \
+ $MPTCP_LIB_AF_INET 10.0.2.1 10100
kill_events_pids
fi
@@ -3240,7 +3079,7 @@ fastclose_tests()
if reset_check_counter "fastclose server test" "MPTcpExtMPFastcloseRx"; then
test_linkfail=1024 fastclose=server \
run_tests $ns1 $ns2 10.0.1.1
- chk_join_nr 0 0 0
+ chk_join_nr 0 0 0 0 0 0 1
chk_fclose_nr 1 1 invert
chk_rst_nr 1 1
fi
@@ -3249,8 +3088,7 @@ fastclose_tests()
pedit_action_pkts()
{
tc -n $ns2 -j -s action show action pedit index 100 | \
- grep "packets" | \
- sed 's/.*"packets":\([0-9]\+\),.*/\1/'
+ mptcp_lib_get_info_value \"packets\" packets
}
fail_tests()
@@ -3265,7 +3103,7 @@ fail_tests()
# multiple subflows
if reset_with_fail "MP_FAIL MP_RST" 2; then
- tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5
+ tc -n $ns2 qdisc add dev ns2eth1 root netem rate 1mbit delay 5ms
pm_nl_set_limits $ns1 0 1
pm_nl_set_limits $ns2 0 1
pm_nl_add_endpoint $ns2 10.0.2.2 dev ns2eth2 flags subflow
@@ -3275,75 +3113,142 @@ fail_tests()
fi
}
+# $1: ns ; $2: addr ; $3: id
userspace_pm_add_addr()
{
- local addr=$1
- local id=$2
+ local evts=$evts_ns1
local tk
- tk=$(grep "type:1," "$evts_ns1" |
- sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- ip netns exec $ns1 ./pm_nl_ctl ann $addr token $tk id $id
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl ann $2 token $tk id $3
sleep 1
}
-userspace_pm_rm_sf_addr_ns1()
+# $1: ns ; $2: id
+userspace_pm_rm_addr()
{
- local addr=$1
- local id=$2
- local tk sp da dp
- local cnt_addr cnt_sf
+ local evts=$evts_ns1
+ local tk
+ local cnt
- tk=$(grep "type:1," "$evts_ns1" |
- sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- sp=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- da=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
- dp=$(grep "type:10" "$evts_ns1" |
- sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q')
- cnt_addr=$(rm_addr_count ${ns1})
- cnt_sf=$(rm_sf_count ${ns1})
- ip netns exec $ns1 ./pm_nl_ctl rem token $tk id $id
- ip netns exec $ns1 ./pm_nl_ctl dsf lip "::ffff:$addr" \
- lport $sp rip $da rport $dp token $tk
- wait_rm_addr $ns1 "${cnt_addr}"
- wait_rm_sf $ns1 "${cnt_sf}"
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ cnt=$(rm_addr_count ${1})
+ ip netns exec $1 ./pm_nl_ctl rem token $tk id $2
+ wait_rm_addr $1 "${cnt}"
}
+# $1: ns ; $2: addr ; $3: id
userspace_pm_add_sf()
{
- local addr=$1
- local id=$2
+ local evts=$evts_ns1
local tk da dp
- tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
- dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- ip netns exec $ns2 ./pm_nl_ctl csf lip $addr lid $id \
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+ da=$(mptcp_lib_evts_get_info daddr4 "$evts")
+ dp=$(mptcp_lib_evts_get_info dport "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl csf lip $2 lid $3 \
rip $da rport $dp token $tk
sleep 1
}
-userspace_pm_rm_sf_addr_ns2()
+# $1: ns ; $2: addr $3: event type
+userspace_pm_rm_sf()
{
- local addr=$1
- local id=$2
+ local evts=$evts_ns1
+ local t=${3:-1}
+ local ip
local tk da dp sp
- local cnt_addr cnt_sf
-
- tk=$(sed -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- da=$(sed -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evts_ns2")
- dp=$(sed -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evts_ns2")
- sp=$(grep "type:10" "$evts_ns2" |
- sed -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- cnt_addr=$(rm_addr_count ${ns2})
- cnt_sf=$(rm_sf_count ${ns2})
- ip netns exec $ns2 ./pm_nl_ctl rem token $tk id $id
- ip netns exec $ns2 ./pm_nl_ctl dsf lip $addr lport $sp \
+ local cnt
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ [ -n "$(mptcp_lib_evts_get_info "saddr4" "$evts" $t)" ] && ip=4
+ [ -n "$(mptcp_lib_evts_get_info "saddr6" "$evts" $t)" ] && ip=6
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+ da=$(mptcp_lib_evts_get_info "daddr$ip" "$evts" $t $2)
+ dp=$(mptcp_lib_evts_get_info dport "$evts" $t $2)
+ sp=$(mptcp_lib_evts_get_info sport "$evts" $t $2)
+
+ cnt=$(rm_sf_count ${1})
+ ip netns exec $1 ./pm_nl_ctl dsf lip $2 lport $sp \
rip $da rport $dp token $tk
- wait_rm_addr $ns2 "${cnt_addr}"
- wait_rm_sf $ns2 "${cnt_sf}"
+ wait_rm_sf $1 "${cnt}"
+}
+
+check_output()
+{
+ local cmd="$1"
+ local expected="$2"
+ local msg="$3"
+ local rc=0
+
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ fail_test "fail to check output # error ${rc}"
+ elif [ ${rc} -eq 0 ]; then
+ print_ok
+ elif [ ${rc} -eq 1 ]; then
+ fail_test "fail to check output # different output"
+ fi
+}
+
+# $1: ns
+userspace_pm_dump()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl dump token $tk
+}
+
+# $1: ns ; $2: id
+userspace_pm_get_addr()
+{
+ local evts=$evts_ns1
+ local tk
+
+ [ "$1" == "$ns2" ] && evts=$evts_ns2
+ tk=$(mptcp_lib_evts_get_info token "$evts")
+
+ ip netns exec $1 ./pm_nl_ctl get $2 token $tk
+}
+
+userspace_pm_chk_dump_addr()
+{
+ local ns="${1}"
+ local exp="${2}"
+ local check="${3}"
+
+ print_check "dump addrs ${check}"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_dump_addr$"; then
+ check_output "userspace_pm_dump ${ns}" "${exp}"
+ else
+ print_skip
+ fi
+}
+
+userspace_pm_chk_get_addr()
+{
+ local ns="${1}"
+ local id="${2}"
+ local exp="${3}"
+
+ print_check "get id ${id} addr"
+
+ if mptcp_lib_kallsyms_has "mptcp_userspace_pm_get_addr$"; then
+ check_output "userspace_pm_get_addr ${ns} ${id}" "${exp}"
+ else
+ print_skip
+ fi
}
userspace_tests()
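Review bot: In userspace_pm_rm_sf, `ip` stays empty when the events file has neither a `saddr4` nor a `saddr6` value for event type `$t`, so `da` is looked up under the key `daddr` and comes back empty, and `pm_nl_ctl dsf` runs with `rip` followed directly by `rport` because the expansions are unquoted. The failure then only shows up later, in wait_rm_sf or a counter check, without saying that the event was missing. A guard after the two probes reports it where it happens: `[ -n "$ip" ] || { fail_test "no type $t event with a source address in $evts"; return 1; }`. check_output in the same hunk declares `local msg="$3"` but never reads it, and takes `${err}` from the caller's scope without saying so.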
@@ -3425,21 +3330,35 @@ userspace_tests()
if reset_with_events "userspace pm add & remove address" &&
continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
set_userspace_pm $ns1
- pm_nl_set_limits $ns2 1 1
+ pm_nl_set_limits $ns2 2 2
speed=5 \
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns1
- userspace_pm_add_addr 10.0.2.1 10
- chk_join_nr 1 1 1
- chk_add_nr 1 1
- chk_mptcp_info subflows 1 subflows 1
- chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
- userspace_pm_rm_sf_addr_ns1 10.0.2.1 10
- chk_rm_nr 1 1 invert
+ userspace_pm_add_addr $ns1 10.0.2.1 10
+ userspace_pm_add_addr $ns1 10.0.3.1 20
+ chk_join_nr 2 2 2
+ chk_add_nr 2 2
+ chk_mptcp_info subflows 2 subflows 2
+ chk_subflows_total 3 3
+ chk_mptcp_info add_addr_signal 2 add_addr_accepted 2
+ userspace_pm_chk_dump_addr "${ns1}" \
+ $'id 10 flags signal 10.0.2.1\nid 20 flags signal 10.0.3.1' \
+ "signal"
+ userspace_pm_chk_get_addr "${ns1}" "10" "id 10 flags signal 10.0.2.1"
+ userspace_pm_chk_get_addr "${ns1}" "20" "id 20 flags signal 10.0.3.1"
+ userspace_pm_rm_addr $ns1 10
+ userspace_pm_rm_sf $ns1 "::ffff:10.0.2.1" $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" \
+ "id 20 flags signal 10.0.3.1" "after rm_addr 10"
+ userspace_pm_rm_addr $ns1 20
+ userspace_pm_rm_sf $ns1 10.0.3.1 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns1}" "" "after rm_addr 20"
+ chk_rm_nr 2 2 invert
chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
fi
# userspace pm create destroy subflow
@@ -3451,14 +3370,95 @@ userspace_tests()
run_tests $ns1 $ns2 10.0.1.1 &
local tests_pid=$!
wait_mpj $ns2
- userspace_pm_add_sf 10.0.3.2 20
+ userspace_pm_add_sf $ns2 10.0.3.2 20
chk_join_nr 1 1 1
chk_mptcp_info subflows 1 subflows 1
- userspace_pm_rm_sf_addr_ns2 10.0.3.2 20
+ chk_subflows_total 2 2
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 20 flags subflow 10.0.3.2" \
+ "subflow"
+ userspace_pm_chk_get_addr "${ns2}" "20" "id 20 flags subflow 10.0.3.2"
+ userspace_pm_rm_addr $ns2 20
+ userspace_pm_rm_sf $ns2 10.0.3.2 $MPTCP_LIB_EVENT_SUB_ESTABLISHED
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "" \
+ "after rm_addr 20"
chk_rm_nr 1 1
chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm create id 0 subflow
+ if reset_with_events "userspace pm create id 0 subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ chk_mptcp_info subflows 0 subflows 0
+ chk_subflows_total 1 1
+ userspace_pm_add_sf $ns2 10.0.3.2 0
+ userspace_pm_chk_dump_addr "${ns2}" \
+ "id 0 flags subflow 10.0.3.2" "id 0 subflow"
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
kill_events_pids
- wait $tests_pid
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm remove initial subflow
+ if reset_with_events "userspace pm remove initial subflow" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns2
+ pm_nl_set_limits $ns1 0 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns2
+ userspace_pm_add_sf $ns2 10.0.3.2 20
+ chk_join_nr 1 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ userspace_pm_rm_sf $ns2 10.0.1.2
+ # we don't look at the counter linked to the RM_ADDR but
+ # to the one linked to the subflows that have been removed
+ chk_rm_nr 0 1
+ chk_rst_nr 0 0 invert
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
+ fi
+
+ # userspace pm send RM_ADDR for ID 0
+ if reset_with_events "userspace pm send RM_ADDR for ID 0" &&
+ continue_if mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
+ set_userspace_pm $ns1
+ pm_nl_set_limits $ns2 1 1
+ speed=5 \
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
+ wait_mpj $ns1
+ userspace_pm_add_addr $ns1 10.0.2.1 10
+ chk_join_nr 1 1 1
+ chk_add_nr 1 1
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 2 2
+ chk_mptcp_info add_addr_signal 1 add_addr_accepted 1
+ userspace_pm_rm_addr $ns1 0
+ # we don't look at the counter linked to the subflows that
+ # have been removed but to the one linked to the RM_ADDR
+ chk_rm_nr 1 0 invert
+ chk_rst_nr 0 0 invert
+ chk_mptcp_info subflows 1 subflows 1
+ chk_subflows_total 1 1
+ kill_events_pids
+ mptcp_lib_kill_wait $tests_pid
fi
}
@@ -3472,7 +3472,8 @@ endpoint_tests()
pm_nl_set_limits $ns2 2 2
pm_nl_add_endpoint $ns1 10.0.2.1 flags signal
speed=slow \
- run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
wait_mpj $ns1
pm_nl_check_endpoint "creation" \
@@ -3487,7 +3488,7 @@ endpoint_tests()
pm_nl_add_endpoint $ns2 10.0.2.2 flags signal
pm_nl_check_endpoint "modif is allowed" \
$ns2 10.0.2.2 id 1 flags signal
- kill_tests_wait
+ mptcp_lib_kill_wait $tests_pid
fi
if reset "delete and re-add" &&
@@ -3496,9 +3497,12 @@ endpoint_tests()
pm_nl_set_limits $ns2 1 1
pm_nl_add_endpoint $ns2 10.0.2.2 id 2 dev ns2eth2 flags subflow
test_linkfail=4 speed=20 \
- run_tests $ns1 $ns2 10.0.1.1 2>/dev/null &
+ run_tests $ns1 $ns2 10.0.1.1 &
+ local tests_pid=$!
wait_mpj $ns2
+ pm_nl_check_endpoint "creation" \
+ $ns2 10.0.2.2 id 2 flags subflow dev ns2eth2
chk_subflow_nr "before delete" 2
chk_mptcp_info subflows 1 subflows 1
@@ -3511,7 +3515,7 @@ endpoint_tests()
wait_mpj $ns2
chk_subflow_nr "after re-add" 2
chk_mptcp_info subflows 1 subflows 1
- kill_tests_wait
+ mptcp_lib_kill_wait $tests_pid
fi
}
@@ -3520,7 +3524,7 @@ usage()
{
if [ -n "${1}" ]; then
echo "${1}"
- ret=1
+ ret=${KSFT_FAIL}
fi
echo "mptcp_join usage:"
@@ -3583,13 +3587,13 @@ while getopts "${all_tests_args}cCih" opt; do
tests+=("${all_tests[${opt}]}")
;;
c)
- capture=1
+ capture=true
;;
C)
- checksum=1
+ checksum=true
;;
i)
- ip_mptcp=1
+ mptcp_lib_set_ip_mptcp
;;
h)
usage
diff --git a/tools/testing/selftests/net/mptcp/mptcp_lib.sh b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
index 92a5befe8039..ad2ebda5cb64 100644
--- a/tools/testing/selftests/net/mptcp/mptcp_lib.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_lib.sh
@@ -6,9 +6,24 @@ readonly KSFT_FAIL=1
readonly KSFT_SKIP=4
# shellcheck disable=SC2155 # declare and assign separately
-readonly KSFT_TEST=$(basename "${0}" | sed 's/\.sh$//g')
+readonly KSFT_TEST="${MPTCP_LIB_KSFT_TEST:-$(basename "${0}" .sh)}"
+
+# These variables are used in some selftests, read-only
+declare -rx MPTCP_LIB_EVENT_ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
+declare -rx MPTCP_LIB_EVENT_REMOVED=7 # MPTCP_EVENT_REMOVED
+declare -rx MPTCP_LIB_EVENT_SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
+declare -rx MPTCP_LIB_EVENT_SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CREATED=15 # MPTCP_EVENT_LISTENER_CREATED
+declare -rx MPTCP_LIB_EVENT_LISTENER_CLOSED=16 # MPTCP_EVENT_LISTENER_CLOSED
+
+declare -rx MPTCP_LIB_AF_INET=2
+declare -rx MPTCP_LIB_AF_INET6=10
MPTCP_LIB_SUBTESTS=()
+MPTCP_LIB_SUBTESTS_DUPLICATED=0
+MPTCP_LIB_TEST_COUNTER=0
+MPTCP_LIB_TEST_FORMAT="%02u %-50s"
+MPTCP_LIB_IP_MPTCP=0
# only if supported (or forced) and not disabled, see no-color.org
if { [ -t 1 ] || [ "${SELFTESTS_MPTCP_LIB_COLOR_FORCE:-}" = "1" ]; } &&
@@ -47,6 +62,23 @@ mptcp_lib_print_err() {
mptcp_lib_print_color "${MPTCP_LIB_COLOR_RED}${*}"
}
+# shellcheck disable=SC2120 # parameters are optional
+mptcp_lib_pr_ok() {
+ mptcp_lib_print_ok "[ OK ]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_skip() {
+ mptcp_lib_print_warn "[SKIP]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_fail() {
+ mptcp_lib_print_err "[FAIL]${1:+ ${*}}"
+}
+
+mptcp_lib_pr_info() {
+ mptcp_lib_print_info "INFO: ${*}"
+}
+
# SELFTESTS_MPTCP_LIB_EXPECT_ALL_FEATURES env var can be set when validating all
# features using the last version of the kernel and the selftests to make sure
# a test is not being skipped by mistake.
@@ -77,14 +109,14 @@ mptcp_lib_has_file() {
mptcp_lib_check_mptcp() {
if ! mptcp_lib_has_file "/proc/sys/net/mptcp/enabled"; then
- echo "SKIP: MPTCP support is not available"
+ mptcp_lib_pr_skip "MPTCP support is not available"
exit ${KSFT_SKIP}
fi
}
mptcp_lib_check_kallsyms() {
if ! mptcp_lib_has_file "/proc/kallsyms"; then
- echo "SKIP: CONFIG_KALLSYMS is missing"
+ mptcp_lib_pr_skip "CONFIG_KALLSYMS is missing"
exit ${KSFT_SKIP}
fi
}
@@ -146,12 +178,26 @@ mptcp_lib_kversion_ge() {
mptcp_lib_fail_if_expected_feature "kernel version ${1} lower than ${v}"
}
+__mptcp_lib_result_check_duplicated() {
+ local subtest
+
+ for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
+ if [[ "${subtest}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then
+ MPTCP_LIB_SUBTESTS_DUPLICATED=1
+ mptcp_lib_print_err "Duplicated entry: ${*}"
+ break
+ fi
+ done
+}
+
__mptcp_lib_result_add() {
local result="${1}"
shift
local id=$((${#MPTCP_LIB_SUBTESTS[@]} + 1))
+ __mptcp_lib_result_check_duplicated "${*}"
+
MPTCP_LIB_SUBTESTS+=("${result} ${id} - ${KSFT_TEST}: ${*}")
}
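Review bot: The duplicate check strips the ` #...` suffix from the new name only: `${*%% #*}` is matched against the end of each stored entry, but `__mptcp_lib_result_add` stores entries with the full `${*}`, suffix included. An earlier result that carried such a suffix therefore never matches, and a repeat of it goes unreported. Stripping both sides closes the gap: `if [[ "${subtest%% #*}" == *" - ${KSFT_TEST}: ${*%% #*}" ]]; then`. This assumes callers append directives such as ` # SKIP` to the name, which the `%% #*` trimming suggests but the hunk does not show.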
@@ -206,4 +252,391 @@ mptcp_lib_result_print_all_tap() {
for subtest in "${MPTCP_LIB_SUBTESTS[@]}"; do
printf "%s\n" "${subtest}"
done
+
+ if [ "${MPTCP_LIB_SUBTESTS_DUPLICATED}" = 1 ] &&
+ mptcp_lib_expect_all_features; then
+ mptcp_lib_print_err "Duplicated test entries"
+ exit ${KSFT_FAIL}
+ fi
+}
+
+# get the value of keyword $1 in the line marked by keyword $2
+mptcp_lib_get_info_value() {
+ grep "${2}" | sed -n 's/.*\('"${1}"':\)\([0-9a-f:.]*\).*$/\2/p;q'
+}
+
+# $1: info name ; $2: evts_ns ; [$3: event type; [$4: addr]]
+mptcp_lib_evts_get_info() {
+ grep "${4:-}" "${2}" | mptcp_lib_get_info_value "${1}" "^type:${3:-1},"
+}
+
+# $1: PID
+mptcp_lib_kill_wait() {
+ [ "${1}" -eq 0 ] && return 0
+
+ kill -SIGUSR1 "${1}" > /dev/null 2>&1
+ kill "${1}" > /dev/null 2>&1
+ wait "${1}" 2>/dev/null
+}
+
+# $1: IP address
+mptcp_lib_is_v6() {
+ [ -z "${1##*:*}" ]
+}
+
+# $1: ns, $2: MIB counter
+mptcp_lib_get_counter() {
+ local ns="${1}"
+ local counter="${2}"
+ local count
+
+ count=$(ip netns exec "${ns}" nstat -asz "${counter}" |
+ awk 'NR==1 {next} {print $2}')
+ if [ -z "${count}" ]; then
+ mptcp_lib_fail_if_expected_feature "${counter} counter"
+ return 1
+ fi
+
+ echo "${count}"
+}
+
+mptcp_lib_make_file() {
+ local name="${1}"
+ local bs="${2}"
+ local size="${3}"
+
+ dd if=/dev/urandom of="${name}" bs="${bs}" count="${size}" 2> /dev/null
+ echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "${name}"
+}
+
+# $1: file
+mptcp_lib_print_file_err() {
+ ls -l "${1}" 1>&2
+ echo "Trailing bytes are: "
+ tail -c 27 "${1}"
+}
+
+# $1: input file ; $2: output file ; $3: what kind of file
+mptcp_lib_check_transfer() {
+ local in="${1}"
+ local out="${2}"
+ local what="${3}"
+
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
+ mptcp_lib_pr_fail "$what does not match (in, out):"
+ mptcp_lib_print_file_err "$in"
+ mptcp_lib_print_file_err "$out"
+
+ return 1
+ fi
+
+ return 0
+}
+
+# $1: ns, $2: port
+mptcp_lib_wait_local_port_listen() {
+ local listener_ns="${1}"
+ local port="${2}"
+
+ local port_hex
+ port_hex="$(printf "%04X" "${port}")"
+
+ local _
+ for _ in $(seq 10); do
+ ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
+ awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) \
+ {rc=0; exit}} END {exit rc}" &&
+ break
+ sleep 0.1
+ done
+}
+
+mptcp_lib_check_output() {
+ local err="${1}"
+ local cmd="${2}"
+ local expected="${3}"
+ local cmd_ret=0
+ local out
+
+ if ! out=$(${cmd} 2>"${err}"); then
+ cmd_ret=${?}
+ fi
+
+ if [ ${cmd_ret} -ne 0 ]; then
+ mptcp_lib_pr_fail "command execution '${cmd}' stderr"
+ cat "${err}"
+ return 2
+ elif [ "${out}" = "${expected}" ]; then
+ return 0
+ else
+ mptcp_lib_pr_fail "expected '${expected}' got '${out}'"
+ return 1
+ fi
+}
+
+mptcp_lib_check_tools() {
+ local tool
+
+ for tool in "${@}"; do
+ case "${tool}" in
+ "ip")
+ if ! ip -Version &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without ip tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "tc")
+ if ! tc -help &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run test without tc tool"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "ss")
+ if ! ss -h | grep -q MPTCP; then
+ mptcp_lib_pr_skip "ss tool does not support MPTCP"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ "iptables"* | "ip6tables"*)
+ if ! "${tool}" -V &> /dev/null; then
+ mptcp_lib_pr_skip "Could not run all tests without ${tool}"
+ exit ${KSFT_SKIP}
+ fi
+ ;;
+ *)
+ mptcp_lib_pr_fail "Internal error: unsupported tool: ${tool}"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+ done
+}
+
+mptcp_lib_ns_init() {
+ local sec rndh
+
+ sec=$(date +%s)
+ rndh=$(printf %x "${sec}")-$(mktemp -u XXXXXX)
+
+ local netns
+ for netns in "${@}"; do
+ eval "${netns}=${netns}-${rndh}"
+
+ ip netns add "${!netns}" || exit ${KSFT_SKIP}
+ ip -net "${!netns}" link set lo up
+ ip netns exec "${!netns}" sysctl -q net.mptcp.enabled=1
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${!netns}" sysctl -q net.ipv4.conf.default.rp_filter=0
+ done
+}
+
+mptcp_lib_ns_exit() {
+ local netns
+ for netns in "${@}"; do
+ ip netns del "${netns}"
+ rm -f /tmp/"${netns}".{nstat,out}
+ done
+}
+
+mptcp_lib_events() {
+ local ns="${1}"
+ local evts="${2}"
+ declare -n pid="${3}"
+
+ :>"${evts}"
+
+ mptcp_lib_kill_wait "${pid:-0}"
+ ip netns exec "${ns}" ./pm_nl_ctl events >> "${evts}" 2>&1 &
+ pid=$!
+}
+
+mptcp_lib_print_title() {
+ : "${MPTCP_LIB_TEST_COUNTER:?}"
+ : "${MPTCP_LIB_TEST_FORMAT:?}"
+
+ # shellcheck disable=SC2059 # the format is in a variable
+ printf "${MPTCP_LIB_TEST_FORMAT}" "$((++MPTCP_LIB_TEST_COUNTER))" "${*}"
+}
+
+# $1: var name ; $2: prev ret
+mptcp_lib_check_expected_one() {
+ local var="${1}"
+ local exp="e_${var}"
+ local prev_ret="${2}"
+
+ if [ "${!var}" = "${!exp}" ]; then
+ return 0
+ fi
+
+ if [ "${prev_ret}" = "0" ]; then
+ mptcp_lib_pr_fail
+ fi
+
+ mptcp_lib_print_err "Expected value for '${var}': '${!exp}', got '${!var}'."
+ return 1
+}
+
+# $@: all var names to check
+mptcp_lib_check_expected() {
+ local rc=0
+ local var
+
+ for var in "${@}"; do
+ mptcp_lib_check_expected_one "${var}" "${rc}" || rc=1
+ done
+
+ return "${rc}"
+}
+
+# shellcheck disable=SC2034 # Some variables are used below but indirectly
+mptcp_lib_verify_listener_events() {
+ local evt=${1}
+ local e_type=${2}
+ local e_family=${3}
+ local e_saddr=${4}
+ local e_sport=${5}
+ local type
+ local family
+ local saddr
+ local sport
+ local rc=0
+
+ type=$(mptcp_lib_evts_get_info type "${evt}" "${e_type}")
+ family=$(mptcp_lib_evts_get_info family "${evt}" "${e_type}")
+ if [ "${family}" ] && [ "${family}" = "${AF_INET6}" ]; then
+ saddr=$(mptcp_lib_evts_get_info saddr6 "${evt}" "${e_type}")
+ else
+ saddr=$(mptcp_lib_evts_get_info saddr4 "${evt}" "${e_type}")
+ fi
+ sport=$(mptcp_lib_evts_get_info sport "${evt}" "${e_type}")
+
+ mptcp_lib_check_expected "type" "family" "saddr" "sport" || rc="${?}"
+ return "${rc}"
+}
+
+mptcp_lib_set_ip_mptcp() {
+ MPTCP_LIB_IP_MPTCP=1
+}
+
+mptcp_lib_is_ip_mptcp() {
+ [ "${MPTCP_LIB_IP_MPTCP}" = "1" ]
+}
+
+# format: <id>,<ip>,<flags>,<dev>
+mptcp_lib_pm_nl_format_endpoints() {
+ local entry id ip flags dev port
+
+ for entry in "${@}"; do
+ IFS=, read -r id ip flags dev port <<< "${entry}"
+ if mptcp_lib_is_ip_mptcp; then
+ echo -n "${ip}"
+ [ -n "${port}" ] && echo -n " port ${port}"
+ echo -n " id ${id}"
+ [ -n "${flags}" ] && echo -n " ${flags}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo " " # always a space at the end
+ else
+ echo -n "id ${id}"
+ echo -n " flags ${flags//" "/","}"
+ [ -n "${dev}" ] && echo -n " dev ${dev}"
+ echo -n " ${ip}"
+ [ -n "${port}" ] && echo -n " ${port}"
+ echo ""
+ fi
+ done
+}
+
+mptcp_lib_pm_nl_get_endpoint() {
+ local ns=${1}
+ local id=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show id "${id}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl get "${id}"
+ fi
+}
+
+mptcp_lib_pm_nl_set_limits() {
+ local ns=${1}
+ local addrs=${2}
+ local subflows=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp limits set add_addr_accepted "${addrs}" subflows "${subflows}"
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl limits "${addrs}" "${subflows}"
+ fi
+}
+
+mptcp_lib_pm_nl_add_endpoint() {
+ local ns=${1}
+ local addr=${2}
+ local flags dev id port
+ local nr=2
+
+ local p
+ for p in "${@}"; do
+ case "${p}" in
+ "flags" | "dev" | "id" | "port")
+ eval "${p}"=\$"${nr}"
+ ;;
+ esac
+
+ nr=$((nr + 1))
+ done
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint add "${addr}" ${flags//","/" "} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl add "${addr}" ${flags:+flags "${flags}"} \
+ ${dev:+dev "${dev}"} ${id:+id "${id}"} ${port:+port "${port}"}
+ fi
+}
+
+mptcp_lib_pm_nl_del_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local addr=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ [ "${id}" -ne 0 ] && addr=''
+ ip -n "${ns}" mptcp endpoint delete id "${id}" ${addr:+"${addr}"}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl del "${id}" "${addr}"
+ fi
+}
+
+mptcp_lib_pm_nl_flush_endpoint() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint flush
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl flush
+ fi
+}
+
+mptcp_lib_pm_nl_show_endpoints() {
+ local ns=${1}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns}" mptcp endpoint show
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl dump
+ fi
+}
+
+mptcp_lib_pm_nl_change_endpoint() {
+ local ns=${1}
+ local id=${2}
+ local flags=${3}
+
+ if mptcp_lib_is_ip_mptcp; then
+ # shellcheck disable=SC2086 # blanks in flags, no double quote
+ ip -n "${ns}" mptcp endpoint change id "${id}" ${flags//","/" "}
+ else
+ ip netns exec "${ns}" ./pm_nl_ctl set id "${id}" flags "${flags}"
+ fi
}
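Review bot: In mptcp_lib_check_output, `cmd_ret=${?}` sits inside `if ! out=$(...); then`, where `$?` is the status of the negated condition and therefore always 0, so the `return 2` branch cannot be reached. A command that fails with empty stdout then passes whenever the expected string is empty, which is exactly what the new `userspace_pm_chk_dump_addr ... ""` checks in mptcp_join.sh pass in. Capturing the status directly fixes it: `out=$(${cmd} 2>"${err}") || cmd_ret=${?}`. Separately, mptcp_lib_verify_listener_events compares against `${AF_INET6}`, but this file defines `MPTCP_LIB_AF_INET6` and the patch removes `AF_INET6` from mptcp_join.sh, so the `saddr6` branch is never taken there; the test should read `[ "${family}" = "${MPTCP_LIB_AF_INET6}" ]`.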
diff --git a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
index a817af6616ec..68899a303a1a 100755
--- a/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
+++ b/tools/testing/selftests/net/mptcp/mptcp_sockopt.sh
@@ -1,6 +1,11 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
ret=0
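Review bot: The file-wide `#shellcheck disable=SC2086` added at the top also silences the warning for lines this diff newly introduces, such as `mptcp_lib_check_transfer $cin $sout "file received by server"` in do_transfer() and `mptcp_lib_make_file $name 1024 $size` in make_file(). The added comment itself says quoting is recommended in new code, so those calls could be written as `mptcp_lib_check_transfer "$cin" "$sout" "file received by server"` and `mptcp_lib_make_file "$name" 1024 "$size"`. The same header is added to simult_flows.sh further down. Quoting the new lines keeps the blanket exemption limited to the legacy code it was meant for.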
@@ -8,17 +13,36 @@ sin=""
sout=""
cin=""
cout=""
-ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
iptables="iptables"
ip6tables="ip6tables"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns_sbox="ns_sbox-$rndh"
+ns1=""
+ns2=""
+ns_sbox=""
+
+usage() {
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
+}
+
+while getopts "hi" option;do
+ case "$option" in
+ "h")
+ usage "$0"
+ exit ${KSFT_PASS}
+ ;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
+ "?")
+ usage "$0"
+ exit ${KSFT_FAIL}
+ ;;
+ esac
+done
add_mark_rules()
{
@@ -40,17 +64,10 @@ add_mark_rules()
init()
{
- local netns
- for netns in "$ns1" "$ns2" "$ns_sbox";do
- ip netns add $netns || exit $ksft_skip
- ip -net $netns link set lo up
- ip netns exec $netns sysctl -q net.mptcp.enabled=1
- ip netns exec $netns sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $netns sysctl -q net.ipv4.conf.default.rp_filter=0
- done
+ mptcp_lib_ns_init ns1 ns2 ns_sbox
local i
- for i in `seq 1 4`; do
+ for i in $(seq 1 4); do
ip link add ns1eth$i netns "$ns1" type veth peer name ns2eth$i netns "$ns2"
ip -net "$ns1" addr add 10.0.$i.1/24 dev ns1eth$i
ip -net "$ns1" addr add dead:beef:$i::1/64 dev ns1eth$i nodad
@@ -63,50 +80,32 @@ init()
# let $ns2 reach any $ns1 address from any interface
ip -net "$ns2" route add default via 10.0.$i.1 dev ns2eth$i metric 10$i
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.$i.1 flags signal
- ip netns exec $ns1 ./pm_nl_ctl add dead:beef:$i::1 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "10.0.${i}.1" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "dead:beef:${i}::1" flags signal
- ip netns exec $ns2 ./pm_nl_ctl add 10.0.$i.2 flags signal
- ip netns exec $ns2 ./pm_nl_ctl add dead:beef:$i::2 flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "10.0.${i}.2" flags signal
+ mptcp_lib_pm_nl_add_endpoint "${ns2}" "dead:beef:${i}::2" flags signal
done
- ip netns exec $ns1 ./pm_nl_ctl limits 8 8
- ip netns exec $ns2 ./pm_nl_ctl limits 8 8
+ mptcp_lib_pm_nl_set_limits "${ns1}" 8 8
+ mptcp_lib_pm_nl_set_limits "${ns2}" 8 8
add_mark_rules $ns1 1
add_mark_rules $ns2 2
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
- local netns
- for netns in "$ns1" "$ns2" "$ns_sbox"; do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns_sbox}"
rm -f "$cin" "$cout"
rm -f "$sin" "$sout"
}
mptcp_lib_check_mptcp
mptcp_lib_check_kallsyms
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
-
-# Use the legacy version if available to support old kernel versions
-if iptables-legacy -V &> /dev/null; then
- iptables="iptables-legacy"
- ip6tables="ip6tables-legacy"
-elif ! iptables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without iptables tool"
- exit $ksft_skip
-elif ! ip6tables -V &> /dev/null; then
- echo "SKIP: Could not run all tests without ip6tables tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip "${iptables}" "${ip6tables}"
check_mark()
{
@@ -126,8 +125,9 @@ check_mark()
local v
for v in $values; do
if [ $v -ne 0 ]; then
- echo "FAIL: got $tables $values in ns $ns , not 0 - not all expected packets marked" 1>&2
- ret=1
+ mptcp_lib_pr_fail "got $tables $values in ns $ns," \
+ "not 0 - not all expected packets marked"
+ ret=${KSFT_FAIL}
return 1
fi
done
@@ -135,36 +135,9 @@ check_mark()
return 0
}
-print_file_err()
-{
- ls -l "$1" 1>&2
- echo "Trailing bytes are: "
- tail -c 27 "$1"
-}
-
-check_transfer()
-{
- local in=$1
- local out=$2
- local what=$3
-
- cmp "$in" "$out" > /dev/null 2>&1
- if [ $? -ne 0 ] ;then
- echo "[ FAIL ] $what does not match (in, out):"
- print_file_err "$in"
- print_file_err "$out"
- ret=1
-
- return 1
- fi
-
- return 0
-}
-
-# $1: IP address
-is_v6()
+print_title()
{
- [ -z "${1##*:*}" ]
+ mptcp_lib_print_title "${@}"
}
do_transfer()
@@ -183,7 +156,7 @@ do_transfer()
local mptcp_connect="./mptcp_connect -r 20"
local local_addr ip
- if is_v6 "${connect_addr}"; then
+ if mptcp_lib_is_v6 "${connect_addr}"; then
local_addr="::"
ip=ipv6
else
@@ -216,8 +189,9 @@ do_transfer()
wait $spid
local rets=$?
+ print_title "Transfer ${ip:2}"
if [ ${rets} -ne 0 ] || [ ${retc} -ne 0 ]; then
- echo " client exit code $retc, server $rets" 1>&2
+ mptcp_lib_pr_fail "client exit code $retc, server $rets"
echo -e "\nnetns ${listener_ns} socket stat for ${port}:" 1>&2
ip netns exec ${listener_ns} ss -Menita 1>&2 -o "sport = :$port"
@@ -226,10 +200,17 @@ do_transfer()
mptcp_lib_result_fail "transfer ${ip}"
- ret=1
+ ret=${KSFT_FAIL}
return 1
fi
+ if ! mptcp_lib_check_transfer $cin $sout "file received by server"; then
+ rets=1
+ else
+ mptcp_lib_pr_ok
+ fi
+ mptcp_lib_result_code "${rets}" "transfer ${ip}"
+ print_title "Mark ${ip:2}"
if [ $local_addr = "::" ];then
check_mark $listener_ns 6 || retc=1
check_mark $connector_ns 6 || retc=1
@@ -238,15 +219,13 @@ do_transfer()
check_mark $connector_ns 4 || retc=1
fi
- check_transfer $cin $sout "file received by server"
- rets=$?
-
mptcp_lib_result_code "${retc}" "mark ${ip}"
- mptcp_lib_result_code "${rets}" "transfer ${ip}"
if [ $retc -eq 0 ] && [ $rets -eq 0 ];then
+ mptcp_lib_pr_ok
return 0
fi
+ mptcp_lib_pr_fail
return 1
}
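Review bot: The status printed under the "Mark" title at the end of do_transfer() is gated on `$rets` as well as `$retc`, so a transfer mismatch recorded earlier (`rets=1`) prints a failure on the mark line even though `mptcp_lib_result_code "${retc}" "mark ${ip}"` reports the mark check as passed. In the other direction, check_mark() already calls `mptcp_lib_pr_fail` with a message, so a real mark failure appears to be printed twice. Printing `mptcp_lib_pr_ok` only when `[ $retc -eq 0 ]`, and keeping the combined test just for the return value, would make the console output agree with the TAP results. do_transfer() starts outside this hunk, so this is based on the visible lines only.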
@@ -257,8 +236,7 @@ make_file()
local who=$2
local size=$3
- dd if=/dev/urandom of="$name" bs=1024 count=$size 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
+ mptcp_lib_make_file $name 1024 $size
echo "Created $name (size $size KB) containing data sent by $who"
}
@@ -268,7 +246,7 @@ do_mptcp_sockopt_tests()
local lret=0
if ! mptcp_lib_kallsyms_has "mptcp_diag_fill_info$"; then
- echo "INFO: MPTCP sockopt not supported: SKIP"
+ mptcp_lib_pr_skip "MPTCP sockopt not supported"
mptcp_lib_result_skip "sockopt"
return
fi
@@ -276,23 +254,27 @@ do_mptcp_sockopt_tests()
ip netns exec "$ns_sbox" ./mptcp_sockopt
lret=$?
+ print_title "SOL_MPTCP sockopt v4"
if [ $lret -ne 0 ]; then
- echo "FAIL: SOL_MPTCP getsockopt" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "sockopt v4"
ret=$lret
return
fi
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "sockopt v4"
ip netns exec "$ns_sbox" ./mptcp_sockopt -6
lret=$?
+ print_title "SOL_MPTCP sockopt v6"
if [ $lret -ne 0 ]; then
- echo "FAIL: SOL_MPTCP getsockopt (ipv6)" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "sockopt v6"
ret=$lret
return
fi
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "sockopt v6"
}
@@ -315,16 +297,17 @@ run_tests()
do_tcpinq_test()
{
+ print_title "TCP_INQ cmsg/ioctl $*"
ip netns exec "$ns_sbox" ./mptcp_inq "$@"
local lret=$?
if [ $lret -ne 0 ];then
ret=$lret
- echo "FAIL: mptcp_inq $@" 1>&2
+ mptcp_lib_pr_fail
mptcp_lib_result_fail "TCP_INQ: $*"
return $lret
fi
- echo "PASS: TCP_INQ cmsg/ioctl $@"
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "TCP_INQ: $*"
return $lret
}
@@ -334,7 +317,7 @@ do_tcpinq_tests()
local lret=0
if ! mptcp_lib_kallsyms_has "mptcp_ioctl$"; then
- echo "INFO: TCP_INQ not supported: SKIP"
+ mptcp_lib_pr_skip "TCP_INQ not supported"
mptcp_lib_result_skip "TCP_INQ"
return
fi
@@ -370,15 +353,7 @@ trap cleanup EXIT
run_tests $ns1 $ns2 10.0.1.1
run_tests $ns1 $ns2 dead:beef:1::1
-if [ $ret -eq 0 ];then
- echo "PASS: all packets had packet mark set"
-fi
-
do_mptcp_sockopt_tests
-if [ $ret -eq 0 ];then
- echo "PASS: SOL_MPTCP getsockopt has expected information"
-fi
-
do_tcpinq_tests
mptcp_lib_result_print_all_tap
diff --git a/tools/testing/selftests/net/mptcp/pm_netlink.sh b/tools/testing/selftests/net/mptcp/pm_netlink.sh
index 8f4ff123a7eb..2757378b1b13 100755
--- a/tools/testing/selftests/net/mptcp/pm_netlink.sh
+++ b/tools/testing/selftests/net/mptcp/pm_netlink.sh
@@ -3,197 +3,267 @@
. "$(dirname "${0}")/mptcp_lib.sh"
-ksft_skip=4
ret=0
usage() {
- echo "Usage: $0 [ -h ]"
+ echo "Usage: $0 [ -i ] [ -h ]"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
+ echo -e "\t-h: help"
}
-
+optstring=hi
while getopts "$optstring" option;do
case "$option" in
"h")
- usage $0
- exit 0
+ usage "$0"
+ exit ${KSFT_PASS}
+ ;;
+ "i")
+ mptcp_lib_set_ip_mptcp
;;
"?")
- usage $0
- exit 1
+ usage "$0"
+ exit ${KSFT_FAIL}
;;
esac
done
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
+ns1=""
err=$(mktemp)
-ret=0
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
- rm -f $err
- ip netns del $ns1
+ rm -f "${err}"
+ mptcp_lib_ns_exit "${ns1}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip
trap cleanup EXIT
-ip netns add $ns1 || exit $ksft_skip
-ip -net $ns1 link set lo up
-ip netns exec $ns1 sysctl -q net.mptcp.enabled=1
+mptcp_lib_ns_init ns1
+
+format_limits() {
+ local accept="${1}"
+ local subflows="${2}"
+
+ if mptcp_lib_is_ip_mptcp; then
+ # with a space at the end
+ printf "add_addr_accepted %d subflows %d \n" "${accept}" "${subflows}"
+ else
+ printf "accept %d\nsubflows %d\n" "${accept}" "${subflows}"
+ fi
+}
+
+get_limits() {
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp limits
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl limits
+ fi
+}
+
+format_endpoints() {
+ mptcp_lib_pm_nl_format_endpoints "${@}"
+}
+
+get_endpoint() {
+ # shellcheck disable=SC2317 # invoked indirectly
+ mptcp_lib_pm_nl_get_endpoint "${ns1}" "${@}"
+}
+
+change_address() {
+ local addr=${1}
+ local flags=${2}
+
+ if mptcp_lib_is_ip_mptcp; then
+ ip -n "${ns1}" mptcp endpoint change "${addr}" "${flags}"
+ else
+ ip netns exec "${ns1}" ./pm_nl_ctl set "${addr}" flags "${flags}"
+ fi
+}
+
+set_limits()
+{
+ mptcp_lib_pm_nl_set_limits "${ns1}" "${@}"
+}
+
+add_endpoint()
+{
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" "${@}"
+}
+
+del_endpoint()
+{
+ mptcp_lib_pm_nl_del_endpoint "${ns1}" "${@}"
+}
+
+flush_endpoint()
+{
+ mptcp_lib_pm_nl_flush_endpoint "${ns1}"
+}
+
+show_endpoints()
+{
+ mptcp_lib_pm_nl_show_endpoints "${ns1}"
+}
+
+change_endpoint()
+{
+ mptcp_lib_pm_nl_change_endpoint "${ns1}" "${@}"
+}
check()
{
local cmd="$1"
local expected="$2"
local msg="$3"
- local out=`$cmd 2>$err`
- local cmd_ret=$?
-
- printf "%-50s" "$msg"
- if [ $cmd_ret -ne 0 ]; then
- echo "[FAIL] command execution '$cmd' stderr "
- cat $err
- mptcp_lib_result_fail "${msg} # error ${cmd_ret}"
- ret=1
- elif [ "$out" = "$expected" ]; then
- echo "[ OK ]"
+ local rc=0
+
+ mptcp_lib_print_title "$msg"
+ mptcp_lib_check_output "${err}" "${cmd}" "${expected}" || rc=${?}
+ if [ ${rc} -eq 2 ]; then
+ mptcp_lib_result_fail "${msg} # error ${rc}"
+ ret=${KSFT_FAIL}
+ elif [ ${rc} -eq 0 ]; then
+ mptcp_lib_print_ok "[ OK ]"
mptcp_lib_result_pass "${msg}"
- else
- echo -n "[FAIL] "
- echo "expected '$expected' got '$out'"
+ elif [ ${rc} -eq 1 ]; then
mptcp_lib_result_fail "${msg} # different output"
- ret=1
+ ret=${KSFT_FAIL}
fi
}
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "defaults addr list"
+check "show_endpoints" "" "defaults addr list"
-default_limits="$(ip netns exec $ns1 ./pm_nl_ctl limits)"
+default_limits="$(get_limits)"
if mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 0
-subflows 2" "defaults limits"
+ check "get_limits" "$(format_limits 0 2)" "defaults limits"
fi
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2 flags subflow dev lo
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 flags signal,backup
-check "ip netns exec $ns1 ./pm_nl_ctl get 1" "id 1 flags 10.0.1.1" "simple add/get addr"
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2 flags subflow dev lo
+add_endpoint 10.0.1.3 flags signal,backup
+check "get_endpoint 1" "$(format_endpoints "1,10.0.1.1")" "simple add/get addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 2 flags subflow dev lo 10.0.1.2
-id 3 flags signal,backup 10.0.1.3" "dump addrs"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2,subflow,lo" \
+ "3,10.0.1.3,signal backup")" "dump addrs"
-ip netns exec $ns1 ./pm_nl_ctl del 2
-check "ip netns exec $ns1 ./pm_nl_ctl get 2" "" "simple del addr"
-check "ip netns exec $ns1 ./pm_nl_ctl dump" \
-"id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3" "dump addrs after del"
+del_endpoint 2
+check "get_endpoint 2" "" "simple del addr"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup")" "dump addrs after del"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "" "duplicate addr"
+add_endpoint 10.0.1.3 2>/dev/null
+check "get_endpoint 4" "" "duplicate addr"
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4 flags signal
-check "ip netns exec $ns1 ./pm_nl_ctl get 4" "id 4 flags signal 10.0.1.4" "id addr increment"
+add_endpoint 10.0.1.4 flags signal
+check "get_endpoint 4" "$(format_endpoints "4,10.0.1.4,signal")" "id addr increment"
-for i in `seq 5 9`; do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.$i flags signal >/dev/null 2>&1
+for i in $(seq 5 9); do
+ add_endpoint "10.0.1.${i}" flags signal >/dev/null 2>&1
done
-check "ip netns exec $ns1 ./pm_nl_ctl get 9" "id 9 flags signal 10.0.1.9" "hard addr limit"
-check "ip netns exec $ns1 ./pm_nl_ctl get 10" "" "above hard addr limit"
+check "get_endpoint 9" "$(format_endpoints "9,10.0.1.9,signal")" "hard addr limit"
+check "get_endpoint 10" "" "above hard addr limit"
-ip netns exec $ns1 ./pm_nl_ctl del 9
-for i in `seq 10 255`; do
- ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.9 id $i
- ip netns exec $ns1 ./pm_nl_ctl del $i
+del_endpoint 9
+for i in $(seq 10 255); do
+ add_endpoint 10.0.0.9 id "${i}"
+ del_endpoint "${i}"
done
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 3 flags signal,backup 10.0.1.3
-id 4 flags signal 10.0.1.4
-id 5 flags signal 10.0.1.5
-id 6 flags signal 10.0.1.6
-id 7 flags signal 10.0.1.7
-id 8 flags signal 10.0.1.8" "id limit"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "" "flush addrs"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 9 1 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "rcv addrs above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 1 9 2>/dev/null
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "$default_limits" "subflows above hard limit"
-
-ip netns exec $ns1 ./pm_nl_ctl limits 8 8
-check "ip netns exec $ns1 ./pm_nl_ctl limits" "accept 8
-subflows 8" "set limits"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.2
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.3 id 100
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.5 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.1.1
-id 2 flags 10.0.1.2
-id 3 flags 10.0.1.7
-id 4 flags 10.0.1.8
-id 100 flags 10.0.1.3
-id 101 flags 10.0.1.4
-id 254 flags 10.0.1.5
-id 255 flags 10.0.1.6" "set ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.1
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.2 id 254
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.3
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.4
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.5 id 253
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.6
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.7
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.0.8
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags 10.0.0.1
-id 2 flags 10.0.0.4
-id 3 flags 10.0.0.6
-id 4 flags 10.0.0.7
-id 5 flags 10.0.0.8
-id 253 flags 10.0.0.5
-id 254 flags 10.0.0.2
-id 255 flags 10.0.0.3" "wrap-around ids"
-
-ip netns exec $ns1 ./pm_nl_ctl flush
-ip netns exec $ns1 ./pm_nl_ctl add 10.0.1.1 flags subflow
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags backup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup 10.0.1.1" "set flags (backup)"
-ip netns exec $ns1 ./pm_nl_ctl set 10.0.1.1 flags nobackup
-check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nobackup)"
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "3,10.0.1.3,signal backup" \
+ "4,10.0.1.4,signal" \
+ "5,10.0.1.5,signal" \
+ "6,10.0.1.6,signal" \
+ "7,10.0.1.7,signal" \
+ "8,10.0.1.8,signal")" "id limit"
+
+flush_endpoint
+check "show_endpoints" "" "flush addrs"
+
+set_limits 9 1 2>/dev/null
+check "get_limits" "${default_limits}" "rcv addrs above hard limit"
+
+set_limits 1 9 2>/dev/null
+check "get_limits" "${default_limits}" "subflows above hard limit"
+
+set_limits 8 8
+check "get_limits" "$(format_limits 8 8)" "set limits"
+
+flush_endpoint
+add_endpoint 10.0.1.1
+add_endpoint 10.0.1.2
+add_endpoint 10.0.1.3 id 100
+add_endpoint 10.0.1.4
+add_endpoint 10.0.1.5 id 254
+add_endpoint 10.0.1.6
+add_endpoint 10.0.1.7
+add_endpoint 10.0.1.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.1.1" \
+ "2,10.0.1.2" \
+ "3,10.0.1.7" \
+ "4,10.0.1.8" \
+ "100,10.0.1.3" \
+ "101,10.0.1.4" \
+ "254,10.0.1.5" \
+ "255,10.0.1.6")" "set ids"
+
+flush_endpoint
+add_endpoint 10.0.0.1
+add_endpoint 10.0.0.2 id 254
+add_endpoint 10.0.0.3
+add_endpoint 10.0.0.4
+add_endpoint 10.0.0.5 id 253
+add_endpoint 10.0.0.6
+add_endpoint 10.0.0.7
+add_endpoint 10.0.0.8
+check "show_endpoints" \
+ "$(format_endpoints "1,10.0.0.1" \
+ "2,10.0.0.4" \
+ "3,10.0.0.6" \
+ "4,10.0.0.7" \
+ "5,10.0.0.8" \
+ "253,10.0.0.5" \
+ "254,10.0.0.2" \
+ "255,10.0.0.3")" "wrap-around ids"
+
+flush_endpoint
+add_endpoint 10.0.1.1 flags subflow
+change_address 10.0.1.1 backup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup")" \
+ "set flags (backup)"
+change_address 10.0.1.1 nobackup
+check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nobackup)"
# fullmesh support has been added later
-ip netns exec $ns1 ./pm_nl_ctl set id 1 flags fullmesh
-if ip netns exec $ns1 ./pm_nl_ctl dump | grep -q "fullmesh" ||
+change_endpoint 1 fullmesh 2>/dev/null
+if show_endpoints | grep -q "fullmesh" ||
mptcp_lib_expect_all_features; then
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,fullmesh 10.0.1.1" " (fullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags nofullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow 10.0.1.1" " (nofullmesh)"
- ip netns exec $ns1 ./pm_nl_ctl set id 1 flags backup,fullmesh
- check "ip netns exec $ns1 ./pm_nl_ctl dump" "id 1 flags \
-subflow,backup,fullmesh 10.0.1.1" " (backup,fullmesh)"
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow fullmesh")" \
+ " (fullmesh)"
+ change_endpoint 1 nofullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow")" \
+ " (nofullmesh)"
+ change_endpoint 1 backup,fullmesh
+ check "show_endpoints" "$(format_endpoints "1,10.0.1.1,subflow backup fullmesh")" \
+ " (backup,fullmesh)"
+else
+ for st in fullmesh nofullmesh backup,fullmesh; do
+ st=" (${st})"
+ mptcp_lib_print_title "${st}"
+ mptcp_lib_pr_skip
+ mptcp_lib_result_skip "${st}"
+ done
fi
mptcp_lib_result_print_all_tap
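Review bot: In the rewritten check(), the `if`/`elif` chain on `${rc}` handles only 0, 1 and 2, so any other status from `mptcp_lib_check_output` records neither a pass nor a fail and leaves `ret` unchanged. A final `else` branch calling `mptcp_lib_result_fail "${msg} # unexpected status ${rc}"` and setting `ret=${KSFT_FAIL}` would keep an unexpected helper status from being dropped silently. The helper is defined outside this page, so whether it can return other values is not visible here. A one-line comment above the chain saying what 1 and 2 stand for would also help, since only the result strings ("different output", "error") hint at their meaning.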
diff --git a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
index 49369c4a5f26..7ad5a59adff2 100644
--- a/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
+++ b/tools/testing/selftests/net/mptcp/pm_nl_ctl.c
@@ -453,6 +453,7 @@ int csf(int fd, int pm_family, int argc, char *argv[])
char data[NLMSG_ALIGN(sizeof(struct nlmsghdr)) +
NLMSG_ALIGN(sizeof(struct genlmsghdr)) +
1024];
+ u_int32_t flags = MPTCP_PM_ADDR_FLAG_SUBFLOW;
const char *params[5];
struct nlmsghdr *nh;
struct rtattr *addr;
@@ -558,6 +559,13 @@ int csf(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
}
+ /* addr flags */
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ADDR_ATTR_FLAGS;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &flags, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+
addr->rta_len = off - addr_start;
}
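Review bot: The new flags attribute in csf() is written at `data + off` with its size given as a literal 4 in both `RTA_LENGTH(4)` and `memcpy(RTA_DATA(rta), &flags, 4)`; using `sizeof(flags)` in both places would keep the length tied to the variable's type. Nothing in the visible lines compares `off` with `sizeof(data)` before the write, and the attributes built earlier in the function are outside this hunk, so it is worth confirming that the fixed 1024-byte payload still has room on every argument path. The same literal-4 pattern appears in the token attributes added to get_addr() and dump_addrs().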
@@ -1079,6 +1087,7 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
1024];
struct rtattr *rta, *nest;
struct nlmsghdr *nh;
+ u_int32_t token = 0;
int nest_start;
u_int8_t id;
int off = 0;
@@ -1089,10 +1098,12 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
MPTCP_PM_VER);
/* the only argument is the address id */
- if (argc != 3)
+ if (argc != 3 && argc != 5)
syntax(argv);
id = atoi(argv[2]);
+ if (argc == 5 && !strcmp(argv[3], "token"))
+ token = strtoul(argv[4], NULL, 10);
nest_start = off;
nest = (void *)(data + off);
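Review bot: In get_addr(), `argc == 5` is accepted but the token is only read when `argv[3]` is "token", so a call with two unrelated trailing arguments is silently treated as a plain lookup by id instead of reaching `syntax(argv)`. The value is taken with `strtoul(argv[4], NULL, 10)` without an end pointer or range check and stored in a `u_int32_t`, so non-numeric or out-of-range input becomes 0 or a truncated token, and 0 is then indistinguishable from "no token" in the later `if (token)` test. Rejecting the mismatch with `if (argc == 5 && strcmp(argv[3], "token")) syntax(argv);` and validating the conversion would make the tool fail loudly on bad input. dump_addrs() gets the same parsing for `argc == 4` and has the same gap.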
@@ -1108,6 +1119,15 @@ int get_addr(int fd, int pm_family, int argc, char *argv[])
off += NLMSG_ALIGN(rta->rta_len);
nest->rta_len = off - nest_start;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
@@ -1119,8 +1139,16 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
1024];
pid_t pid = getpid();
struct nlmsghdr *nh;
+ u_int32_t token = 0;
+ struct rtattr *rta;
int off = 0;
+ if (argc != 2 && argc != 4)
+ syntax(argv);
+
+ if (argc == 4 && !strcmp(argv[2], "token"))
+ token = strtoul(argv[3], NULL, 10);
+
memset(data, 0, sizeof(data));
nh = (void *)data;
off = init_genl_req(data, pm_family, MPTCP_PM_CMD_GET_ADDR,
@@ -1130,6 +1158,15 @@ int dump_addrs(int fd, int pm_family, int argc, char *argv[])
nh->nlmsg_pid = pid;
nh->nlmsg_len = off;
+ /* token */
+ if (token) {
+ rta = (void *)(data + off);
+ rta->rta_type = MPTCP_PM_ATTR_TOKEN;
+ rta->rta_len = RTA_LENGTH(4);
+ memcpy(RTA_DATA(rta), &token, 4);
+ off += NLMSG_ALIGN(rta->rta_len);
+ }
+
print_addrs(nh, pm_family, do_nl_req(fd, nh, off, sizeof(data)));
return 0;
}
@@ -1239,7 +1276,7 @@ int add_listener(int argc, char *argv[])
struct sockaddr_storage addr;
struct sockaddr_in6 *a6;
struct sockaddr_in *a4;
- u_int16_t family;
+ u_int16_t family = AF_UNSPEC;
int enable = 1;
int sock;
int err;
diff --git a/tools/testing/selftests/net/mptcp/settings b/tools/testing/selftests/net/mptcp/settings
index 79b65bdf05db..abc5648b59ab 100644
--- a/tools/testing/selftests/net/mptcp/settings
+++ b/tools/testing/selftests/net/mptcp/settings
@@ -1 +1 @@
-timeout=1200
+timeout=1800
diff --git a/tools/testing/selftests/net/mptcp/simult_flows.sh b/tools/testing/selftests/net/mptcp/simult_flows.sh
index ce9203b817f8..4b14b4412166 100755
--- a/tools/testing/selftests/net/mptcp/simult_flows.sh
+++ b/tools/testing/selftests/net/mptcp/simult_flows.sh
@@ -1,48 +1,52 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+# Double quotes to prevent globbing and word splitting is recommended in new
+# code but we accept it, especially because there were too many before having
+# address all other issues detected by shellcheck.
+#shellcheck disable=SC2086
+
. "$(dirname "${0}")/mptcp_lib.sh"
-sec=$(date +%s)
-rndh=$(printf %x $sec)-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
-ns3="ns3-$rndh"
+ns1=""
+ns2=""
+ns3=""
capture=false
-ksft_skip=4
timeout_poll=30
timeout_test=$((timeout_poll * 2 + 1))
-test_cnt=1
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-60s"
ret=0
bail=0
slack=50
+large=""
+small=""
+sout=""
+cout=""
+capout=""
+size=0
usage() {
- echo "Usage: $0 [ -b ] [ -c ] [ -d ]"
+ echo "Usage: $0 [ -b ] [ -c ] [ -d ] [ -i]"
echo -e "\t-b: bail out after first error, otherwise runs al testcases"
echo -e "\t-c: capture packets for each test using tcpdump (default: no capture)"
echo -e "\t-d: debug this script"
+ echo -e "\t-i: use 'ip mptcp' instead of 'pm_nl_ctl'"
}
+# This function is used in the cleanup trap
+#shellcheck disable=SC2317
cleanup()
{
rm -f "$cout" "$sout"
rm -f "$large" "$small"
rm -f "$capout"
- local netns
- for netns in "$ns1" "$ns2" "$ns3";do
- ip netns del $netns
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}" "${ns3}"
}
mptcp_lib_check_mptcp
-
-ip -Version > /dev/null 2>&1
-if [ $? -ne 0 ];then
- echo "SKIP: Could not run test without ip tool"
- exit $ksft_skip
-fi
+mptcp_lib_check_tools ip tc
# "$ns1" ns2 ns3
# ns1eth1 ns2eth1 ns2eth3 ns3eth1
@@ -64,12 +68,7 @@ setup()
trap cleanup EXIT
- for i in "$ns1" "$ns2" "$ns3";do
- ip netns add $i || exit $ksft_skip
- ip -net $i link set lo up
- ip netns exec $i sysctl -q net.ipv4.conf.all.rp_filter=0
- ip netns exec $i sysctl -q net.ipv4.conf.default.rp_filter=0
- done
+ mptcp_lib_ns_init ns1 ns2 ns3
ip link add ns1eth1 netns "$ns1" type veth peer name ns2eth1 netns "$ns2"
ip link add ns1eth2 netns "$ns1" type veth peer name ns2eth2 netns "$ns2"
@@ -87,8 +86,8 @@ setup()
ip -net "$ns1" route add default via 10.0.2.2 metric 101
ip -net "$ns1" route add default via dead:beef:2::2 metric 101
- ip netns exec "$ns1" ./pm_nl_ctl limits 1 1
- ip netns exec "$ns1" ./pm_nl_ctl add 10.0.2.1 dev ns1eth2 flags subflow
+ mptcp_lib_pm_nl_set_limits "${ns1}" 1 1
+ mptcp_lib_pm_nl_add_endpoint "${ns1}" 10.0.2.1 dev ns1eth2 flags subflow
ip -net "$ns2" addr add 10.0.1.2/24 dev ns2eth1
ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
@@ -110,7 +109,7 @@ setup()
ip -net "$ns3" route add default via 10.0.3.2
ip -net "$ns3" route add default via dead:beef:3::2
- ip netns exec "$ns3" ./pm_nl_ctl limits 1 1
+ mptcp_lib_pm_nl_set_limits "${ns3}" 1 1
# debug build can slow down measurably the test program
# we use quite tight time limit on the run-time, to ensure
@@ -123,31 +122,13 @@ setup()
grep -q ' kmemleak_init$\| lockdep_init$\| kasan_init$\| prove_locking$' /proc/kallsyms && slack=$((slack+550))
}
-# $1: ns, $2: port
-wait_local_port_listen()
-{
- local listener_ns="${1}"
- local port="${2}"
-
- local port_hex i
-
- port_hex="$(printf "%04X" "${port}")"
- for i in $(seq 10); do
- ip netns exec "${listener_ns}" cat /proc/net/tcp* | \
- awk "BEGIN {rc=1} {if (\$2 ~ /:${port_hex}\$/ && \$4 ~ /0A/) {rc=0; exit}} END {exit rc}" &&
- break
- sleep 0.1
- done
-}
-
do_transfer()
{
local cin=$1
local sin=$2
local max_time=$3
local port
- port=$((10000+$test_cnt))
- test_cnt=$((test_cnt+1))
+ port=$((10000+MPTCP_LIB_TEST_COUNTER))
:> "$cout"
:> "$sout"
@@ -155,6 +136,7 @@ do_transfer()
if $capture; then
local capuser
+ local rndh="${ns1:4}"
if [ -z $SUDO_USER ] ; then
capuser=""
else
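Review bot: `local rndh="${ns1:4}"` recovers the random suffix by skipping a fixed four characters, which ties do_transfer() to the exact namespace name format produced by `mptcp_lib_ns_init`, a helper that is not part of this hunk. If that prefix ever changes, whatever the capture code below derives from `rndh` changes with it and nothing reports the mismatch. `local rndh="${ns1#ns1-}"` states the intent directly, assuming the name still has the form `ns1-<suffix>` as the removed `ns1="ns1-$rndh"` line suggests.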
@@ -179,7 +161,7 @@ do_transfer()
0.0.0.0 < "$sin" > "$sout" &
local spid=$!
- wait_local_port_listen "${ns3}" "${port}"
+ mptcp_lib_wait_local_port_listen "${ns3}" "${port}"
timeout ${timeout_test} \
ip netns exec ${ns1} \
@@ -206,12 +188,12 @@ do_transfer()
printf "%-16s" " max $max_time "
if [ $retc -eq 0 ] && [ $rets -eq 0 ] && \
[ $cmpc -eq 0 ] && [ $cmps -eq 0 ]; then
- echo "[ OK ]"
+ mptcp_lib_pr_ok
cat "$capout"
return 0
fi
- echo " [ fail ]"
+ mptcp_lib_pr_fail
echo "client exit code $retc, server $rets" 1>&2
echo -e "\nnetns ${ns3} socket stat for $port:" 1>&2
ip netns exec ${ns3} ss -nita 1>&2 -o "sport = :$port"
@@ -235,8 +217,8 @@ run_test()
shift 4
local msg=$*
- [ $delay1 -gt 0 ] && delay1="delay $delay1" || delay1=""
- [ $delay2 -gt 0 ] && delay2="delay $delay2" || delay2=""
+ [ $delay1 -gt 0 ] && delay1="delay ${delay1}ms" || delay1=""
+ [ $delay2 -gt 0 ] && delay2="delay ${delay2}ms" || delay2=""
for dev in ns1eth1 ns1eth2; do
tc -n $ns1 qdisc del dev $dev root >/dev/null 2>&1
@@ -258,7 +240,7 @@ run_test()
# completion (see mptcp_connect): 200ms on each side, add some slack
time=$((time + 400 + slack))
- printf "%-60s" "$msg"
+ mptcp_lib_print_title "$msg"
do_transfer $small $large $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
@@ -267,7 +249,8 @@ run_test()
[ $bail -eq 0 ] || exit $ret
fi
- printf "%-60s" "$msg - reverse direction"
+ msg+=" - reverse direction"
+ mptcp_lib_print_title "${msg}"
do_transfer $large $small $time
lret=$?
mptcp_lib_result_code "${lret}" "${msg}"
@@ -277,11 +260,11 @@ run_test()
fi
}
-while getopts "bcdh" option;do
+while getopts "bcdhi" option;do
case "$option" in
"h")
usage $0
- exit 0
+ exit ${KSFT_PASS}
;;
"b")
bail=1
@@ -292,21 +275,24 @@ while getopts "bcdh" option;do
"d")
set -x
;;
+ "i")
+ mptcp_lib_set_ip_mptcp
+ ;;
"?")
usage $0
- exit 1
+ exit ${KSFT_FAIL}
;;
esac
done
setup
run_test 10 10 0 0 "balanced bwidth"
-run_test 10 10 1 50 "balanced bwidth with unbalanced delay"
+run_test 10 10 1 25 "balanced bwidth with unbalanced delay"
# we still need some additional infrastructure to pass the following test-cases
-run_test 30 10 0 0 "unbalanced bwidth"
-run_test 30 10 1 50 "unbalanced bwidth with unbalanced delay"
-run_test 30 10 50 1 "unbalanced bwidth with opposed, unbalanced delay"
+run_test 10 3 0 0 "unbalanced bwidth"
+run_test 10 3 1 25 "unbalanced bwidth with unbalanced delay"
+run_test 10 3 25 1 "unbalanced bwidth with opposed, unbalanced delay"
mptcp_lib_result_print_all_tap
exit $ret
diff --git a/tools/testing/selftests/net/mptcp/userspace_pm.sh b/tools/testing/selftests/net/mptcp/userspace_pm.sh
index b25a3e33eb25..9e2981f2d7f5 100755
--- a/tools/testing/selftests/net/mptcp/userspace_pm.sh
+++ b/tools/testing/selftests/net/mptcp/userspace_pm.sh
@@ -5,7 +5,7 @@
# code but we accept it.
#shellcheck disable=SC2086
-# Some variables are used below but indirectly, see check_expected_one()
+# Some variables are used below but indirectly, see verify_*_event()
#shellcheck disable=SC2034
. "$(dirname "${0}")/mptcp_lib.sh"
@@ -17,21 +17,17 @@ if ! mptcp_lib_has_file '/proc/sys/net/mptcp/pm_type'; then
echo "userspace pm tests are not supported by the kernel: SKIP"
exit ${KSFT_SKIP}
fi
+mptcp_lib_check_tools ip
-if ! ip -Version &> /dev/null; then
- echo "SKIP: Cannot not run test without ip tool"
- exit ${KSFT_SKIP}
-fi
+ANNOUNCED=${MPTCP_LIB_EVENT_ANNOUNCED}
+REMOVED=${MPTCP_LIB_EVENT_REMOVED}
+SUB_ESTABLISHED=${MPTCP_LIB_EVENT_SUB_ESTABLISHED}
+SUB_CLOSED=${MPTCP_LIB_EVENT_SUB_CLOSED}
+LISTENER_CREATED=${MPTCP_LIB_EVENT_LISTENER_CREATED}
+LISTENER_CLOSED=${MPTCP_LIB_EVENT_LISTENER_CLOSED}
-ANNOUNCED=6 # MPTCP_EVENT_ANNOUNCED
-REMOVED=7 # MPTCP_EVENT_REMOVED
-SUB_ESTABLISHED=10 # MPTCP_EVENT_SUB_ESTABLISHED
-SUB_CLOSED=11 # MPTCP_EVENT_SUB_CLOSED
-LISTENER_CREATED=15 #MPTCP_EVENT_LISTENER_CREATED
-LISTENER_CLOSED=16 #MPTCP_EVENT_LISTENER_CLOSED
-
-AF_INET=2
-AF_INET6=10
+AF_INET=${MPTCP_LIB_AF_INET}
+AF_INET6=${MPTCP_LIB_AF_INET6}
file=""
server_evts=""
@@ -54,20 +50,16 @@ app6_port=50004
client_addr_id=${RANDOM:0:2}
server_addr_id=${RANDOM:0:2}
-sec=$(date +%s)
-rndh=$(printf %x "$sec")-$(mktemp -u XXXXXX)
-ns1="ns1-$rndh"
-ns2="ns2-$rndh"
+ns1=""
+ns2=""
ret=0
test_name=""
-
-_printf() {
- stdbuf -o0 -e0 printf "${@}"
-}
+# a bit more space: because we have more to display
+MPTCP_LIB_TEST_FORMAT="%02u %-68s"
print_title()
{
- _printf "INFO: %s\n" "${1}"
+ mptcp_lib_pr_info "${1}"
}
# $1: test name
@@ -75,48 +67,32 @@ print_test()
{
test_name="${1}"
- _printf "%-63s" "${test_name}"
-}
-
-print_results()
-{
- _printf "[%s]\n" "${1}"
+ mptcp_lib_print_title "${test_name}"
}
test_pass()
{
- print_results " OK "
+ mptcp_lib_pr_ok
mptcp_lib_result_pass "${test_name}"
}
test_skip()
{
- print_results "SKIP"
+ mptcp_lib_pr_skip
mptcp_lib_result_skip "${test_name}"
}
# $1: msg
test_fail()
{
- print_results "FAIL"
- ret=1
-
- if [ -n "${1}" ]; then
- _printf "\t%s\n" "${1}"
+ if [ ${#} -gt 0 ]
+ then
+ mptcp_lib_pr_fail "${@}"
fi
-
+ ret=${KSFT_FAIL}
mptcp_lib_result_fail "${test_name}"
}
-kill_wait()
-{
- [ $1 -eq 0 ] && return 0
-
- kill -SIGUSR1 $1 > /dev/null 2>&1
- kill $1 > /dev/null 2>&1
- wait $1 2>/dev/null
-}
-
# This function is used in the cleanup trap
#shellcheck disable=SC2317
cleanup()
@@ -128,26 +104,21 @@ cleanup()
for pid in $client4_pid $server4_pid $client6_pid $server6_pid\
$server_evts_pid $client_evts_pid
do
- kill_wait $pid
+ mptcp_lib_kill_wait $pid
done
- local netns
- for netns in "$ns1" "$ns2" ;do
- ip netns del "$netns"
- done
+ mptcp_lib_ns_exit "${ns1}" "${ns2}"
rm -rf $file $client_evts $server_evts
- _printf "Done\n"
+ mptcp_lib_pr_info "Done"
}
trap cleanup EXIT
# Create and configure network namespaces for testing
+mptcp_lib_ns_init ns1 ns2
for i in "$ns1" "$ns2" ;do
- ip netns add "$i" || exit 1
- ip -net "$i" link set lo up
- ip netns exec "$i" sysctl -q net.mptcp.enabled=1
ip netns exec "$i" sysctl -q net.mptcp.pm_type=1
done
@@ -169,27 +140,23 @@ ip -net "$ns2" addr add dead:beef:1::2/64 dev ns2eth1 nodad
ip -net "$ns2" addr add dead:beef:2::2/64 dev ns2eth1 nodad
ip -net "$ns2" link set ns2eth1 up
+file=$(mktemp)
+mptcp_lib_make_file "$file" 2 1
+
+# Capture netlink events over the two network namespaces running
+# the MPTCP client and server
+client_evts=$(mktemp)
+mptcp_lib_events "${ns2}" "${client_evts}" client_evts_pid
+server_evts=$(mktemp)
+mptcp_lib_events "${ns1}" "${server_evts}" server_evts_pid
+sleep 0.5
+
print_title "Init"
print_test "Created network namespaces ns1, ns2"
test_pass
-make_file()
-{
- # Store a chunk of data in a file to transmit over an MPTCP connection
- local name=$1
- local ksize=1
-
- dd if=/dev/urandom of="$name" bs=2 count=$ksize 2> /dev/null
- echo -e "\nMPTCP_TEST_FILE_END_MARKER" >> "$name"
-}
-
make_connection()
{
- if [ -z "$file" ]; then
- file=$(mktemp)
- fi
- make_file "$file" "client"
-
local is_v6=$1
local app_port=$app4_port
local connect_addr="10.0.1.1"
@@ -203,27 +170,8 @@ make_connection()
is_v6="v4"
fi
- # Capture netlink events over the two network namespaces running
- # the MPTCP client and server
- if [ -z "$client_evts" ]; then
- client_evts=$(mktemp)
- fi
:>"$client_evts"
- if [ $client_evts_pid -ne 0 ]; then
- kill_wait $client_evts_pid
- fi
- ip netns exec "$ns2" ./pm_nl_ctl events >> "$client_evts" 2>&1 &
- client_evts_pid=$!
- if [ -z "$server_evts" ]; then
- server_evts=$(mktemp)
- fi
:>"$server_evts"
- if [ $server_evts_pid -ne 0 ]; then
- kill_wait $server_evts_pid
- fi
- ip netns exec "$ns1" ./pm_nl_ctl events >> "$server_evts" 2>&1 &
- server_evts_pid=$!
- sleep 0.5
# Run the server
ip netns exec "$ns1" \
@@ -247,14 +195,11 @@ make_connection()
local server_token
local server_serverside
- client_token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
- client_port=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
- client_serverside=$(sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q'\
- "$client_evts")
- server_token=$(grep "type:1," "$server_evts" |
- sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q')
- server_serverside=$(grep "type:1," "$server_evts" |
- sed --unbuffered -n 's/.*\(server_side:\)\([[:digit:]]*\).*$/\2/p;q')
+ client_token=$(mptcp_lib_evts_get_info token "$client_evts")
+ client_port=$(mptcp_lib_evts_get_info sport "$client_evts")
+ client_serverside=$(mptcp_lib_evts_get_info server_side "$client_evts")
+ server_token=$(mptcp_lib_evts_get_info token "$server_evts")
+ server_serverside=$(mptcp_lib_evts_get_info server_side "$server_evts")
print_test "Established IP${is_v6} MPTCP Connection ns2 => ns1"
if [ "$client_token" != "" ] && [ "$server_token" != "" ] && [ "$client_serverside" = 0 ] &&
@@ -264,7 +209,7 @@ make_connection()
else
test_fail "Expected tokens (c:${client_token} - s:${server_token}) and server (c:${client_serverside} - s:${server_serverside})"
mptcp_lib_result_print_all_tap
- exit 1
+ exit ${KSFT_FAIL}
fi
if [ "$is_v6" = "v6" ]
@@ -283,45 +228,16 @@ make_connection()
fi
}
-# $1: var name ; $2: prev ret
-check_expected_one()
-{
- local var="${1}"
- local exp="e_${var}"
- local prev_ret="${2}"
-
- if [ "${!var}" = "${!exp}" ]
- then
- return 0
- fi
-
- if [ "${prev_ret}" = "0" ]
- then
- test_fail
- fi
-
- _printf "\tExpected value for '%s': '%s', got '%s'.\n" \
- "${var}" "${!exp}" "${!var}"
- return 1
-}
-
# $@: all var names to check
check_expected()
{
- local rc=0
- local var
-
- for var in "${@}"
- do
- check_expected_one "${var}" "${rc}" || rc=1
- done
-
- if [ ${rc} -eq 0 ]
+ if mptcp_lib_check_expected "${@}"
then
test_pass
return 0
fi
+ test_fail
return 1
}
@@ -340,16 +256,16 @@ verify_announce_event()
local dport
local id
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
if [ "$e_af" = "v6" ]
then
- addr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ addr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
else
- addr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ addr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
fi
- dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+ id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
check_expected "type" "token" "addr" "dport" "id"
}
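Review bot: The new third argument `$e_type` is passed unquoted in every `mptcp_lib_evts_get_info` call in this hunk, so an empty or unset `e_type` silently drops the argument instead of passing an empty string the helper could reject. Quoting it keeps the argument count fixed, e.g. `type=$(mptcp_lib_evts_get_info type "$evt" "$e_type")`. The same applies to the calls added in the verify_remove_event and verify_subflow_events hunks, and to `$SUB_ESTABLISHED` in the test_subflows hunks. verify_announce_event starts above this hunk, so how `e_type` is assigned is not visible here.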
@@ -367,7 +283,7 @@ test_announce()
$client_addr_id dev ns2eth1 > /dev/null 2>&1
local type
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
print_test "ADD_ADDR 10.0.2.2 (ns2) => ns1, invalid token"
if [ "$type" = "" ]
then
@@ -446,9 +362,9 @@ verify_remove_event()
local token
local id
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- id=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ id=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
check_expected "type" "token" "id"
}
@@ -466,12 +382,12 @@ test_remove()
$client_addr_id > /dev/null 2>&1
print_test "RM_ADDR id:${client_addr_id} ns2 => ns1, invalid token"
local type
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
then
test_pass
else
- test_fail
+ test_fail "unexpected type: ${type}"
fi
# RM_ADDR using an invalid addr id should result in no action
@@ -479,12 +395,12 @@ test_remove()
ip netns exec "$ns2" ./pm_nl_ctl rem token "$client4_token" id\
$invalid_id > /dev/null 2>&1
print_test "RM_ADDR id:${invalid_id} ns2 => ns1, invalid id"
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ type=$(mptcp_lib_evts_get_info type "$server_evts")
if [ "$type" = "" ]
then
test_pass
else
- test_fail
+ test_fail "unexpected type: ${type}"
fi
# RM_ADDR from the client to server machine
@@ -564,7 +480,7 @@ verify_subflow_events()
local remid
local info
- info="${e_saddr} (${e_from}) => ${e_daddr} (${e_to})"
+ info="${e_saddr} (${e_from}) => ${e_daddr}:${e_dport} (${e_to})"
if [ "$e_type" = "$SUB_ESTABLISHED" ]
then
@@ -583,19 +499,19 @@ verify_subflow_events()
fi
fi
- type=$(sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- token=$(sed --unbuffered -n 's/.*\(token:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- family=$(sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- dport=$(sed --unbuffered -n 's/.*\(dport:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- locid=$(sed --unbuffered -n 's/.*\(loc_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
- remid=$(sed --unbuffered -n 's/.*\(rem_id:\)\([[:digit:]]*\).*$/\2/p;q' "$evt")
+ type=$(mptcp_lib_evts_get_info type "$evt" $e_type)
+ token=$(mptcp_lib_evts_get_info token "$evt" $e_type)
+ family=$(mptcp_lib_evts_get_info family "$evt" $e_type)
+ dport=$(mptcp_lib_evts_get_info dport "$evt" $e_type)
+ locid=$(mptcp_lib_evts_get_info loc_id "$evt" $e_type)
+ remid=$(mptcp_lib_evts_get_info rem_id "$evt" $e_type)
if [ "$family" = "$AF_INET6" ]
then
- saddr=$(sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
- daddr=$(sed --unbuffered -n 's/.*\(daddr6:\)\([0-9a-f:.]*\).*$/\2/p;q' "$evt")
+ saddr=$(mptcp_lib_evts_get_info saddr6 "$evt" $e_type)
+ daddr=$(mptcp_lib_evts_get_info daddr6 "$evt" $e_type)
else
- saddr=$(sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
- daddr=$(sed --unbuffered -n 's/.*\(daddr4:\)\([0-9.]*\).*$/\2/p;q' "$evt")
+ saddr=$(mptcp_lib_evts_get_info saddr4 "$evt" $e_type)
+ daddr=$(mptcp_lib_evts_get_info daddr4 "$evt" $e_type)
fi
check_expected "type" "token" "daddr" "dport" "family" "saddr" "locid" "remid"
@@ -627,10 +543,10 @@ test_subflows()
"10.0.2.2" "$client4_port" "23" "$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
local sport
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from server to client machine
:>"$server_evts"
@@ -666,9 +582,9 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW6 from server to client machine
:>"$server_evts"
@@ -705,9 +621,9 @@ test_subflows()
"$client_addr_id" "ns1" "ns2"
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$server_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$server_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from server to client machine
:>"$server_evts"
@@ -743,9 +659,9 @@ test_subflows()
"10.0.2.1" "$app4_port" "23" "$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from client to server machine
:>"$client_evts"
@@ -782,9 +698,9 @@ test_subflows()
"$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW6 from client to server machine
:>"$client_evts"
@@ -819,9 +735,9 @@ test_subflows()
"10.0.2.2" "10.0.2.1" "$new4_port" "23" "$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from client to server machine
:>"$client_evts"
@@ -865,9 +781,9 @@ test_subflows_v4_v6_mix()
"$server_addr_id" "ns2" "ns1"
# Delete the listener from the server ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
- sport=$(sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q' "$client_evts")
+ sport=$(mptcp_lib_evts_get_info sport "$client_evts" $SUB_ESTABLISHED)
# DESTROY_SUBFLOW from client to server machine
:>"$client_evts"
@@ -896,9 +812,10 @@ test_prio()
# Check TX
print_test "MP_PRIO TX"
- count=$(ip netns exec "$ns2" nstat -as | grep MPTcpExtMPPrioTx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ $count != 1 ]; then
+ count=$(mptcp_lib_get_counter "$ns2" "MPTcpExtMPPrioTx")
+ if [ -z "$count" ]; then
+ test_skip
+ elif [ $count != 1 ]; then
test_fail "Count != 1: ${count}"
else
test_pass
@@ -906,9 +823,10 @@ test_prio()
# Check RX
print_test "MP_PRIO RX"
- count=$(ip netns exec "$ns1" nstat -as | grep MPTcpExtMPPrioRx | awk '{print $2}')
- [ -z "$count" ] && count=0
- if [ $count != 1 ]; then
+ count=$(mptcp_lib_get_counter "$ns1" "MPTcpExtMPPrioRx")
+ if [ -z "$count" ]; then
+ test_skip
+ elif [ $count != 1 ]; then
test_fail "Count != 1: ${count}"
else
test_pass
@@ -917,37 +835,11 @@ test_prio()
verify_listener_events()
{
- local evt=$1
- local e_type=$2
- local e_family=$3
- local e_saddr=$4
- local e_sport=$5
- local type
- local family
- local saddr
- local sport
-
- if [ $e_type = $LISTENER_CREATED ]; then
- print_test "CREATE_LISTENER $e_saddr:$e_sport"
- elif [ $e_type = $LISTENER_CLOSED ]; then
- print_test "CLOSE_LISTENER $e_saddr:$e_sport"
- fi
-
- type=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(type:\)\([[:digit:]]*\).*$/\2/p;q')
- family=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(family:\)\([[:digit:]]*\).*$/\2/p;q')
- sport=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(sport:\)\([[:digit:]]*\).*$/\2/p;q')
- if [ $family ] && [ $family = $AF_INET6 ]; then
- saddr=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(saddr6:\)\([0-9a-f:.]*\).*$/\2/p;q')
+ if mptcp_lib_verify_listener_events "${@}"; then
+ test_pass
else
- saddr=$(grep "type:$e_type," $evt |
- sed --unbuffered -n 's/.*\(saddr4:\)\([0-9.]*\).*$/\2/p;q')
+ test_fail
fi
-
- check_expected "type" "family" "saddr" "sport"
}
test_listener()
@@ -969,6 +861,7 @@ test_listener()
local listener_pid=$!
sleep 0.5
+ print_test "CREATE_LISTENER 10.0.2.2:$client4_port"
verify_listener_events $client_evts $LISTENER_CREATED $AF_INET 10.0.2.2 $client4_port
# ADD_ADDR from client to server machine reusing the subflow port
@@ -982,9 +875,10 @@ test_listener()
sleep 0.5
# Delete the listener from the client ns, if one was created
- kill_wait $listener_pid
+ mptcp_lib_kill_wait $listener_pid
sleep 0.5
+ print_test "CLOSE_LISTENER 10.0.2.2:$client4_port"
verify_listener_events $client_evts $LISTENER_CLOSED $AF_INET 10.0.2.2 $client4_port
}
diff --git a/tools/testing/selftests/net/msg_zerocopy.sh b/tools/testing/selftests/net/msg_zerocopy.sh
index 825ffec85cea..89c22f5320e0 100755
--- a/tools/testing/selftests/net/msg_zerocopy.sh
+++ b/tools/testing/selftests/net/msg_zerocopy.sh
@@ -70,23 +70,22 @@ case "${TXMODE}" in
esac
# Start of state changes: install cleanup handler
-save_sysctl_mem="$(sysctl -n ${path_sysctl_mem})"
cleanup() {
ip netns del "${NS2}"
ip netns del "${NS1}"
- sysctl -w -q "${path_sysctl_mem}=${save_sysctl_mem}"
}
trap cleanup EXIT
-# Configure system settings
-sysctl -w -q "${path_sysctl_mem}=1000000"
-
# Create virtual ethernet pair between network namespaces
ip netns add "${NS1}"
ip netns add "${NS2}"
+# Configure system settings
+ip netns exec "${NS1}" sysctl -w -q "${path_sysctl_mem}=1000000"
+ip netns exec "${NS2}" sysctl -w -q "${path_sysctl_mem}=1000000"
+
ip link add "${DEV}" mtu "${DEV_MTU}" netns "${NS1}" type veth \
peer name "${DEV}" mtu "${DEV_MTU}" netns "${NS2}"
diff --git a/tools/testing/selftests/net/nat6to4.c b/tools/testing/selftests/net/nat6to4.bpf.c
index ac54c36b25fc..ac54c36b25fc 100644
--- a/tools/testing/selftests/net/nat6to4.c
+++ b/tools/testing/selftests/net/nat6to4.bpf.c
diff --git a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
index 86e621b7b9c7..5db69dad0cfc 100755
--- a/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
+++ b/tools/testing/selftests/net/ndisc_unsolicited_na_test.sh
@@ -10,16 +10,12 @@
# 0 1 0 Don't update NC
# 0 1 1 Add a STALE NC entry
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
PAUSE_ON_FAIL=no
PAUSE=no
-HOST_NS="ns-host"
-ROUTER_NS="ns-router"
-
HOST_INTF="veth-host"
ROUTER_INTF="veth-router"
@@ -29,11 +25,6 @@ SUBNET_WIDTH=64
ROUTER_ADDR_WITH_MASK="${ROUTER_ADDR}/${SUBNET_WIDTH}"
HOST_ADDR_WITH_MASK="${HOST_ADDR}/${SUBNET_WIDTH}"
-IP_HOST="ip -6 -netns ${HOST_NS}"
-IP_HOST_EXEC="ip netns exec ${HOST_NS}"
-IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
-IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
-
tcpdump_stdout=
tcpdump_stderr=
@@ -76,8 +67,12 @@ setup()
# Setup two namespaces and a veth tunnel across them.
# On end of the tunnel is a router and the other end is a host.
- ip netns add ${HOST_NS}
- ip netns add ${ROUTER_NS}
+ setup_ns HOST_NS ROUTER_NS
+ IP_HOST="ip -6 -netns ${HOST_NS}"
+ IP_HOST_EXEC="ip netns exec ${HOST_NS}"
+ IP_ROUTER="ip -6 -netns ${ROUTER_NS}"
+ IP_ROUTER_EXEC="ip netns exec ${ROUTER_NS}"
+
${IP_ROUTER} link add ${ROUTER_INTF} type veth \
peer name ${HOST_INTF} netns ${HOST_NS}
diff --git a/tools/testing/selftests/net/net_helper.sh b/tools/testing/selftests/net/net_helper.sh
new file mode 100644
index 000000000000..6596fe03c77f
--- /dev/null
+++ b/tools/testing/selftests/net/net_helper.sh
@@ -0,0 +1,25 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Helper functions
+
+wait_local_port_listen()
+{
+ local listener_ns="${1}"
+ local port="${2}"
+ local protocol="${3}"
+ local pattern
+ local i
+
+ pattern=":$(printf "%04X" "${port}") "
+
+ # for tcp protocol additionally check the socket state
+ [ ${protocol} = "tcp" ] && pattern="${pattern}0A"
+ for i in $(seq 10); do
+ if ip netns exec "${listener_ns}" awk '{print $2" "$4}' \
+ /proc/net/"${protocol}"* | grep -q "${pattern}"; then
+ break
+ fi
+ sleep 0.1
+ done
+}
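Review bot: wait_local_port_listen returns success whether or not the port ever appears, because the loop falls through after ten attempts and the last command run is `sleep 0.1`; a caller cannot tell a listening socket from a timeout. Using `return 0` in place of `break` and ending the function with `return 1` would let a test stop early instead of racing a server that never came up, though callers running under `set -e` would need checking first. `${protocol}` is also unquoted in `[ ${protocol} = "tcp" ]`, which turns into a test syntax error when the third argument is missing; `[ "${protocol}" = "tcp" ]` avoids that.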
diff --git a/tools/testing/selftests/net/netfilter/.gitignore b/tools/testing/selftests/net/netfilter/.gitignore
new file mode 100644
index 000000000000..0a64d6d0e29a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/.gitignore
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: GPL-2.0-only
+audit_logread
+connect_close
+conntrack_dump_flush
+sctp_collision
+nf_queue
diff --git a/tools/testing/selftests/net/netfilter/Makefile b/tools/testing/selftests/net/netfilter/Makefile
new file mode 100644
index 000000000000..47945b2b3f92
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/Makefile
@@ -0,0 +1,52 @@
+# SPDX-License-Identifier: GPL-2.0
+
+top_srcdir = ../../../../..
+
+HOSTPKG_CONFIG := pkg-config
+MNL_CFLAGS := $(shell $(HOSTPKG_CONFIG) --cflags libmnl 2>/dev/null)
+MNL_LDLIBS := $(shell $(HOSTPKG_CONFIG) --libs libmnl 2>/dev/null || echo -lmnl)
+
+TEST_PROGS := br_netfilter.sh bridge_brouter.sh
+TEST_PROGS += conntrack_icmp_related.sh
+TEST_PROGS += conntrack_ipip_mtu.sh
+TEST_PROGS += conntrack_tcp_unreplied.sh
+TEST_PROGS += conntrack_sctp_collision.sh
+TEST_PROGS += conntrack_vrf.sh
+TEST_PROGS += ipvs.sh
+TEST_PROGS += nf_conntrack_packetdrill.sh
+TEST_PROGS += nf_nat_edemux.sh
+TEST_PROGS += nft_audit.sh
+TEST_PROGS += nft_concat_range.sh
+TEST_PROGS += nft_conntrack_helper.sh
+TEST_PROGS += nft_fib.sh
+TEST_PROGS += nft_flowtable.sh
+TEST_PROGS += nft_meta.sh
+TEST_PROGS += nft_nat.sh
+TEST_PROGS += nft_nat_zones.sh
+TEST_PROGS += nft_queue.sh
+TEST_PROGS += nft_synproxy.sh
+TEST_PROGS += nft_zones_many.sh
+TEST_PROGS += rpath.sh
+TEST_PROGS += xt_string.sh
+
+TEST_PROGS_EXTENDED = nft_concat_range_perf.sh
+
+TEST_GEN_PROGS = conntrack_dump_flush
+
+TEST_GEN_FILES = audit_logread
+TEST_GEN_FILES += connect_close nf_queue
+TEST_GEN_FILES += sctp_collision
+
+include ../../lib.mk
+
+$(OUTPUT)/nf_queue: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/nf_queue: LDLIBS += $(MNL_LDLIBS)
+
+$(OUTPUT)/conntrack_dump_flush: CFLAGS += $(MNL_CFLAGS)
+$(OUTPUT)/conntrack_dump_flush: LDLIBS += $(MNL_LDLIBS)
+
+TEST_FILES := lib.sh
+TEST_FILES += packetdrill
+
+TEST_INCLUDES := \
+ ../lib.sh
diff --git a/tools/testing/selftests/net/netfilter/audit_logread.c b/tools/testing/selftests/net/netfilter/audit_logread.c
new file mode 100644
index 000000000000..a0a880fc2d9d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/audit_logread.c
@@ -0,0 +1,165 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+#include <errno.h>
+#include <fcntl.h>
+#include <poll.h>
+#include <signal.h>
+#include <stdint.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/socket.h>
+#include <unistd.h>
+#include <linux/audit.h>
+#include <linux/netlink.h>
+
+static int fd;
+
+#define MAX_AUDIT_MESSAGE_LENGTH 8970
+struct audit_message {
+ struct nlmsghdr nlh;
+ union {
+ struct audit_status s;
+ char data[MAX_AUDIT_MESSAGE_LENGTH];
+ } u;
+};
+
+int audit_recv(int fd, struct audit_message *rep)
+{
+ struct sockaddr_nl addr;
+ socklen_t addrlen = sizeof(addr);
+ int ret;
+
+ do {
+ ret = recvfrom(fd, rep, sizeof(*rep), 0,
+ (struct sockaddr *)&addr, &addrlen);
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret < 0 ||
+ addrlen != sizeof(addr) ||
+ addr.nl_pid != 0 ||
+ rep->nlh.nlmsg_type == NLMSG_ERROR) /* short-cut for now */
+ return -1;
+
+ return ret;
+}
+
+int audit_send(int fd, uint16_t type, uint32_t key, uint32_t val)
+{
+ static int seq = 0;
+ struct audit_message msg = {
+ .nlh = {
+ .nlmsg_len = NLMSG_SPACE(sizeof(msg.u.s)),
+ .nlmsg_type = type,
+ .nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK,
+ .nlmsg_seq = ++seq,
+ },
+ .u.s = {
+ .mask = key,
+ .enabled = key == AUDIT_STATUS_ENABLED ? val : 0,
+ .pid = key == AUDIT_STATUS_PID ? val : 0,
+ }
+ };
+ struct sockaddr_nl addr = {
+ .nl_family = AF_NETLINK,
+ };
+ int ret;
+
+ do {
+ ret = sendto(fd, &msg, msg.nlh.nlmsg_len, 0,
+ (struct sockaddr *)&addr, sizeof(addr));
+ } while (ret < 0 && errno == EINTR);
+
+ if (ret != (int)msg.nlh.nlmsg_len)
+ return -1;
+ return 0;
+}
+
+int audit_set(int fd, uint32_t key, uint32_t val)
+{
+ struct audit_message rep = { 0 };
+ int ret;
+
+ ret = audit_send(fd, AUDIT_SET, key, val);
+ if (ret)
+ return ret;
+
+ ret = audit_recv(fd, &rep);
+ if (ret < 0)
+ return ret;
+ return 0;
+}
+
+int readlog(int fd)
+{
+ struct audit_message rep = { 0 };
+ int ret = audit_recv(fd, &rep);
+ const char *sep = "";
+ char *k, *v;
+
+ if (ret < 0)
+ return ret;
+
+ if (rep.nlh.nlmsg_type != AUDIT_NETFILTER_CFG)
+ return 0;
+
+ /* skip the initial "audit(...): " part */
+ strtok(rep.u.data, " ");
+
+ while ((k = strtok(NULL, "="))) {
+ v = strtok(NULL, " ");
+
+ /* these vary and/or are uninteresting, ignore */
+ if (!strcmp(k, "pid") ||
+ !strcmp(k, "comm") ||
+ !strcmp(k, "subj"))
+ continue;
+
+ /* strip the varying sequence number */
+ if (!strcmp(k, "table"))
+ *strchrnul(v, ':') = '\0';
+
+ printf("%s%s=%s", sep, k, v);
+ sep = " ";
+ }
+ if (*sep) {
+ printf("\n");
+ fflush(stdout);
+ }
+ return 0;
+}
+
+void cleanup(int sig)
+{
+ audit_set(fd, AUDIT_STATUS_ENABLED, 0);
+ close(fd);
+ if (sig)
+ exit(0);
+}
+
+int main(int argc, char **argv)
+{
+ struct sigaction act = {
+ .sa_handler = cleanup,
+ };
+
+ fd = socket(PF_NETLINK, SOCK_RAW, NETLINK_AUDIT);
+ if (fd < 0) {
+ perror("Can't open netlink socket");
+ return -1;
+ }
+
+ if (sigaction(SIGTERM, &act, NULL) < 0 ||
+ sigaction(SIGINT, &act, NULL) < 0) {
+ perror("Can't set signal handler");
+ close(fd);
+ return -1;
+ }
+
+ audit_set(fd, AUDIT_STATUS_ENABLED, 1);
+ audit_set(fd, AUDIT_STATUS_PID, getpid());
+
+ while (1)
+ readlog(fd);
+}
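Review bot: In readlog(), `v = strtok(NULL, " ")` returns NULL when the record ends in a token with no value after it, and `v` is then handed to `strchrnul()` and to `printf("%s")` unchecked; adding `if (!v) break;` right after the assignment keeps a malformed or truncated record from crashing the reader. `rep.u.data` is NUL-terminated only because the callers zero `rep` and the record is shorter than the buffer, which deserves a comment next to `MAX_AUDIT_MESSAGE_LENGTH`. cleanup() always sends `AUDIT_STATUS_ENABLED` with value 0, so a comment there should say that the tool leaves auditing switched off on exit whatever state it found, which matters on a machine that had auditing enabled before the test ran.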
diff --git a/tools/testing/selftests/net/netfilter/br_netfilter.sh b/tools/testing/selftests/net/netfilter/br_netfilter.sh
new file mode 100755
index 000000000000..c28379a965d8
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/br_netfilter.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test for legacy br_netfilter module combined with connection tracking,
+# a combination that doesn't really work.
+# Multicast/broadcast packets race for hash table insertion.
+
+# eth0 br0 eth0
+# setup is: ns1 <->,ns0 <-> ns3
+# ns2 <-' `'-> ns4
+
+source lib.sh
+
+checktool "nft --version" "run test without nft tool"
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1 ns2 ns3 ns4
+
+ret=0
+
+do_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ if ! ip netns exec "$fromns" ping -c 1 -q "$dstip" > /dev/null; then
+ echo "ERROR: ping from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ fi
+}
+
+bcast_ping()
+{
+ fromns="$1"
+ dstip="$2"
+
+ local packets=500
+
+ [ "$KSFT_MACHINE_SLOW" = yes ] && packets=100
+
+ for i in $(seq 1 $packets); do
+ if ! ip netns exec "$fromns" ping -q -f -b -c 1 -q "$dstip" > /dev/null 2>&1; then
+ echo "ERROR: ping -b from $fromns to $dstip"
+ ip netns exec "$ns0" nft list ruleset
+ ret=1
+ break
+ fi
+ done
+}
+
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q net.ipv4.conf.default.rp_filter=0
+
+if ! ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+
+ip link add veth2 netns "$ns0" type veth peer name eth0 netns "$ns2"
+ip link add veth3 netns "$ns0" type veth peer name eth0 netns "$ns3"
+ip link add veth4 netns "$ns0" type veth peer name eth0 netns "$ns4"
+
+for i in $(seq 1 4); do
+ ip -net "$ns0" link set "veth$i" up
+done
+
+if ! ip -net "$ns0" link add br0 type bridge stp_state 0 forward_delay 0 nf_call_iptables 1 nf_call_ip6tables 1 nf_call_arptables 1; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+# make veth0,1,2 part of bridge.
+for i in $(seq 1 3); do
+ ip -net "$ns0" link set "veth$i" master br0
+done
+
+# add a macvlan on top of the bridge.
+MACVLAN_ADDR=ba:f3:13:37:42:23
+ip -net "$ns0" link add link br0 name macvlan0 type macvlan mode private
+ip -net "$ns0" link set macvlan0 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan0 up
+ip -net "$ns0" addr add 10.23.0.1/24 dev macvlan0
+
+# add a macvlan on top of veth4.
+MACVLAN_ADDR=ba:f3:13:37:42:24
+ip -net "$ns0" link add link veth4 name macvlan4 type macvlan mode passthru
+ip -net "$ns0" link set macvlan4 address ${MACVLAN_ADDR}
+ip -net "$ns0" link set macvlan4 up
+
+# make the macvlan part of the bridge.
+# veth4 is not a bridge port, only the macvlan on top of it.
+ip -net "$ns0" link set macvlan4 master br0
+
+ip -net "$ns0" link set br0 up
+ip -net "$ns0" addr add 10.0.0.1/24 dev br0
+
+modprobe -q br_netfilter
+if ! ip netns exec "$ns0" sysctl -q net.bridge.bridge-nf-call-iptables=1; then
+ echo "SKIP: bridge netfilter not available"
+ ret=$ksft_skip
+fi
+
+# for testing, so namespaces will reply to ping -b probes.
+ip netns exec "$ns0" sysctl -q net.ipv4.icmp_echo_ignore_broadcasts=0
+
+# enable conntrack in ns0 and drop broadcast packets in forward to
+# avoid them from getting confirmed in the postrouting hook before
+# the cloned skb is passed up the stack.
+ip netns exec "$ns0" nft -f - <<EOF
+table ip filter {
+ chain input {
+ type filter hook input priority 1; policy accept
+ iifname br0 counter
+ ct state new accept
+ }
+}
+
+table bridge filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept
+ meta pkttype broadcast ip protocol icmp counter drop
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: could not add nftables ruleset"
+ exit $ksft_skip
+fi
+
+# place 1, 2 & 3 in same subnet, connected via ns0:br0.
+# ns4 is placed in same subnet as well, but its not
+# part of the bridge: the corresponding veth4 is not
+# part of the bridge, only its macvlan interface.
+for i in $(seq 1 4); do
+ eval ip -net \$ns"$i" link set eth0 up
+done
+for i in $(seq 1 2); do
+ eval ip -net \$ns"$i" addr add "10.0.0.1$i/24" dev eth0
+done
+
+ip -net "$ns3" addr add 10.23.0.13/24 dev eth0
+ip -net "$ns4" addr add 10.23.0.14/24 dev eth0
+
+# test basic connectivity
+do_ping "$ns1" 10.0.0.12
+do_ping "$ns3" 10.23.0.1
+do_ping "$ns4" 10.23.0.1
+
+bcast_ping "$ns1" 10.0.0.255
+
+# This should deliver broadcast to macvlan0, which is on top of ns0:br0.
+bcast_ping "$ns3" 10.23.0.255
+
+# same, this time via veth4:macvlan4.
+bcast_ping "$ns4" 10.23.0.255
+
+read t < /proc/sys/kernel/tainted
+if [ "$t" -eq 0 ];then
+ echo PASS: kernel not tainted
+else
+ echo ERROR: kernel is tainted
+ ret=1
+fi
+
+exit $ret
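Review bot: When the `net.bridge.bridge-nf-call-iptables` sysctl cannot be set, the script only assigns `ret=$ksft_skip` and carries on, unlike the other SKIP branches, which `exit $ksft_skip`. The ping tests then run without br_netfilter, and a later `ret=1` can turn the skip into a failure; if that is not intended, use `exit $ksft_skip` there as well. The two `eval ip -net \$ns"$i" ...` loops can drop `eval` in favour of indirect expansion, e.g. `n="ns$i"; ip -net "${!n}" link set eth0 up`. `fromns` and `dstip` in do_ping and bcast_ping are assigned without `local`, so they leak into the script's global scope.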
diff --git a/tools/testing/selftests/net/netfilter/bridge_brouter.sh b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
new file mode 100755
index 000000000000..2549b6590693
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/bridge_brouter.sh
@@ -0,0 +1,122 @@
+#!/bin/bash
+#
+# This test is for bridge 'brouting', i.e. make some packets being routed
+# rather than getting bridged even though they arrive on interface that is
+# part of a bridge.
+
+# eth0 br0 eth0
+# setup is: ns1 <-> nsbr <-> ns2
+
+source lib.sh
+
+if ! ebtables -V > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without ebtables"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsbr ns1 ns2
+
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$nsbr" sysctl -q net.ipv4.conf.all.rp_filter=0
+if ! ip link add veth0 netns "$nsbr" type veth peer name eth0 netns "$ns1"; then
+ echo "SKIP: Can't create veth device"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsbr" type veth peer name eth0 netns "$ns2"
+
+if ! ip -net "$nsbr" link add br0 type bridge; then
+ echo "SKIP: Can't create bridge br0"
+ exit $ksft_skip
+fi
+
+ip -net "$nsbr" link set veth0 up
+ip -net "$nsbr" link set veth1 up
+
+ip -net "$nsbr" link set veth0 master br0
+ip -net "$nsbr" link set veth1 master br0
+ip -net "$nsbr" link set br0 up
+ip -net "$nsbr" addr add 10.0.0.1/24 dev br0
+
+# place both in same subnet, ${ns1} and ${ns2} connected via ${nsbr}:br0
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+ip -net "$ns1" addr add 10.0.0.11/24 dev eth0
+ip -net "$ns2" addr add 10.0.0.12/24 dev eth0
+
+test_ebtables_broute()
+{
+ # redirect is needed so the dstmac is rewritten to the bridge itself,
+ # ip stack won't process OTHERHOST (foreign unicast mac) packets.
+ if ! ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP; then
+ echo "SKIP: Could not add ebtables broute redirect rule"
+ return $ksft_skip
+ fi
+
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=0
+
+ # ping net${ns1}, expected to not work (ip forwarding is off)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed" 1>&2
+ return 1
+ fi
+
+ # enable forwarding on both interfaces.
+ # neither needs an ip address, but at least the bridge needs
+ # an ip address in same network segment as ${ns1} and ${ns2} (${nsbr}
+ # needs to be able to determine route for to-be-forwarded packet).
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ ip netns exec "$nsbr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule"
+ ip netns exec "$nsbr" ebtables -t broute -F
+
+ # ping net${ns1}, expected to work (frames are bridged)
+ if ! ip netns exec "$ns1" ping -q -c 1 10.0.0.12 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (bridged)" 1>&2
+ return 1
+ fi
+
+ ip netns exec "$nsbr" ebtables -t filter -A FORWARD -p ipv4 --ip-protocol icmp -j DROP
+
+ # ping net${ns1}, expected to not work (DROP in bridge forward)
+ if ip netns exec "$ns1" ping -q -c 1 10.0.0.12 -W 0.5 > /dev/null 2>&1; then
+ echo "ERROR: ping works, should have failed (icmp forward drop)" 1>&2
+ return 1
+ fi
+
+ # re-activate brouter
+ ip netns exec "$nsbr" ebtables -t broute -A BROUTING -p ipv4 --ip-protocol icmp -j redirect --redirect-target=DROP
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.0.11 > /dev/null; then
+ echo "ERROR: ping did not work, but it should (broute+forward 2)" 1>&2
+ return 1
+ fi
+
+ echo "PASS: ${ns1}/${ns2} connectivity with active broute rule and bridge forward drop"
+ return 0
+}
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.0.12 > /dev/null; then
+ echo "ERROR: Could not reach ${ns2} from ${ns1}" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.0.11 > /dev/null; then
+ echo "ERROR: Could not reach ${ns1} from ${ns2}" 1>&2
+ exit 1
+fi
+
+test_ebtables_broute
+exit $?
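Review bot: The new script has no SPDX line, while the other files added next to it (br_netfilter.sh, the Makefile, audit_logread.c) carry one; presumably `# SPDX-License-Identifier: GPL-2.0` belongs on the second line. In test_ebtables_broute the `ebtables -t filter -A FORWARD ... -j DROP` call and the second `-t broute -A BROUTING` call are not checked, unlike the first rule. A failure there shows up only as a misleading ping error, so a `|| return 1` with its own message on each would make the cause readable.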
diff --git a/tools/testing/selftests/net/netfilter/config b/tools/testing/selftests/net/netfilter/config
new file mode 100644
index 000000000000..63ef80ef47a4
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/config
@@ -0,0 +1,89 @@
+CONFIG_AUDIT=y
+CONFIG_BPF_SYSCALL=y
+CONFIG_BRIDGE=m
+CONFIG_BRIDGE_EBT_BROUTE=m
+CONFIG_BRIDGE_EBT_IP=m
+CONFIG_BRIDGE_EBT_REDIRECT=m
+CONFIG_BRIDGE_EBT_T_FILTER=m
+CONFIG_BRIDGE_NETFILTER=m
+CONFIG_BRIDGE_NF_EBTABLES=m
+CONFIG_CGROUP_BPF=y
+CONFIG_DUMMY=m
+CONFIG_INET_ESP=m
+CONFIG_IP_NF_MATCH_RPFILTER=m
+CONFIG_IP6_NF_MATCH_RPFILTER=m
+CONFIG_IP_NF_IPTABLES=m
+CONFIG_IP6_NF_IPTABLES=m
+CONFIG_IP_NF_FILTER=m
+CONFIG_IP6_NF_FILTER=m
+CONFIG_IP_NF_RAW=m
+CONFIG_IP6_NF_RAW=m
+CONFIG_IP_SCTP=m
+CONFIG_IP_VS=m
+CONFIG_IP_VS_PROTO_TCP=y
+CONFIG_IP_VS_RR=m
+CONFIG_IPV6=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_MACVLAN=m
+CONFIG_NAMESPACES=y
+CONFIG_NET_CLS_U32=m
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_NS=y
+CONFIG_NET_SCH_NETEM=m
+CONFIG_NET_SCH_HTB=m
+CONFIG_NET_IPIP=m
+CONFIG_NET_VRF=y
+CONFIG_NETFILTER=y
+CONFIG_NETFILTER_ADVANCED=y
+CONFIG_NETFILTER_NETLINK=m
+CONFIG_NETFILTER_NETLINK_QUEUE=m
+CONFIG_NETFILTER_SYNPROXY=m
+CONFIG_NETFILTER_XTABLES=m
+CONFIG_NETFILTER_XT_NAT=m
+CONFIG_NETFILTER_XT_MATCH_CONNTRACK=m
+CONFIG_NETFILTER_XT_MATCH_STATE=m
+CONFIG_NETFILTER_XT_MATCH_STRING=m
+CONFIG_NETFILTER_XT_TARGET_REDIRECT=m
+CONFIG_NF_CONNTRACK=m
+CONFIG_NF_CONNTRACK_EVENTS=y
+CONFIG_NF_CONNTRACK_FTP=m
+CONFIG_NF_CONNTRACK_MARK=y
+CONFIG_NF_CONNTRACK_ZONES=y
+CONFIG_NF_CT_NETLINK=m
+CONFIG_NF_CT_PROTO_SCTP=y
+CONFIG_NF_FLOW_TABLE=m
+CONFIG_NF_LOG_IPV4=m
+CONFIG_NF_LOG_IPV6=m
+CONFIG_NF_NAT=m
+CONFIG_NF_NAT_REDIRECT=y
+CONFIG_NF_NAT_MASQUERADE=y
+CONFIG_NF_TABLES=m
+CONFIG_NF_TABLES_BRIDGE=m
+CONFIG_NF_TABLES_INET=y
+CONFIG_NF_TABLES_IPV4=y
+CONFIG_NF_TABLES_IPV6=y
+CONFIG_NF_TABLES_NETDEV=y
+CONFIG_NF_FLOW_TABLE_INET=m
+CONFIG_NFT_BRIDGE_META=m
+CONFIG_NFT_COMPAT=m
+CONFIG_NFT_CT=m
+CONFIG_NFT_FIB=m
+CONFIG_NFT_FIB_INET=m
+CONFIG_NFT_FIB_IPV4=m
+CONFIG_NFT_FIB_IPV6=m
+CONFIG_NFT_FLOW_OFFLOAD=m
+CONFIG_NFT_LIMIT=m
+CONFIG_NFT_LOG=m
+CONFIG_NFT_MASQ=m
+CONFIG_NFT_NAT=m
+CONFIG_NFT_NUMGEN=m
+CONFIG_NFT_QUEUE=m
+CONFIG_NFT_QUOTA=m
+CONFIG_NFT_REDIR=m
+CONFIG_NFT_SYNPROXY=m
+CONFIG_VETH=m
+CONFIG_VLAN_8021Q=m
+CONFIG_XFRM_USER=m
+CONFIG_XFRM_STATISTICS=y
+CONFIG_NET_PKTGEN=m
+CONFIG_TUN=m
diff --git a/tools/testing/selftests/net/netfilter/connect_close.c b/tools/testing/selftests/net/netfilter/connect_close.c
new file mode 100644
index 000000000000..1c3b0add54c4
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/connect_close.c
@@ -0,0 +1,136 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <fcntl.h>
+#include <string.h>
+#include <unistd.h>
+#include <signal.h>
+
+#include <arpa/inet.h>
+#include <sys/socket.h>
+
+#define PORT 12345
+#define RUNTIME 10
+
+static struct {
+ unsigned int timeout;
+ unsigned int port;
+} opts = {
+ .timeout = RUNTIME,
+ .port = PORT,
+};
+
+static void handler(int sig)
+{
+ _exit(sig == SIGALRM ? 0 : 1);
+}
+
+static void set_timeout(void)
+{
+ struct sigaction action = {
+ .sa_handler = handler,
+ };
+
+ sigaction(SIGALRM, &action, NULL);
+
+ alarm(opts.timeout);
+}
+
+static void do_connect(const struct sockaddr_in *dst)
+{
+ int s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s >= 0)
+ fcntl(s, F_SETFL, O_NONBLOCK);
+
+ connect(s, (struct sockaddr *)dst, sizeof(*dst));
+ close(s);
+}
+
+static void do_accept(const struct sockaddr_in *src)
+{
+ int c, one = 1, s = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+
+ if (s < 0)
+ return;
+
+ setsockopt(s, SOL_SOCKET, SO_REUSEADDR, &one, sizeof(one));
+ setsockopt(s, SOL_SOCKET, SO_REUSEPORT, &one, sizeof(one));
+
+ bind(s, (struct sockaddr *)src, sizeof(*src));
+
+ listen(s, 16);
+
+ c = accept(s, NULL, NULL);
+ if (c >= 0)
+ close(c);
+
+ close(s);
+}
+
+static int accept_loop(void)
+{
+ struct sockaddr_in src = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &src.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_accept(&src);
+
+ return 1;
+}
+
+static int connect_loop(void)
+{
+ struct sockaddr_in dst = {
+ .sin_family = AF_INET,
+ .sin_port = htons(opts.port),
+ };
+
+ inet_pton(AF_INET, "127.0.0.1", &dst.sin_addr);
+
+ set_timeout();
+
+ for (;;)
+ do_connect(&dst);
+
+ return 1;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "t:p:")) != -1) {
+ switch (c) {
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'p':
+ opts.port = atoi(optarg);
+ break;
+ }
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ pid_t p;
+
+ parse_opts(argc, argv);
+
+ p = fork();
+ if (p < 0)
+ return 111;
+
+ if (p > 0)
+ return accept_loop();
+
+ return connect_loop();
+}
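Review bot: In `parse_opts`, `opts.timeout = atoi(optarg);` accepts 0 and non-numeric input, and `set_timeout` then calls `alarm(0)`, which arms no alarm, so both the accept loop and the connect loop would run with nothing to stop them. Rejecting a zero result, for example `opts.timeout = strtoul(optarg, NULL, 0); if (!opts.timeout) exit(1);`, keeps the helper bounded. The same unchecked `atoi` feeds `opts.port`, which `htons(opts.port)` silently truncates for values above 65535. `do_connect` also calls `connect` and `close` on `s` when `socket()` failed; an early `return` as in `do_accept` would be consistent.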
diff --git a/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
new file mode 100644
index 000000000000..bd9317bf5ada
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_dump_flush.c
@@ -0,0 +1,469 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define _GNU_SOURCE
+
+#include <time.h>
+#include <libmnl/libmnl.h>
+#include <netinet/ip.h>
+
+#include <linux/netlink.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_conntrack.h>
+#include <linux/netfilter/nf_conntrack_tcp.h>
+#include "../../kselftest_harness.h"
+
+#define TEST_ZONE_ID 123
+#define NF_CT_DEFAULT_ZONE_ID 0
+
+static int reply_counter;
+
+static int build_cta_tuple_v4(struct nlmsghdr *nlh, int type,
+ uint32_t src_ip, uint32_t dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put_u32(nlh, CTA_IP_V4_SRC, src_ip);
+ mnl_attr_put_u32(nlh, CTA_IP_V4_DST, dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_tuple_v6(struct nlmsghdr *nlh, int type,
+ struct in6_addr src_ip, struct in6_addr dst_ip,
+ uint16_t src_port, uint16_t dst_port)
+{
+ struct nlattr *nest, *nest_ip, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, type);
+ if (!nest)
+ return -1;
+
+ nest_ip = mnl_attr_nest_start(nlh, CTA_TUPLE_IP);
+ if (!nest_ip)
+ return -1;
+ mnl_attr_put(nlh, CTA_IP_V6_SRC, sizeof(struct in6_addr), &src_ip);
+ mnl_attr_put(nlh, CTA_IP_V6_DST, sizeof(struct in6_addr), &dst_ip);
+ mnl_attr_nest_end(nlh, nest_ip);
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_TUPLE_PROTO);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTO_NUM, 6);
+ mnl_attr_put_u16(nlh, CTA_PROTO_SRC_PORT, htons(src_port));
+ mnl_attr_put_u16(nlh, CTA_PROTO_DST_PORT, htons(dst_port));
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int build_cta_proto(struct nlmsghdr *nlh)
+{
+ struct nlattr *nest, *nest_proto;
+
+ nest = mnl_attr_nest_start(nlh, CTA_PROTOINFO);
+ if (!nest)
+ return -1;
+
+ nest_proto = mnl_attr_nest_start(nlh, CTA_PROTOINFO_TCP);
+ if (!nest_proto)
+ return -1;
+ mnl_attr_put_u8(nlh, CTA_PROTOINFO_TCP_STATE, TCP_CONNTRACK_ESTABLISHED);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_ORIGINAL, 0x0a0a);
+ mnl_attr_put_u16(nlh, CTA_PROTOINFO_TCP_FLAGS_REPLY, 0x0a0a);
+ mnl_attr_nest_end(nlh, nest_proto);
+
+ mnl_attr_nest_end(nlh, nest);
+}
+
+static int conntrack_data_insert(struct mnl_socket *sock, struct nlmsghdr *nlh,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *rplnlh;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ ret = build_cta_proto(nlh);
+ if (ret < 0) {
+ perror("build_cta_proto");
+ return -1;
+ }
+ mnl_attr_put_u32(nlh, CTA_TIMEOUT, htonl(20000));
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ if (mnl_socket_sendto(sock, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ return -1;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ if (errno == EEXIST) {
+ /* The entries are probably still there from a previous
+ * run. So we are good
+ */
+ return 0;
+ }
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+static int conntrack_data_generate_v4(struct mnl_socket *sock, uint32_t src_ip,
+ uint32_t dst_ip, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip, 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ ret = build_cta_tuple_v4(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip, 443, 12345);
+ if (ret < 0) {
+ perror("build_cta_tuple_v4");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int conntrack_data_generate_v6(struct mnl_socket *sock,
+ struct in6_addr src_ip,
+ struct in6_addr dst_ip,
+ uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfh;
+ int ret;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_NEW;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE |
+ NLM_F_ACK | NLM_F_EXCL;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_INET6;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_ORIG, src_ip, dst_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ ret = build_cta_tuple_v6(nlh, CTA_TUPLE_REPLY, dst_ip, src_ip,
+ 12345, 443);
+ if (ret < 0) {
+ perror("build_cta_tuple_v6");
+ return ret;
+ }
+ return conntrack_data_insert(sock, nlh, zone);
+}
+
+static int count_entries(const struct nlmsghdr *nlh, void *data)
+{
+ reply_counter++;
+}
+
+static int conntracK_count_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_GET;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_DUMP;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ reply_counter = 0;
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ while (ret > 0) {
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid,
+ count_entries, NULL);
+ if (ret <= MNL_CB_STOP)
+ break;
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ }
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ return reply_counter;
+}
+
+static int conntrack_flush_zone(struct mnl_socket *sock, uint16_t zone)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ struct nlmsghdr *nlh, *rplnlh;
+ struct nfgenmsg *nfh;
+ struct nlattr *nest;
+ unsigned int portid;
+ int err, ret;
+
+ portid = mnl_socket_get_portid(sock);
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_CTNETLINK << 8) | IPCTNL_MSG_CT_DELETE;
+ nlh->nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ nlh->nlmsg_seq = time(NULL);
+
+ nfh = mnl_nlmsg_put_extra_header(nlh, sizeof(struct nfgenmsg));
+ nfh->nfgen_family = AF_UNSPEC;
+ nfh->version = NFNETLINK_V0;
+ nfh->res_id = 0;
+
+ mnl_attr_put_u16(nlh, CTA_ZONE, htons(zone));
+
+ ret = mnl_socket_sendto(sock, nlh, nlh->nlmsg_len);
+ if (ret < 0) {
+ perror("mnl_socket_sendto");
+ return ret;
+ }
+
+ ret = mnl_socket_recvfrom(sock, buf, MNL_SOCKET_BUFFER_SIZE);
+ if (ret < 0) {
+ perror("mnl_socket_recvfrom");
+ return ret;
+ }
+
+ ret = mnl_cb_run(buf, ret, nlh->nlmsg_seq, portid, NULL, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ return ret;
+ }
+
+ return 0;
+}
+
+FIXTURE(conntrack_dump_flush)
+{
+ struct mnl_socket *sock;
+};
+
+FIXTURE_SETUP(conntrack_dump_flush)
+{
+ struct in6_addr src, dst;
+ int ret;
+
+ self->sock = mnl_socket_open(NETLINK_NETFILTER);
+ if (!self->sock) {
+ perror("mnl_socket_open");
+ SKIP(return, "cannot open netlink_netfilter socket");
+ }
+
+ ret = mnl_socket_bind(self->sock, 0, MNL_SOCKET_AUTOPID);
+ EXPECT_EQ(ret, 0);
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ if (ret < 0 && errno == EPERM)
+ SKIP(return, "Needs to be run as root");
+ else if (ret < 0 && errno == EOPNOTSUPP)
+ SKIP(return, "Kernel does not seem to support conntrack zones");
+
+ ret = conntrack_data_generate_v4(self->sock, 0xf0f0f0f0, 0xf1f1f1f1,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf2f2f2f2, 0xf3f3f3f3,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf4f4f4f4, 0xf5f5f5f5,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+ ret = conntrack_data_generate_v4(self->sock, 0xf6f6f6f6, 0xf7f7f7f7,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x01000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x02000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x03000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x04000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 0);
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x05000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x06000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 0);
+
+ src = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x07000000
+ }
+ }};
+ dst = (struct in6_addr) {{
+ .__u6_addr32 = {
+ 0xb80d0120,
+ 0x00000000,
+ 0x00000000,
+ 0x08000000
+ }
+ }};
+ ret = conntrack_data_generate_v6(self->sock, src, dst,
+ NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_GE(ret, 2);
+ if (ret > 2)
+ SKIP(return, "kernel does not support filtering by zone");
+}
+
+FIXTURE_TEARDOWN(conntrack_dump_flush)
+{
+}
+
+TEST_F(conntrack_dump_flush, test_dump_by_zone)
+{
+ int ret;
+
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+}
+
+TEST_F(conntrack_dump_flush, test_flush_by_zone_default)
+{
+ int ret;
+
+ ret = conntrack_flush_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 1);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, TEST_ZONE_ID + 2);
+ EXPECT_EQ(ret, 2);
+ ret = conntracK_count_zone(self->sock, NF_CT_DEFAULT_ZONE_ID);
+ EXPECT_EQ(ret, 0);
+}
+
+TEST_HARNESS_MAIN
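Review bot: `build_cta_tuple_v4`, `build_cta_tuple_v6` and `build_cta_proto` are declared `int` and return -1 on error, but fall off the end on the success path, so the `ret < 0` checks in their callers read an indeterminate value; each needs `return 0;` after the final `mnl_attr_nest_end(nlh, nest);`. `count_entries` has the same problem as an `mnl_cb_run` callback and should end with `return MNL_CB_OK;`. Separately, in `conntrack_data_generate_v6` the CTA_TUPLE_REPLY call passes ports `12345, 443`, while the v4 variant swaps them to `443, 12345`; if the reply tuple is meant to mirror the original direction, the v6 call looks like it should be swapped too. `FIXTURE_TEARDOWN` is empty, so the socket opened in the setup is never closed with `mnl_socket_close(self->sock);`.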
diff --git a/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
new file mode 100755
index 000000000000..c63d840ead61
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_icmp_related.sh
@@ -0,0 +1,278 @@
+#!/bin/bash
+#
+# check that ICMP df-needed/pkttoobig icmp are set are set as related
+# state
+#
+# Setup is:
+#
+# nsclient1 -> nsrouter1 -> nsrouter2 -> nsclient2
+# MTU 1500, except for nsrouter2 <-> nsclient2 link (1280).
+# ping nsclient2 from nsclient1, checking that conntrack did set RELATED
+# 'fragmentation needed' icmp packet.
+#
+# In addition, nsrouter1 will perform IP masquerading, i.e. also
+# check the icmp errors are propagated to the correct host as per
+# nat of "established" icmp-echo "connection".
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup() {
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+setup_ns nsclient1 nsclient2 nsrouter1 nsrouter2
+
+ret=0
+
+add_addr()
+{
+ ns=$1
+ dev=$2
+ i=$3
+
+ ip -net "$ns" link set "$dev" up
+ ip -net "$ns" addr add "192.168.$i.2/24" dev "$dev"
+ ip -net "$ns" addr add "dead:$i::2/64" dev "$dev" nodad
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ if ! ip netns exec "$ns" nft list counter inet filter "$name" | grep -q "$expect"; then
+ echo "ERROR: counter $name in $ns has unexpected value (expected $expect)" 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_unknown()
+{
+ expect="packets 0 bytes 0"
+ for n in ${nsclient1} ${nsclient2} ${nsrouter1} ${nsrouter2}; do
+ if ! check_counter "$n" "unknown" "$expect"; then
+ return 1
+ fi
+ done
+
+ return 0
+}
+
+DEV=veth0
+ip link add "$DEV" netns "$nsclient1" type veth peer name eth1 netns "$nsrouter1"
+ip link add "$DEV" netns "$nsclient2" type veth peer name eth1 netns "$nsrouter2"
+ip link add "$DEV" netns "$nsrouter1" type veth peer name eth2 netns "$nsrouter2"
+
+add_addr "$nsclient1" $DEV 1
+add_addr "$nsclient2" $DEV 2
+
+ip -net "$nsrouter1" link set eth1 up
+ip -net "$nsrouter1" link set $DEV up
+
+ip -net "$nsrouter2" link set eth1 mtu 1280 up
+ip -net "$nsrouter2" link set eth2 up
+
+ip -net "$nsclient1" route add default via 192.168.1.1
+ip -net "$nsclient1" -6 route add default via dead:1::1
+
+ip -net "$nsclient2" route add default via 192.168.2.1
+ip -net "$nsclient2" route add default via dead:2::1
+ip -net "$nsclient2" link set veth0 mtu 1280
+
+ip -net "$nsrouter1" addr add 192.168.1.1/24 dev eth1
+ip -net "$nsrouter1" addr add 192.168.3.1/24 dev veth0
+ip -net "$nsrouter1" addr add dead:1::1/64 dev eth1 nodad
+ip -net "$nsrouter1" addr add dead:3::1/64 dev veth0 nodad
+ip -net "$nsrouter1" route add default via 192.168.3.10
+ip -net "$nsrouter1" -6 route add default via dead:3::10
+
+ip -net "$nsrouter2" addr add 192.168.2.1/24 dev eth1
+ip -net "$nsrouter2" addr add 192.168.3.10/24 dev eth2
+ip -net "$nsrouter2" addr add dead:2::1/64 dev eth1 nodad
+ip -net "$nsrouter2" addr add dead:3::10/64 dev eth2 nodad
+ip -net "$nsrouter2" route add default via 192.168.3.1
+ip -net "$nsrouter2" route add default via dead:3::1
+
+for i in 4 6; do
+ ip netns exec "$nsrouter1" sysctl -q net.ipv$i.conf.all.forwarding=1
+ ip netns exec "$nsrouter2" sysctl -q net.ipv$i.conf.all.forwarding=1
+done
+
+for netns in "$nsrouter1" "$nsrouter2"; do
+ip netns exec "$netns" nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta l4proto icmpv6 icmpv6 type "packet-too-big" ct state "related" counter name "related" accept
+ meta l4proto icmp icmp type "destination-unreachable" ct state "related" counter name "related" accept
+ meta l4proto { icmp, icmpv6 } ct state new,established accept
+ counter name "unknown" drop
+ }
+}
+EOF
+done
+
+ip netns exec "$nsclient1" nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter related { }
+ counter redir4 { }
+ counter redir6 { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ icmp type "redirect" ct state "related" counter name "redir4" accept
+ icmpv6 type "nd-redirect" ct state "related" counter name "redir6" accept
+
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+ meta l4proto { icmp, icmpv6 } ct state "related" counter name "related" accept
+
+ counter name "unknown" drop
+ }
+}
+EOF
+
+ip netns exec "$nsclient2" nft -f - <<EOF
+table inet filter {
+ counter unknown { }
+ counter new { }
+ counter established { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new" accept
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established" accept
+ counter name "unknown" drop
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta l4proto { icmp, icmpv6 } ct state established,untracked accept
+
+ meta l4proto { icmp, icmpv6 } ct state "new" counter name "new"
+ meta l4proto { icmp, icmpv6 } ct state "established" counter name "established"
+ counter name "unknown" drop
+ }
+}
+EOF
+
+# make sure NAT core rewrites adress of icmp error if nat is used according to
+# conntrack nat information (icmp error will be directed at nsrouter1 address,
+# but it needs to be routed to nsclient1 address).
+ip netns exec "$nsrouter1" nft -f - <<EOF
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip protocol icmp oifname "veth0" counter masquerade
+ }
+}
+table ip6 nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ip6 nexthdr icmpv6 oifname "veth0" counter masquerade
+ }
+}
+EOF
+
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q -M "do" 192.168.2.2 >/dev/null; then
+ echo "ERROR: netns ip routing/connectivity broken" 1>&2
+ exit 1
+fi
+if ! ip netns exec "$nsclient1" ping -c 1 -s 1000 -q dead:2::2 >/dev/null; then
+ echo "ERROR: netns ipv6 routing/connectivity broken" 1>&2
+ exit 1
+fi
+
+if ! check_unknown; then
+ ret=1
+fi
+
+expect="packets 0 bytes 0"
+for netns in "$nsrouter1" "$nsrouter2" "$nsclient1";do
+ if ! check_counter "$netns" "related" "$expect"; then
+ ret=1
+ fi
+done
+
+expect="packets 2 bytes 2076"
+if ! check_counter "$nsclient2" "new" "$expect"; then
+ ret=1
+fi
+
+if ip netns exec "$nsclient1" ping -W 0.5 -q -c 1 -s 1300 -M "do" 192.168.2.2 > /dev/null; then
+ echo "ERROR: ping should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+# nsrouter2 should have generated the icmp error, so
+# related counter should be 0 (its in forward).
+expect="packets 0 bytes 0"
+if ! check_counter "$nsrouter2" "related" "$expect"; then
+ ret=1
+fi
+
+# but nsrouter1 should have seen it, same for nsclient1.
+expect="packets 1 bytes 576"
+for netns in ${nsrouter1} ${nsclient1};do
+ if ! check_counter "$netns" "related" "$expect"; then
+ ret=1
+ fi
+done
+
+if ip netns exec "${nsclient1}" ping6 -W 0.5 -c 1 -s 1300 dead:2::2 > /dev/null; then
+ echo "ERROR: ping6 should have failed with PMTU too big error" 1>&2
+ ret=1
+fi
+
+expect="packets 2 bytes 1856"
+for netns in "${nsrouter1}" "${nsclient1}";do
+ if ! check_counter "$netns" "related" "$expect"; then
+ ret=1
+ fi
+done
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp mtu error had RELATED state"
+else
+ echo "ERROR: icmp error RELATED state test has failed"
+fi
+
+# add 'bad' route, expect icmp REDIRECT to be generated
+ip netns exec "${nsclient1}" ip route add 192.168.1.42 via 192.168.1.1
+ip netns exec "${nsclient1}" ip route add dead:1::42 via dead:1::1
+
+ip netns exec "$nsclient1" ping -W 1 -q -i 0.5 -c 2 192.168.1.42 > /dev/null
+
+expect="packets 1 bytes 112"
+if ! check_counter "$nsclient1" "redir4" "$expect"; then
+ ret=1
+fi
+
+ip netns exec "$nsclient1" ping -W 1 -c 1 dead:1::42 > /dev/null
+expect="packets 1 bytes 192"
+if ! check_counter "$nsclient1" "redir6" "$expect"; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: icmp redirects had RELATED state"
+else
+ echo "ERROR: icmp redirect RELATED state test has failed"
+fi
+
+exit $ret
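Review bot: `ret` is never reset between the PMTU part and the redirect part, so once the first part sets `ret=1` the final `if [ $ret -eq 0 ]` prints "ERROR: icmp redirect RELATED state test has failed" even when both redirect counters matched. Tracking the second part in its own variable, e.g. `redir_ret=0` before the `ip route add 192.168.1.42` line and `[ $redir_ret -ne 0 ] && ret=1` before `exit $ret`, would keep the two reports independent. The IPv6 default routes for nsclient2 and nsrouter2 are added without `-6`, unlike those for nsclient1 and nsrouter1; this may work through address-family inference, but writing `-6` would match the rest of the setup. The file also has no SPDX line, unlike the other new scripts in this commit.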
diff --git a/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
new file mode 100755
index 000000000000..9832a5d0198a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_ipip_mtu.sh
@@ -0,0 +1,191 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+# Conntrack needs to reassemble fragments in order to have complete
+# packets for rule matching. Reassembly can lead to packet loss.
+
+# Consider the following setup:
+# +--------+ +---------+ +--------+
+# |Router A|-------|Wanrouter|-------|Router B|
+# | |.IPIP..| |..IPIP.| |
+# +--------+ +---------+ +--------+
+# / mtu 1400 \
+# / \
+#+--------+ +--------+
+#|Client A| |Client B|
+#| | | |
+#+--------+ +--------+
+
+# Router A and Router B use IPIP tunnel interfaces to tunnel traffic
+# between Client A and Client B over WAN. Wanrouter has MTU 1400 set
+# on its interfaces.
+
+rx=$(mktemp)
+
+checktool "iptables --version" "run test without iptables"
+checktool "socat -h" "run test without socat"
+
+setup_ns r_a r_b r_w c_a c_b
+
+cleanup() {
+ cleanup_all_ns
+ rm -f "$rx"
+}
+
+trap cleanup EXIT
+
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ ss -N "$ns" -lnu -o "sport = :$port" | grep -q "$port"
+}
+
+test_path() {
+ msg="$1"
+
+ ip netns exec "$c_b" socat -t 3 - udp4-listen:5000,reuseaddr > "$rx" < /dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000
+
+ for i in 1 2 3; do
+ head -c1400 /dev/zero | tr "\000" "a" | \
+ ip netns exec "$c_a" socat -t 1 -u STDIN UDP:192.168.20.2:5000
+ done
+
+ wait
+
+ bytes=$(wc -c < "$rx")
+
+ if [ "$bytes" -eq 1400 ];then
+ echo "OK: PMTU $msg connection tracking"
+ else
+ echo "FAIL: PMTU $msg connection tracking: got $bytes, expected 1400"
+ exit 1
+ fi
+}
+
+# Detailed setup for Router A
+# ---------------------------
+# Interfaces:
+# eth0: 10.2.2.1/24
+# eth1: 192.168.10.1/24
+# ipip0: No IP address, local 10.2.2.1 remote 10.4.4.1
+# Routes:
+# 192.168.20.0/24 dev ipip0 (192.168.20.0/24 is subnet of Client B)
+# 10.4.4.1 via 10.2.2.254 (Router B via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns "$r_a" type veth peer name veth0 netns "$r_w"
+ip link add veth1 netns "$r_a" type veth peer name veth0 netns "$c_a"
+
+l_addr="10.2.2.1"
+r_addr="10.4.4.1"
+ip netns exec "$r_a" ip link add ipip0 type ipip local "$l_addr" remote "$r_addr" mode ipip || exit $ksft_skip
+
+for dev in lo veth0 veth1 ipip0; do
+ ip -net "$r_a" link set "$dev" up
+done
+
+ip -net "$r_a" addr add 10.2.2.1/24 dev veth0
+ip -net "$r_a" addr add 192.168.10.1/24 dev veth1
+
+ip -net "$r_a" route add 192.168.20.0/24 dev ipip0
+ip -net "$r_a" route add 10.4.4.0/24 via 10.2.2.254
+
+ip netns exec "$r_a" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Detailed setup for Router B
+# ---------------------------
+# Interfaces:
+# eth0: 10.4.4.1/24
+# eth1: 192.168.20.1/24
+# ipip0: No IP address, local 10.4.4.1 remote 10.2.2.1
+# Routes:
+# 192.168.10.0/24 dev ipip0 (192.168.10.0/24 is subnet of Client A)
+# 10.2.2.1 via 10.4.4.254 (Router A via Wanrouter)
+# No iptables rules at all.
+
+ip link add veth0 netns "$r_b" type veth peer name veth1 netns "$r_w"
+ip link add veth1 netns "$r_b" type veth peer name veth0 netns "$c_b"
+
+l_addr="10.4.4.1"
+r_addr="10.2.2.1"
+
+ip netns exec "$r_b" ip link add ipip0 type ipip local "${l_addr}" remote "${r_addr}" mode ipip || exit $ksft_skip
+
+for dev in veth0 veth1 ipip0; do
+ ip -net "$r_b" link set $dev up
+done
+
+ip -net "$r_b" addr add 10.4.4.1/24 dev veth0
+ip -net "$r_b" addr add 192.168.20.1/24 dev veth1
+
+ip -net "$r_b" route add 192.168.10.0/24 dev ipip0
+ip -net "$r_b" route add 10.2.2.0/24 via 10.4.4.254
+ip netns exec "$r_b" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Client A
+ip -net "$c_a" addr add 192.168.10.2/24 dev veth0
+ip -net "$c_a" link set dev veth0 up
+ip -net "$c_a" route add default via 192.168.10.1
+
+# Client A
+ip -net "$c_b" addr add 192.168.20.2/24 dev veth0
+ip -net "$c_b" link set dev veth0 up
+ip -net "$c_b" route add default via 192.168.20.1
+
+# Wan
+ip -net "$r_w" addr add 10.2.2.254/24 dev veth0
+ip -net "$r_w" addr add 10.4.4.254/24 dev veth1
+
+ip -net "$r_w" link set dev veth0 up mtu 1400
+ip -net "$r_w" link set dev veth1 up mtu 1400
+
+ip -net "$r_a" link set dev veth0 mtu 1400
+ip -net "$r_b" link set dev veth0 mtu 1400
+
+ip netns exec "$r_w" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+
+# Path MTU discovery
+# ------------------
+# Running tracepath from Client A to Client B shows PMTU discovery is working
+# as expected:
+#
+# clienta:~# tracepath 192.168.20.2
+# 1?: [LOCALHOST] pmtu 1500
+# 1: 192.168.10.1 0.867ms
+# 1: 192.168.10.1 0.302ms
+# 2: 192.168.10.1 0.312ms pmtu 1480
+# 2: no reply
+# 3: 192.168.10.1 0.510ms pmtu 1380
+# 3: 192.168.20.2 2.320ms reached
+# Resume: pmtu 1380 hops 3 back 3
+
+# ip netns exec ${c_a} traceroute --mtu 192.168.20.2
+
+# Router A has learned PMTU (1400) to Router B from Wanrouter.
+# Client A has learned PMTU (1400 - IPIP overhead = 1380) to Client B
+# from Router A.
+
+#Send large UDP packet
+#---------------------
+#Now we send a 1400 bytes UDP packet from Client A to Client B:
+
+# clienta:~# head -c1400 /dev/zero | tr "\000" "a" | socat -u STDIN UDP:192.168.20.2:5000
+test_path "without"
+
+# The IPv4 stack on Client A already knows the PMTU to Client B, so the
+# UDP packet is sent as two fragments (1380 + 20). Router A forwards the
+# fragments between eth1 and ipip0. The fragments fit into the tunnel and
+# reach their destination.
+
+#When sending the large UDP packet again, Router A now reassembles the
+#fragments before routing the packet over ipip0. The resulting IPIP
+#packet is too big (1400) for the tunnel PMTU (1380) to Router B, it is
+#dropped on Router A before sending.
+
+ip netns exec "$r_a" iptables -A FORWARD -m conntrack --ctstate NEW
+test_path "with"
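Review bot: `rx=$(mktemp)` runs before the `checktool` calls and before `trap cleanup EXIT` is installed, so if `checktool` (defined in lib.sh, not visible here) exits to skip the test, the temp file is presumably left behind. Moving `rx=$(mktemp)` below the two `checktool` lines, next to the trap, closes that gap. In `test_path`, the result of `busywait $BUSYWAIT_TIMEOUT listener_ready "$c_b" 5000` is ignored, so a listener that never came up shows only as a byte-count mismatch; `|| { echo "FAIL: listener not ready"; exit 1; }` would make that case explicit. The second `# Client A` comment, above the `$c_b` address setup, should read `# Client B`.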
diff --git a/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
new file mode 100755
index 000000000000..d860f7d9744b
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_sctp_collision.sh
@@ -0,0 +1,87 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Testing For SCTP COLLISION SCENARIO as Below:
+#
+# 14:35:47.655279 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT] [init tag: 2017837359]
+# 14:35:48.353250 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT] [init tag: 1187206187]
+# 14:35:48.353275 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [INIT ACK] [init tag: 2017837359]
+# 14:35:48.353283 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [COOKIE ECHO]
+# 14:35:48.353977 IP CLIENT_IP.PORT > SERVER_IP.PORT: sctp (1) [COOKIE ACK]
+# 14:35:48.855335 IP SERVER_IP.PORT > CLIENT_IP.PORT: sctp (1) [INIT ACK] [init tag: 164579970]
+#
+# TOPO: SERVER_NS (link0)<--->(link1) ROUTER_NS (link2)<--->(link3) CLIENT_NS
+
+source lib.sh
+
+CLIENT_IP="198.51.200.1"
+CLIENT_PORT=1234
+
+SERVER_IP="198.51.100.1"
+SERVER_PORT=1234
+
+CLIENT_GW="198.51.200.2"
+SERVER_GW="198.51.100.2"
+
+# setup the topo
+setup() {
+ setup_ns CLIENT_NS SERVER_NS ROUTER_NS
+ ip -n "$SERVER_NS" link add link0 type veth peer name link1 netns "$ROUTER_NS"
+ ip -n "$CLIENT_NS" link add link3 type veth peer name link2 netns "$ROUTER_NS"
+
+ ip -n "$SERVER_NS" link set link0 up
+ ip -n "$SERVER_NS" addr add $SERVER_IP/24 dev link0
+ ip -n "$SERVER_NS" route add $CLIENT_IP dev link0 via $SERVER_GW
+
+ ip -n "$ROUTER_NS" link set link1 up
+ ip -n "$ROUTER_NS" link set link2 up
+ ip -n "$ROUTER_NS" addr add $SERVER_GW/24 dev link1
+ ip -n "$ROUTER_NS" addr add $CLIENT_GW/24 dev link2
+ ip net exec "$ROUTER_NS" sysctl -wq net.ipv4.ip_forward=1
+
+ ip -n "$CLIENT_NS" link set link3 up
+ ip -n "$CLIENT_NS" addr add $CLIENT_IP/24 dev link3
+ ip -n "$CLIENT_NS" route add $SERVER_IP dev link3 via $CLIENT_GW
+
+ # simulate the delay on OVS upcall by setting up a delay for INIT_ACK with
+ # tc on $SERVER_NS side
+ tc -n "$SERVER_NS" qdisc add dev link0 root handle 1: htb r2q 64
+ tc -n "$SERVER_NS" class add dev link0 parent 1: classid 1:1 htb rate 100mbit
+ tc -n "$SERVER_NS" filter add dev link0 parent 1: protocol ip u32 match ip protocol 132 \
+ 0xff match u8 2 0xff at 32 flowid 1:1
+ if ! tc -n "$SERVER_NS" qdisc add dev link0 parent 1:1 handle 10: netem delay 1200ms; then
+ echo "SKIP: Cannot add netem qdisc"
+ exit $ksft_skip
+ fi
+
+ # simulate the ctstate check on OVS nf_conntrack
+ ip net exec "$ROUTER_NS" iptables -A FORWARD -m state --state INVALID,UNTRACKED -j DROP
+ ip net exec "$ROUTER_NS" iptables -A INPUT -p sctp -j DROP
+
+ # use a smaller number for assoc's max_retrans to reproduce the issue
+ modprobe -q sctp
+ ip net exec "$CLIENT_NS" sysctl -wq net.sctp.association_max_retrans=3
+}
+
+cleanup() {
+ ip net exec "$CLIENT_NS" pkill sctp_collision >/dev/null 2>&1
+ ip net exec "$SERVER_NS" pkill sctp_collision >/dev/null 2>&1
+ cleanup_all_ns
+}
+
+do_test() {
+ ip net exec "$SERVER_NS" ./sctp_collision server \
+ $SERVER_IP $SERVER_PORT $CLIENT_IP $CLIENT_PORT &
+ ip net exec "$CLIENT_NS" ./sctp_collision client \
+ $CLIENT_IP $CLIENT_PORT $SERVER_IP $SERVER_PORT
+}
+
+# NOTE: one way to work around the issue is set a smaller hb_interval
+# ip net exec $CLIENT_NS sysctl -wq net.sctp.hb_interval=3500
+
+# run the test case
+trap cleanup EXIT
+setup && \
+echo "Test for SCTP Collision in nf_conntrack:" && \
+do_test && echo "PASS!"
+exit $?
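Review bot: `setup()` adds two `iptables` rules in `$ROUTER_NS`, but the script never checks that iptables is available, and because `setup && …` only sees the status of the function's last command (the final `sysctl`), a failed rule insertion goes unnoticed. Without the `-m state --state INVALID,UNTRACKED -j DROP` rule the router presumably forwards the delayed INIT ACK whatever conntrack thinks of it, so the run could print PASS without exercising the check. Adding `checktool "iptables --version" "run test without iptables"` after `source lib.sh` (the helper is defined in this directory's lib.sh) turns that case into a SKIP. Separately, `cleanup()` runs `pkill sctp_collision` under `ip net exec`, which changes only the network namespace, so the name match covers every process on the host; `ip netns pids "$NS" | xargs kill`, as the sibling scripts do, stays scoped to the test namespaces.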
diff --git a/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
new file mode 100755
index 000000000000..121ea93c0178
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_tcp_unreplied.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that UNREPLIED tcp conntrack will eventually timeout.
+#
+
+source lib.sh
+
+if ! nft --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+if ! conntrack --version > /dev/null 2>&1;then
+ echo "SKIP: Could not run test without conntrack tool"
+ exit $ksft_skip
+fi
+
+ret=0
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+ipv4() {
+ echo -n 192.168."$1".2
+}
+
+check_counter()
+{
+ ns=$1
+ name=$2
+ expect=$3
+ local lret=0
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "$name" | grep -q "$expect"; then
+ echo "ERROR: counter $name in $ns2 has unexpected value (expected $expect)" 1>&2
+ ip netns exec "$ns2" nft list counter inet filter "$name" 1>&2
+ lret=1
+ fi
+
+ return $lret
+}
+
+trap cleanup EXIT
+
+# Create test namespaces
+setup_ns ns1 ns2
+
+# Connect the namespace to the host using a veth pair
+ip -net "$ns1" link add name veth1 type veth peer name veth2
+ip -net "$ns1" link set netns "$ns2" dev veth2
+
+ip -net "$ns1" link set up dev lo
+ip -net "$ns2" link set up dev lo
+ip -net "$ns1" link set up dev veth1
+ip -net "$ns2" link set up dev veth2
+
+ip -net "$ns2" addr add 10.11.11.2/24 dev veth2
+ip -net "$ns2" route add default via 10.11.11.1
+
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.veth2.forwarding=1
+
+# add a rule inside NS so we enable conntrack
+ip netns exec "$ns1" nft -f - <<EOF
+table inet filter {
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state established accept
+ }
+}
+EOF
+
+ip -net "$ns1" addr add 10.11.11.1/24 dev veth1
+ip -net "$ns1" route add 10.99.99.99 via 10.11.11.2
+
+# Check connectivity works
+ip netns exec "$ns1" ping -q -c 2 10.11.11.2 >/dev/null || exit 1
+
+ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT &
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+ counter connreq { }
+ counter redir { }
+ chain input {
+ type filter hook input priority 0; policy accept;
+ ct state new tcp flags syn ip daddr 10.99.99.99 tcp dport 80 counter name "connreq" accept
+ ct state new ct status dnat tcp dport 8080 counter name "redir" accept
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nft rules"
+ exit 1
+fi
+
+ip netns exec "$ns2" sysctl -q net.netfilter.nf_conntrack_tcp_timeout_syn_sent=10
+
+echo "INFO: connect $ns1 -> $ns2 to the virtual ip"
+ip netns exec "$ns1" bash -c 'for i in $(seq 1 $BUSYWAIT_TIMEOUT) ; do
+ socat -u STDIN TCP:10.99.99.99:80 < /dev/null
+ sleep 0.1
+ done' &
+
+wait_for_attempt()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --dport 80 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+
+# wait for conntrack to pick the new connection request up before loading
+# the nat redirect rule.
+if ! busywait "$BUSYWAIT_TIMEOUT" wait_for_attempt; then
+ echo "ERROR: $ns2 did not pick up tcp connection from peer"
+ exit 1
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ ip daddr 10.99.99.99 tcp dport 80 redirect to :8080
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "ERROR: Could not load nat redirect"
+ exit 1
+fi
+
+wait_for_redirect()
+{
+ count=$(ip netns exec "$ns2" conntrack -L -p tcp --reply-port-src 8080 2>/dev/null | wc -l)
+ if [ "$count" -gt 0 ]; then
+ return 0
+ fi
+
+ return 1
+}
+echo "INFO: NAT redirect added in ns $ns2, waiting for $BUSYWAIT_TIMEOUT ms for nat to take effect"
+
+busywait "$BUSYWAIT_TIMEOUT" wait_for_redirect
+ret=$?
+
+expect="packets 1 bytes 60"
+if ! check_counter "$ns2" "redir" "$expect"; then
+ ret=1
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: redirection counter has expected values"
+else
+ echo "ERROR: no tcp connection was redirected"
+fi
+
+exit $ret
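Review bot: `check_counter` assigns its first argument to `ns`, but every command in the body uses the global `$ns2`, so the namespace passed by the caller is ignored and the error message would name the wrong namespace if the function were reused. `ns`, `name` and `expect` are also assigned without `local`, so each call overwrites the script-level `expect`. I would declare them as `local ns=$1 name=$2 expect=$3` and replace `"$ns2"` with `"$ns"` in the three places inside the function. The `ipv4()` helper above it is not called anywhere in this file and could be dropped.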
diff --git a/tools/testing/selftests/net/netfilter/conntrack_vrf.sh b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
new file mode 100755
index 000000000000..073e8e62d350
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/conntrack_vrf.sh
@@ -0,0 +1,220 @@
+#!/bin/bash
+
+# This script demonstrates interaction of conntrack and vrf.
+# The vrf driver calls the netfilter hooks again, with oif/iif
+# pointing at the VRF device.
+#
+# For ingress, this means first iteration has iifname of lower/real
+# device. In this script, thats veth0.
+# Second iteration is iifname set to vrf device, tvrf in this script.
+#
+# For egress, this is reversed: first iteration has the vrf device,
+# second iteration is done with the lower/real/veth0 device.
+#
+# test_ct_zone_in demonstrates unexpected change of nftables
+# behavior # caused by commit 09e856d54bda5f28 "vrf: Reset skb conntrack
+# connection on VRF rcv"
+#
+# It was possible to assign conntrack zone to a packet (or mark it for
+# `notracking`) in the prerouting chain before conntrack, based on real iif.
+#
+# After the change, the zone assignment is lost and the zone is assigned based
+# on the VRF master interface (in case such a rule exists).
+# assignment is lost. Instead, assignment based on the `iif` matching
+# Thus it is impossible to distinguish packets based on the original
+# interface.
+#
+# test_masquerade_vrf and test_masquerade_veth0 demonstrate the problem
+# that was supposed to be fixed by the commit mentioned above to make sure
+# that any fix to test case 1 won't break masquerade again.
+
+source lib.sh
+
+IP0=172.30.30.1
+IP1=172.30.30.2
+PFXL=30
+ret=0
+
+cleanup()
+{
+ ip netns pids $ns0 | xargs kill 2>/dev/null
+ ip netns pids $ns1 | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft"
+checktool "conntrack --version" "run test without conntrack"
+checktool "socat -h" "run test without socat"
+
+trap cleanup EXIT
+
+setup_ns ns0 ns1
+
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns0" sysctl -q -w net.ipv4.conf.all.rp_filter=0
+
+if ! ip link add veth0 netns "$ns0" type veth peer name veth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: Could not add veth device"
+ exit $ksft_skip
+fi
+
+if ! ip -net "$ns0" li add tvrf type vrf table 9876; then
+ echo "SKIP: Could not add vrf device"
+ exit $ksft_skip
+fi
+
+ip -net "$ns0" li set veth0 master tvrf
+ip -net "$ns0" li set tvrf up
+ip -net "$ns0" li set veth0 up
+ip -net "$ns1" li set veth0 up
+
+ip -net "$ns0" addr add $IP0/$PFXL dev veth0
+ip -net "$ns1" addr add $IP1/$PFXL dev veth0
+
+listener_ready()
+{
+ local ns="$1"
+
+ ss -N "$ns" -l -n -t -o "sport = :55555" | grep -q "55555"
+}
+
+ip netns exec "$ns1" socat -u -4 TCP-LISTEN:55555,reuseaddr,fork STDOUT > /dev/null &
+busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1"
+
+# test vrf ingress handling.
+# The incoming connection should be placed in conntrack zone 1,
+# as decided by the first iteration of the ruleset.
+test_ct_zone_in()
+{
+ip netns exec "$ns0" nft -f - <<EOF
+table testct {
+ chain rawpre {
+ type filter hook prerouting priority raw;
+
+ iif { veth0, tvrf } counter meta nftrace set 1
+ iif veth0 counter ct zone set 1 counter return
+ iif tvrf counter ct zone set 2 counter return
+ ip protocol icmp counter
+ notrack counter
+ }
+
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif veth0 counter ct zone set 1 counter return
+ oif tvrf counter ct zone set 2 counter return
+ notrack counter
+ }
+}
+EOF
+ ip netns exec "$ns1" ping -W 1 -c 1 -I veth0 "$IP0" > /dev/null
+
+ # should be in zone 1, not zone 2
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 1 2>/dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
+ echo "PASS: entry found in conntrack zone 1"
+ else
+ echo "FAIL: entry not found in conntrack zone 1"
+ count=$(ip netns exec "$ns0" conntrack -L -s $IP1 -d $IP0 -p icmp --zone 2 2> /dev/null | wc -l)
+ if [ "$count" -eq 1 ]; then
+ echo "FAIL: entry found in zone 2 instead"
+ else
+ echo "FAIL: entry not in zone 1 or 2, dumping table"
+ ip netns exec "$ns0" conntrack -L
+ ip netns exec "$ns0" nft list ruleset
+ fi
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to vrf device.
+# This tests the first iteration of the packet through conntrack,
+# oifname is the vrf device.
+test_masquerade_vrf()
+{
+ local qdisc=$1
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net "$ns0" qdisc add dev tvrf root "$qdisc"
+ fi
+
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+
+ip netns exec "$ns0" nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain rawout {
+ type filter hook output priority raw;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting2 {
+ type filter hook postrouting priority mangle;
+
+ oif tvrf ct state untracked counter
+ }
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ # NB: masquerade should always be combined with 'oif(name) bla',
+ # lack of this is intentional here, we want to exercise double-snat.
+ ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on vrf device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1' &&
+ ip netns exec "$ns0" nft list table ip nat |grep -q 'untracked counter packets [1-9]'; then
+ echo "PASS: connect with masquerade + sport rewrite on vrf device ($qdisc qdisc)"
+ else
+ echo "FAIL: vrf rules have unexpected counter value"
+ ret=1
+ fi
+
+ if [ "$qdisc" != "default" ]; then
+ tc -net "$ns0" qdisc del dev tvrf root
+ fi
+}
+
+# add masq rule that gets evaluated w. outif set to veth device.
+# This tests the 2nd iteration of the packet through conntrack,
+# oifname is the lower device (veth0 in this case).
+test_masquerade_veth()
+{
+ ip netns exec "$ns0" conntrack -F 2>/dev/null
+ip netns exec "$ns0" nft -f - <<EOF
+flush ruleset
+table ip nat {
+ chain postrouting {
+ type nat hook postrouting priority 0;
+ meta oif veth0 ip saddr 172.30.30.0/30 counter masquerade random
+ }
+}
+EOF
+ if ! ip netns exec "$ns0" ip vrf exec tvrf socat -u -4 STDIN TCP:"$IP1":55555 < /dev/null > /dev/null;then
+ echo "FAIL: connect failure with masquerade + sport rewrite on veth device"
+ ret=1
+ return
+ fi
+
+ # must also check that nat table was evaluated on second (lower device) iteration.
+ if ip netns exec "$ns0" nft list table ip nat |grep -q 'counter packets 1'; then
+ echo "PASS: connect with masquerade + sport rewrite on veth device"
+ else
+ echo "FAIL: vrf masq rule has unexpected counter value"
+ ret=1
+ fi
+}
+
+test_ct_zone_in
+test_masquerade_vrf "default"
+test_masquerade_vrf "pfifo"
+test_masquerade_veth
+
+exit $ret
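Review bot: `test_ct_zone_in` prints FAIL on every failure branch but never sets `ret=1`, so the script still exits 0 when the conntrack entry is missing from zone 1, unlike `test_masquerade_vrf` and `test_masquerade_veth`, which both set it. A harness that looks only at the exit code would record a pass while zone assignment by the original ingress interface is broken. If that is deliberate (the header describes the behaviour as a known change), a comment on the function should say so; otherwise add `ret=1` after `echo "FAIL: entry not found in conntrack zone 1"`. The `sysctl -q -w net.ipv4.conf.all.rp_filter=0` line is also duplicated; the second copy was perhaps meant for a different key.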
diff --git a/tools/testing/selftests/net/netfilter/ipvs.sh b/tools/testing/selftests/net/netfilter/ipvs.sh
new file mode 100755
index 000000000000..4ceee9fb3949
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/ipvs.sh
@@ -0,0 +1,211 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# End-to-end ipvs test suite
+# Topology:
+#--------------------------------------------------------------+
+# | |
+# ns0 | ns1 |
+# ----------- | ----------- ----------- |
+# | veth01 | --------- | veth10 | | veth12 | |
+# ----------- peer ----------- ----------- |
+# | | | |
+# ----------- | | |
+# | br0 | |----------------- peer |--------------|
+# ----------- | | |
+# | | | |
+# ---------- peer ---------- ----------- |
+# | veth02 | --------- | veth20 | | veth21 | |
+# ---------- | ---------- ----------- |
+# | ns2 |
+# | |
+#--------------------------------------------------------------+
+#
+# We assume that all network driver are loaded
+#
+
+source lib.sh
+
+ret=0
+GREEN='\033[0;92m'
+RED='\033[0;31m'
+NC='\033[0m' # No Color
+
+readonly port=8080
+
+readonly vip_v4=207.175.44.110
+readonly cip_v4=10.0.0.2
+readonly gip_v4=10.0.0.1
+readonly dip_v4=172.16.0.1
+readonly rip_v4=172.16.0.2
+readonly sip_v4=10.0.0.3
+
+readonly infile="$(mktemp)"
+readonly outfile="$(mktemp)"
+readonly datalen=32
+
+sysipvsnet="/proc/sys/net/ipv4/vs/"
+if [ ! -d $sysipvsnet ]; then
+ if ! modprobe -q ip_vs; then
+ echo "skip: could not run test without ipvs module"
+ exit $ksft_skip
+ fi
+fi
+
+checktool "ipvsadm -v" "run test without ipvsadm"
+checktool "socat -h" "run test without socat"
+
+setup() {
+ setup_ns ns0 ns1 ns2
+
+ ip link add veth01 netns "${ns0}" type veth peer name veth10 netns "${ns1}"
+ ip link add veth02 netns "${ns0}" type veth peer name veth20 netns "${ns2}"
+ ip link add veth12 netns "${ns1}" type veth peer name veth21 netns "${ns2}"
+
+ ip netns exec "${ns0}" ip link set veth01 up
+ ip netns exec "${ns0}" ip link set veth02 up
+ ip netns exec "${ns0}" ip link add br0 type bridge
+ ip netns exec "${ns0}" ip link set veth01 master br0
+ ip netns exec "${ns0}" ip link set veth02 master br0
+ ip netns exec "${ns0}" ip link set br0 up
+ ip netns exec "${ns0}" ip addr add "${cip_v4}/24" dev br0
+
+ ip netns exec "${ns1}" ip link set veth10 up
+ ip netns exec "${ns1}" ip addr add "${gip_v4}/24" dev veth10
+ ip netns exec "${ns1}" ip link set veth12 up
+ ip netns exec "${ns1}" ip addr add "${dip_v4}/24" dev veth12
+
+ ip netns exec "${ns2}" ip link set veth21 up
+ ip netns exec "${ns2}" ip addr add "${rip_v4}/24" dev veth21
+ ip netns exec "${ns2}" ip link set veth20 up
+ ip netns exec "${ns2}" ip addr add "${sip_v4}/24" dev veth20
+
+ sleep 1
+
+ dd if=/dev/urandom of="${infile}" bs="${datalen}" count=1 status=none
+}
+
+cleanup() {
+ cleanup_all_ns
+
+ if [ -f "${outfile}" ]; then
+ rm "${outfile}"
+ fi
+ if [ -f "${infile}" ]; then
+ rm "${infile}"
+ fi
+}
+
+server_listen() {
+ ip netns exec "$ns2" socat -u -4 TCP-LISTEN:8080,reuseaddr STDOUT > "${outfile}" &
+ server_pid=$!
+ sleep 0.2
+}
+
+client_connect() {
+ ip netns exec "${ns0}" timeout 2 socat -u -4 STDIN TCP:"${vip_v4}":"${port}" < "${infile}"
+}
+
+verify_data() {
+ wait "${server_pid}"
+ cmp "$infile" "$outfile" 2>/dev/null
+}
+
+test_service() {
+ server_listen
+ client_connect
+ verify_data
+}
+
+
+test_dr() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ # avoid incorrect arp response
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ # avoid reverse route lookup
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+test_nat() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=1
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -m -t "${vip_v4}:${port}" -r "${rip_v4}:${port}"
+ ip netns exec "${ns1}" ip addr add "${vip_v4}/32" dev lo:1
+
+ ip netns exec "${ns2}" ip link del veth20
+ ip netns exec "${ns2}" ip route add default via "${dip_v4}" dev veth21
+
+ test_service
+}
+
+test_tun() {
+ ip netns exec "${ns0}" ip route add "${vip_v4}" via "${gip_v4}" dev br0
+
+ ip netns exec "${ns1}" modprobe -q ipip
+ ip netns exec "${ns1}" ip link set tunl0 up
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.ip_forward=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.all.send_redirects=0
+ ip netns exec "${ns1}" sysctl -qw net.ipv4.conf.default.send_redirects=0
+ ip netns exec "${ns1}" ipvsadm -A -t "${vip_v4}:${port}" -s rr
+ ip netns exec "${ns1}" ipvsadm -a -i -t "${vip_v4}:${port}" -r ${rip_v4}:${port}
+ ip netns exec "${ns1}" ip addr add ${vip_v4}/32 dev lo:1
+
+ ip netns exec "${ns2}" modprobe -q ipip
+ ip netns exec "${ns2}" ip link set tunl0 up
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_ignore=1
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.arp_announce=2
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.all.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.tunl0.rp_filter=0
+ ip netns exec "${ns2}" sysctl -qw net.ipv4.conf.veth21.rp_filter=0
+ ip netns exec "${ns2}" ip addr add "${vip_v4}/32" dev lo:1
+
+ test_service
+}
+
+run_tests() {
+ local errors=
+
+ echo "Testing DR mode..."
+ cleanup
+ setup
+ test_dr
+ errors=$(( $errors + $? ))
+
+ echo "Testing NAT mode..."
+ cleanup
+ setup
+ test_nat
+ errors=$(( $errors + $? ))
+
+ echo "Testing Tunnel mode..."
+ cleanup
+ setup
+ test_tun
+ errors=$(( $errors + $? ))
+
+ return $errors
+}
+
+trap cleanup EXIT
+
+run_tests
+
+if [ $? -ne 0 ]; then
+ echo -e "$(basename $0): ${RED}FAIL${NC}"
+ exit 1
+fi
+echo -e "$(basename $0): ${GREEN}PASS${NC}"
+exit 0
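Review bot: `cleanup()` deletes `$infile` and `$outfile`, and `run_tests` calls it before every `setup`, so from the first mode onward both paths are re-created by name (`dd of="${infile}"`, `> "${outfile}"`) rather than by `mktemp`. That gives up the exclusive-create guarantee for files in the temp directory, in a test that normally runs as root. I would keep the per-mode step to `cleanup_all_ns` and remove the files only in the exit handler, with `rm -f -- "${infile}" "${outfile}"`. The `trap cleanup EXIT` line also comes after the module and `checktool` skip exits, so the two temp files are left behind whenever the test is skipped; moving the trap to just after the `mktemp` lines would cover that, provided `cleanup_all_ns` tolerates running before `setup_ns` (not visible here).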
diff --git a/tools/testing/selftests/net/netfilter/lib.sh b/tools/testing/selftests/net/netfilter/lib.sh
new file mode 100644
index 000000000000..bedd35298e15
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/lib.sh
@@ -0,0 +1,10 @@
+net_netfilter_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")
+
+source "$net_netfilter_dir/../lib.sh"
+
+checktool (){
+ if ! $1 > /dev/null 2>&1; then
+ echo "SKIP: Could not $2"
+ exit $ksft_skip
+ fi
+}
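Review bot: `checktool` has no comment describing its contract, although the scripts in this directory rely on it for their skip handling. I would add above it: `# checktool CMD MSG: run CMD (word-split, output discarded); on failure print "SKIP: Could not MSG" and exit $ksft_skip`. The unquoted `$1` is what lets callers pass a command with arguments such as `"nft --version"`, so a short `# intentionally unquoted` remark would keep a later cleanup from quoting it. The file also lacks the `# SPDX-License-Identifier: GPL-2.0` line that most of the other new scripts in this commit carry.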
diff --git a/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
new file mode 100755
index 000000000000..c6fdd2079f4d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_conntrack_packetdrill.sh
@@ -0,0 +1,71 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+checktool "conntrack --version" "run test without conntrack"
+checktool "iptables --version" "run test without iptables"
+checktool "ip6tables --version" "run test without ip6tables"
+
+modprobe -q tun
+modprobe -q nf_conntrack
+# echo 1 > /proc/sys/net/netfilter/nf_log_all_netns
+
+PDRILL_TIMEOUT=10
+
+files="
+conntrack_ack_loss_stall.pkt
+conntrack_inexact_rst.pkt
+conntrack_syn_challenge_ack.pkt
+conntrack_synack_old.pkt
+conntrack_synack_reuse.pkt
+conntrack_rst_invalid.pkt
+"
+
+if ! packetdrill --dry_run --verbose "packetdrill/conntrack_ack_loss_stall.pkt";then
+ echo "SKIP: packetdrill not installed"
+ exit ${ksft_skip}
+fi
+
+ret=0
+
+run_packetdrill()
+{
+ filename="$1"
+ ipver="$2"
+ local mtu=1500
+
+ export NFCT_IP_VERSION="$ipver"
+
+ if [ "$ipver" = "ipv4" ];then
+ export xtables="iptables"
+ elif [ "$ipver" = "ipv6" ];then
+ export xtables="ip6tables"
+ mtu=1520
+ fi
+
+ timeout "$PDRILL_TIMEOUT" unshare -n packetdrill --ip_version="$ipver" --mtu=$mtu \
+ --tolerance_usecs=1000000 --non_fatal packet "$filename"
+}
+
+run_one_test_file()
+{
+ filename="$1"
+
+ for v in ipv4 ipv6;do
+ printf "%-50s(%s)%-20s" "$filename" "$v" ""
+ if run_packetdrill packetdrill/"$f" "$v";then
+ echo OK
+ else
+ echo FAIL
+ ret=1
+ fi
+ done
+}
+
+echo "Replaying packetdrill test cases:"
+for f in $files;do
+ run_one_test_file packetdrill/"$f"
+done
+
+exit $ret
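Review bot: `run_one_test_file` stores its argument in `filename` but then calls `run_packetdrill packetdrill/"$f" "$v"`, reading the caller's loop variable `f` instead. It works only because `f` is a global set by the `for f in $files` loop; called with any other argument, the function would print one name and replay a different file. The call should be `if run_packetdrill "$filename" "$v";then`, since the caller already passes `packetdrill/"$f"`. `filename` and `ipver` in the two functions should also be `local` so they do not leak into the script scope.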
diff --git a/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
new file mode 100755
index 000000000000..1014551dd769
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_nat_edemux.sh
@@ -0,0 +1,97 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test NAT source port clash resolution
+#
+
+source lib.sh
+ret=0
+socatpid=0
+
+cleanup()
+{
+ [ "$socatpid" -gt 0 ] && kill "$socatpid"
+
+ cleanup_all_ns
+}
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "run test without iptables"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+# Connect the namespaces using a veth pair
+ip link add name veth2 type veth peer name veth1
+ip link set netns "$ns1" dev veth1
+ip link set netns "$ns2" dev veth2
+
+ip netns exec "$ns1" ip link set up dev lo
+ip netns exec "$ns1" ip link set up dev veth1
+ip netns exec "$ns1" ip addr add 192.168.1.1/24 dev veth1
+
+ip netns exec "$ns2" ip link set up dev lo
+ip netns exec "$ns2" ip link set up dev veth2
+ip netns exec "$ns2" ip addr add 192.168.1.2/24 dev veth2
+
+# Create a server in one namespace
+ip netns exec "$ns1" socat -u TCP-LISTEN:5201,fork OPEN:/dev/null,wronly=1 &
+socatpid=$!
+
+# Restrict source port to just one so we don't have to exhaust
+# all others.
+ip netns exec "$ns2" sysctl -q net.ipv4.ip_local_port_range="10000 10000"
+
+# add a virtual IP using DNAT
+ip netns exec "$ns2" iptables -t nat -A OUTPUT -d 10.96.0.1/32 -p tcp --dport 443 -j DNAT --to-destination 192.168.1.1:5201
+
+# ... and route it to the other namespace
+ip netns exec "$ns2" ip route add 10.96.0.1 via 192.168.1.1
+
+# add a persistent connection from the other namespace
+ip netns exec "$ns2" socat -t 10 - TCP:192.168.1.1:5201 > /dev/null &
+
+sleep 1
+
+# ip daddr:dport will be rewritten to 192.168.1.1 5201
+# NAT must reallocate source port 10000 because
+# 192.168.1.2:10000 -> 192.168.1.1:5201 is already in use
+echo test | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:10.96.0.1:443,connect-timeout=3 >/dev/null
+ret=$?
+
+# Check socat can connect to 10.96.0.1:443 (aka 192.168.1.1:5201).
+if [ $ret -eq 0 ]; then
+ echo "PASS: socat can connect via NAT'd address"
+else
+ echo "FAIL: socat cannot connect via NAT'd address"
+fi
+
+# check sport clashres.
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5202 -j REDIRECT --to-ports 5201
+ip netns exec "$ns1" iptables -t nat -A PREROUTING -p tcp --dport 5203 -j REDIRECT --to-ports 5201
+
+sleep 5 | ip netns exec "$ns2" socat -t 5 -u STDIN TCP:192.168.1.1:5202,connect-timeout=5 >/dev/null &
+
+# if connect succeeds, client closes instantly due to EOF on stdin.
+# if connect hangs, it will time out after 5s.
+echo | ip netns exec "$ns2" socat -t 3 -u STDIN TCP:192.168.1.1:5203,connect-timeout=5 >/dev/null &
+cpid2=$!
+
+time_then=$(date +%s)
+wait $cpid2
+rv=$?
+time_now=$(date +%s)
+
+# Check how much time has elapsed, expectation is for
+# 'cpid2' to connect and then exit (and no connect delay).
+delta=$((time_now - time_then))
+
+if [ $delta -lt 2 ] && [ $rv -eq 0 ]; then
+ echo "PASS: could connect to service via redirected ports"
+else
+ echo "FAIL: socat cannot connect to service via redirect ($delta seconds elapsed, returned $rv)"
+ ret=1
+fi
+
+exit $ret
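Review bot: The `sleep 1` after starting the persistent `socat -t 10 - TCP:192.168.1.1:5201` client is the only thing ensuring that 192.168.1.2:10000 -> 192.168.1.1:5201 is in use before the DNAT connection is made. If the listener or that first connection is slow to come up, the source port is presumably still free and the test reports PASS without NAT having to resolve a clash. Polling for the state the test depends on would be more reliable, for example a `busywait` on a helper that checks `ss -N "$ns2" -t -n state established` for the 5201 peer, as conntrack_vrf.sh does for its listener. `cleanup()` also kills only `$socatpid`; the background clients are not tracked and rely on their own timeouts to exit.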
diff --git a/tools/testing/selftests/net/netfilter/nf_queue.c b/tools/testing/selftests/net/netfilter/nf_queue.c
new file mode 100644
index 000000000000..9e56b9d47037
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nf_queue.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <stdint.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <string.h>
+#include <time.h>
+#include <arpa/inet.h>
+
+#include <libmnl/libmnl.h>
+#include <linux/netfilter.h>
+#include <linux/netfilter/nfnetlink.h>
+#include <linux/netfilter/nfnetlink_queue.h>
+
+struct options {
+ bool count_packets;
+ bool gso_enabled;
+ int verbose;
+ unsigned int queue_num;
+ unsigned int timeout;
+ uint32_t verdict;
+ uint32_t delay_ms;
+};
+
+static unsigned int queue_stats[5];
+static struct options opts;
+
+static void help(const char *p)
+{
+ printf("Usage: %s [-c|-v [-vv] ] [-t timeout] [-q queue_num] [-Qdst_queue ] [ -d ms_delay ] [-G]\n", p);
+}
+
+static int parse_attr_cb(const struct nlattr *attr, void *data)
+{
+ const struct nlattr **tb = data;
+ int type = mnl_attr_get_type(attr);
+
+ /* skip unsupported attribute in user-space */
+ if (mnl_attr_type_valid(attr, NFQA_MAX) < 0)
+ return MNL_CB_OK;
+
+ switch (type) {
+ case NFQA_MARK:
+ case NFQA_IFINDEX_INDEV:
+ case NFQA_IFINDEX_OUTDEV:
+ case NFQA_IFINDEX_PHYSINDEV:
+ case NFQA_IFINDEX_PHYSOUTDEV:
+ if (mnl_attr_validate(attr, MNL_TYPE_U32) < 0) {
+ perror("mnl_attr_validate");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_TIMESTAMP:
+ if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+ sizeof(struct nfqnl_msg_packet_timestamp)) < 0) {
+ perror("mnl_attr_validate2");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_HWADDR:
+ if (mnl_attr_validate2(attr, MNL_TYPE_UNSPEC,
+ sizeof(struct nfqnl_msg_packet_hw)) < 0) {
+ perror("mnl_attr_validate2");
+ return MNL_CB_ERROR;
+ }
+ break;
+ case NFQA_PAYLOAD:
+ break;
+ }
+ tb[type] = attr;
+ return MNL_CB_OK;
+}
+
+static int queue_cb(const struct nlmsghdr *nlh, void *data)
+{
+ struct nlattr *tb[NFQA_MAX+1] = { 0 };
+ struct nfqnl_msg_packet_hdr *ph = NULL;
+ uint32_t id = 0;
+
+ (void)data;
+
+ mnl_attr_parse(nlh, sizeof(struct nfgenmsg), parse_attr_cb, tb);
+ if (tb[NFQA_PACKET_HDR]) {
+ ph = mnl_attr_get_payload(tb[NFQA_PACKET_HDR]);
+ id = ntohl(ph->packet_id);
+
+ if (opts.verbose > 0)
+ printf("packet hook=%u, hwproto 0x%x",
+ ntohs(ph->hw_protocol), ph->hook);
+
+ if (ph->hook >= 5) {
+ fprintf(stderr, "Unknown hook %d\n", ph->hook);
+ return MNL_CB_ERROR;
+ }
+
+ if (opts.verbose > 0) {
+ uint32_t skbinfo = 0;
+
+ if (tb[NFQA_SKB_INFO])
+ skbinfo = ntohl(mnl_attr_get_u32(tb[NFQA_SKB_INFO]));
+ if (skbinfo & NFQA_SKB_CSUMNOTREADY)
+ printf(" csumnotready");
+ if (skbinfo & NFQA_SKB_GSO)
+ printf(" gso");
+ if (skbinfo & NFQA_SKB_CSUM_NOTVERIFIED)
+ printf(" csumnotverified");
+ puts("");
+ }
+
+ if (opts.count_packets)
+ queue_stats[ph->hook]++;
+ }
+
+ return MNL_CB_OK + id;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_request(char *buf, uint8_t command, int queue_num)
+{
+ struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ struct nfqnl_msg_config_cmd cmd = {
+ .command = command,
+ .pf = htons(AF_INET),
+ };
+ struct nfgenmsg *nfg;
+
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_CFG_CMD, sizeof(cmd), &cmd);
+
+ return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_cfg_params(char *buf, uint8_t mode, int range, int queue_num)
+{
+ struct nlmsghdr *nlh = mnl_nlmsg_put_header(buf);
+ struct nfqnl_msg_config_params params = {
+ .copy_range = htonl(range),
+ .copy_mode = mode,
+ };
+ struct nfgenmsg *nfg;
+
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_CONFIG;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_CFG_PARAMS, sizeof(params), &params);
+
+ return nlh;
+}
+
+static struct nlmsghdr *
+nfq_build_verdict(char *buf, int id, int queue_num, uint32_t verd)
+{
+ struct nfqnl_msg_verdict_hdr vh = {
+ .verdict = htonl(verd),
+ .id = htonl(id),
+ };
+ struct nlmsghdr *nlh;
+ struct nfgenmsg *nfg;
+
+ nlh = mnl_nlmsg_put_header(buf);
+ nlh->nlmsg_type = (NFNL_SUBSYS_QUEUE << 8) | NFQNL_MSG_VERDICT;
+ nlh->nlmsg_flags = NLM_F_REQUEST;
+ nfg = mnl_nlmsg_put_extra_header(nlh, sizeof(*nfg));
+ nfg->nfgen_family = AF_UNSPEC;
+ nfg->version = NFNETLINK_V0;
+ nfg->res_id = htons(queue_num);
+
+ mnl_attr_put(nlh, NFQA_VERDICT_HDR, sizeof(vh), &vh);
+
+ return nlh;
+}
+
+static void print_stats(void)
+{
+ unsigned int last, total;
+ int i;
+
+ total = 0;
+ last = queue_stats[0];
+
+ for (i = 0; i < 5; i++) {
+ printf("hook %d packets %08u\n", i, queue_stats[i]);
+ last = queue_stats[i];
+ total += last;
+ }
+
+ printf("%u packets total\n", total);
+}
+
+struct mnl_socket *open_queue(void)
+{
+ char buf[MNL_SOCKET_BUFFER_SIZE];
+ unsigned int queue_num;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ struct timeval tv;
+ uint32_t flags;
+
+ nl = mnl_socket_open(NETLINK_NETFILTER);
+ if (nl == NULL) {
+ perror("mnl_socket_open");
+ exit(EXIT_FAILURE);
+ }
+
+ if (mnl_socket_bind(nl, 0, MNL_SOCKET_AUTOPID) < 0) {
+ perror("mnl_socket_bind");
+ exit(EXIT_FAILURE);
+ }
+
+ queue_num = opts.queue_num;
+ nlh = nfq_build_cfg_request(buf, NFQNL_CFG_CMD_BIND, queue_num);
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ nlh = nfq_build_cfg_params(buf, NFQNL_COPY_PACKET, 0xFFFF, queue_num);
+
+ flags = opts.gso_enabled ? NFQA_CFG_F_GSO : 0;
+ flags |= NFQA_CFG_F_UID_GID;
+ mnl_attr_put_u32(nlh, NFQA_CFG_FLAGS, htonl(flags));
+ mnl_attr_put_u32(nlh, NFQA_CFG_MASK, htonl(flags));
+
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+
+ memset(&tv, 0, sizeof(tv));
+ tv.tv_sec = opts.timeout;
+ if (opts.timeout && setsockopt(mnl_socket_get_fd(nl),
+ SOL_SOCKET, SO_RCVTIMEO,
+ &tv, sizeof(tv))) {
+ perror("setsockopt(SO_RCVTIMEO)");
+ exit(EXIT_FAILURE);
+ }
+
+ return nl;
+}
+
+static void sleep_ms(uint32_t delay)
+{
+ struct timespec ts = { .tv_sec = delay / 1000 };
+
+ delay %= 1000;
+
+ ts.tv_nsec = delay * 1000llu * 1000llu;
+
+ nanosleep(&ts, NULL);
+}
+
+static int mainloop(void)
+{
+ unsigned int buflen = 64 * 1024 + MNL_SOCKET_BUFFER_SIZE;
+ struct mnl_socket *nl;
+ struct nlmsghdr *nlh;
+ unsigned int portid;
+ char *buf;
+ int ret;
+
+ buf = malloc(buflen);
+ if (!buf) {
+ perror("malloc");
+ exit(EXIT_FAILURE);
+ }
+
+ nl = open_queue();
+ portid = mnl_socket_get_portid(nl);
+
+ for (;;) {
+ uint32_t id;
+
+ ret = mnl_socket_recvfrom(nl, buf, buflen);
+ if (ret == -1) {
+ if (errno == ENOBUFS || errno == EINTR)
+ continue;
+
+ if (errno == EAGAIN) {
+ errno = 0;
+ ret = 0;
+ break;
+ }
+
+ perror("mnl_socket_recvfrom");
+ exit(EXIT_FAILURE);
+ }
+
+ ret = mnl_cb_run(buf, ret, 0, portid, queue_cb, NULL);
+ if (ret < 0) {
+ perror("mnl_cb_run");
+ exit(EXIT_FAILURE);
+ }
+
+ id = ret - MNL_CB_OK;
+ if (opts.delay_ms)
+ sleep_ms(opts.delay_ms);
+
+ nlh = nfq_build_verdict(buf, id, opts.queue_num, opts.verdict);
+ if (mnl_socket_sendto(nl, nlh, nlh->nlmsg_len) < 0) {
+ perror("mnl_socket_sendto");
+ exit(EXIT_FAILURE);
+ }
+ }
+
+ mnl_socket_close(nl);
+
+ return ret;
+}
+
+static void parse_opts(int argc, char **argv)
+{
+ int c;
+
+ while ((c = getopt(argc, argv, "chvt:q:Q:d:G")) != -1) {
+ switch (c) {
+ case 'c':
+ opts.count_packets = true;
+ break;
+ case 'h':
+ help(argv[0]);
+ exit(0);
+ break;
+ case 'q':
+ opts.queue_num = atoi(optarg);
+ if (opts.queue_num > 0xffff)
+ opts.queue_num = 0;
+ break;
+ case 'Q':
+ opts.verdict = atoi(optarg);
+ if (opts.verdict > 0xffff) {
+ fprintf(stderr, "Expected destination queue number\n");
+ exit(1);
+ }
+
+ opts.verdict <<= 16;
+ opts.verdict |= NF_QUEUE;
+ break;
+ case 'd':
+ opts.delay_ms = atoi(optarg);
+ if (opts.delay_ms == 0) {
+ fprintf(stderr, "Expected nonzero delay (in milliseconds)\n");
+ exit(1);
+ }
+ break;
+ case 't':
+ opts.timeout = atoi(optarg);
+ break;
+ case 'G':
+ opts.gso_enabled = false;
+ break;
+ case 'v':
+ opts.verbose++;
+ break;
+ }
+ }
+
+ if (opts.verdict != NF_ACCEPT && (opts.verdict >> 16 == opts.queue_num)) {
+ fprintf(stderr, "Cannot use same destination and source queue\n");
+ exit(1);
+ }
+}
+
+int main(int argc, char *argv[])
+{
+ int ret;
+
+ opts.verdict = NF_ACCEPT;
+ opts.gso_enabled = true;
+
+ parse_opts(argc, argv);
+
+ ret = mainloop();
+ if (opts.count_packets)
+ print_stats();
+
+ return ret;
+}
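Review bot: In `queue_cb` the verbose `printf` passes its arguments in the opposite order to the format string, so the hardware protocol is printed under `hook=` and the hook under `hwproto`; it should read `printf("packet hook=%u, hwproto 0x%x", ph->hook, ntohs(ph->hw_protocol));`. `parse_attr_cb` also has no case for `NFQA_PACKET_HDR` or `NFQA_SKB_INFO`, yet `queue_cb` dereferences the first as a `struct nfqnl_msg_packet_hdr` and reads the second with `mnl_attr_get_u32`. The messages come from the kernel, so this is defensive, but a `mnl_attr_validate2(attr, MNL_TYPE_UNSPEC, sizeof(struct nfqnl_msg_packet_hdr))` check for the header, plus adding `NFQA_SKB_INFO` to the `MNL_TYPE_U32` group, would make the length assumptions explicit. In `parse_opts`, `atoi` results are stored unchecked, and an out-of-range `-q` value is silently replaced by queue 0 rather than rejected.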
diff --git a/tools/testing/selftests/net/netfilter/nft_audit.sh b/tools/testing/selftests/net/netfilter/nft_audit.sh
new file mode 100755
index 000000000000..902f8114bc80
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_audit.sh
@@ -0,0 +1,268 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Check that audit logs generated for nft commands are as expected.
+
+SKIP_RC=4
+RC=0
+
+if [ -r /var/run/auditd.pid ];then
+ read pid < /var/run/auditd.pid
+ p=$(pgrep ^auditd$)
+
+ if [ "$pid" -eq "$p" ]; then
+ echo "SKIP: auditd is running"
+ exit $SKIP_RC
+ fi
+fi
+
+nft --version >/dev/null 2>&1 || {
+ echo "SKIP: missing nft tool"
+ exit $SKIP_RC
+}
+
+# nft must be recent enough to support "reset" keyword.
+nft --check -f /dev/stdin >/dev/null 2>&1 <<EOF
+add table t
+add chain t c
+reset rules t c
+EOF
+
+if [ "$?" -ne 0 ];then
+ echo -n "SKIP: nft reset feature test failed: "
+ nft --version
+ exit $SKIP_RC
+fi
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+
+# give other scripts a chance to finish - audit_logread sees all activity
+sleep 1
+
+logfile=$(mktemp)
+rulefile=$(mktemp)
+echo "logging into $logfile"
+./audit_logread >"$logfile" &
+logread_pid=$!
+trap 'kill $logread_pid; rm -f $logfile $rulefile' EXIT
+exec 3<"$logfile"
+
+do_test() { # (cmd, log)
+ echo -n "testing for cmd: $1 ... "
+ cat <&3 >/dev/null
+ $1 >/dev/null || exit 1
+ sleep 0.1
+ res=$(diff -a -u <(echo "$2") - <&3)
+ [ $? -eq 0 ] && { echo "OK"; return; }
+ echo "FAIL"
+ grep -v '^\(---\|+++\|@@\)' <<< "$res"
+ ((RC--))
+}
+
+nft flush ruleset
+
+# adding tables, chains and rules
+
+for table in t1 t2; do
+ do_test "nft add table $table" \
+ "table=$table family=2 entries=1 op=nft_register_table"
+
+ do_test "nft add chain $table c1" \
+ "table=$table family=2 entries=1 op=nft_register_chain"
+
+ do_test "nft add chain $table c2; add chain $table c3" \
+ "table=$table family=2 entries=2 op=nft_register_chain"
+
+ cmd="add rule $table c1 counter"
+
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=1 op=nft_register_rule"
+
+ do_test "nft $cmd; $cmd" \
+ "table=$table family=2 entries=2 op=nft_register_rule"
+
+ cmd=""
+ sep=""
+ for chain in c2 c3; do
+ for i in {1..3}; do
+ cmd+="$sep add rule $table $chain counter"
+ sep=";"
+ done
+ done
+ do_test "nft $cmd" \
+ "table=$table family=2 entries=6 op=nft_register_rule"
+done
+
+for ((i = 0; i < 500; i++)); do
+ echo "add rule t2 c3 counter accept comment \"rule $i\""
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=500 op=nft_register_rule'
+
+# adding sets and elements
+
+settype='type inet_service; counter'
+setelem='{ 22, 80, 443 }'
+setblock="{ $settype; elements = $setelem; }"
+do_test "nft add set t1 s $setblock" \
+"table=t1 family=2 entries=4 op=nft_register_set"
+
+do_test "nft add set t1 s2 $setblock; add set t1 s3 { $settype; }" \
+"table=t1 family=2 entries=5 op=nft_register_set"
+
+do_test "nft add element t1 s3 $setelem" \
+"table=t1 family=2 entries=3 op=nft_register_setelem"
+
+# adding counters
+
+do_test 'nft add counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add counter t2 c1; add counter t2 c2' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add counter t2 c$i"
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# adding/updating quotas
+
+do_test 'nft add quota t1 q1 { 10 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+do_test 'nft add quota t2 q1 { 10 bytes }; add quota t2 q2 { 10 bytes }' \
+'table=t2 family=2 entries=2 op=nft_register_obj'
+
+for ((i = 3; i <= 500; i++)); do
+ echo "add quota t2 q$i { 10 bytes }"
+done > "$rulefile"
+do_test "nft -f $rulefile" \
+'table=t2 family=2 entries=498 op=nft_register_obj'
+
+# changing the quota value triggers obj update path
+do_test 'nft add quota t1 q1 { 20 bytes }' \
+'table=t1 family=2 entries=1 op=nft_register_obj'
+
+# resetting rules
+
+do_test 'nft reset rules t1 c2' \
+'table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules table t1' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule'
+
+do_test 'nft reset rules t2 c3' \
+'table=t2 family=2 entries=189 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=126 op=nft_reset_rule'
+
+do_test 'nft reset rules t2' \
+'table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=186 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=129 op=nft_reset_rule'
+
+do_test 'nft reset rules' \
+'table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t1 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=3 op=nft_reset_rule
+table=t2 family=2 entries=180 op=nft_reset_rule
+table=t2 family=2 entries=188 op=nft_reset_rule
+table=t2 family=2 entries=135 op=nft_reset_rule'
+
+# resetting sets and elements
+
+elem=(22 ",80" ",443")
+relem=""
+for i in {1..3}; do
+ relem+="${elem[((i - 1))]}"
+ do_test "nft reset element t1 s { $relem }" \
+ "table=t1 family=2 entries=$i op=nft_reset_setelem"
+done
+
+do_test 'nft reset set t1 s' \
+'table=t1 family=2 entries=3 op=nft_reset_setelem'
+
+# resetting counters
+
+do_test 'nft reset counter t1 c1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset counters t2' \
+'table=t2 family=2 entries=342 op=nft_reset_obj
+table=t2 family=2 entries=158 op=nft_reset_obj'
+
+do_test 'nft reset counters' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=341 op=nft_reset_obj
+table=t2 family=2 entries=159 op=nft_reset_obj'
+
+# resetting quotas
+
+do_test 'nft reset quota t1 q1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t1' \
+'table=t1 family=2 entries=1 op=nft_reset_obj'
+
+do_test 'nft reset quotas t2' \
+'table=t2 family=2 entries=315 op=nft_reset_obj
+table=t2 family=2 entries=185 op=nft_reset_obj'
+
+do_test 'nft reset quotas' \
+'table=t1 family=2 entries=1 op=nft_reset_obj
+table=t2 family=2 entries=314 op=nft_reset_obj
+table=t2 family=2 entries=186 op=nft_reset_obj'
+
+# deleting rules
+
+readarray -t handles < <(nft -a list chain t1 c1 | \
+ sed -n 's/.*counter.* handle \(.*\)$/\1/p')
+
+do_test "nft delete rule t1 c1 handle ${handles[0]}" \
+'table=t1 family=2 entries=1 op=nft_unregister_rule'
+
+cmd='delete rule t1 c1 handle'
+do_test "nft $cmd ${handles[1]}; $cmd ${handles[2]}" \
+'table=t1 family=2 entries=2 op=nft_unregister_rule'
+
+do_test 'nft flush chain t1 c2' \
+'table=t1 family=2 entries=3 op=nft_unregister_rule'
+
+do_test 'nft flush table t2' \
+'table=t2 family=2 entries=509 op=nft_unregister_rule'
+
+# deleting chains
+
+do_test 'nft delete chain t2 c2' \
+'table=t2 family=2 entries=1 op=nft_unregister_chain'
+
+# deleting sets and elements
+
+do_test 'nft delete element t1 s { 22 }' \
+'table=t1 family=2 entries=1 op=nft_unregister_setelem'
+
+do_test 'nft delete element t1 s { 80, 443 }' \
+'table=t1 family=2 entries=2 op=nft_unregister_setelem'
+
+do_test 'nft flush set t1 s2' \
+'table=t1 family=2 entries=3 op=nft_unregister_setelem'
+
+do_test 'nft delete set t1 s2' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+do_test 'nft delete set t1 s3' \
+'table=t1 family=2 entries=1 op=nft_unregister_set'
+
+exit $RC
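Review bot: The auditd guard at the top of the script only skips when `/var/run/auditd.pid` is readable and its content equals the single PID printed by `pgrep ^auditd$`. With a stale pidfile `$p` is empty, so `[ "$pid" -eq "$p" ]` prints an "integer expression expected" error and falls through; an auditd running without that pidfile is not detected at all. The test compares the records captured by `./audit_logread` line for line, so a concurrent auditd would presumably take or disturb those records and produce misleading failures. Checking the process directly is more robust: `if pgrep -x auditd >/dev/null; then echo "SKIP: auditd is running"; exit $SKIP_RC; fi`. Separately, the EXIT trap expands its variables unquoted; `trap 'kill "$logread_pid"; rm -f -- "$logfile" "$rulefile"' EXIT` survives a `TMPDIR` containing spaces or glob characters.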
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range.sh b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
new file mode 100755
index 000000000000..6d66240e149c
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range.sh
@@ -0,0 +1,1622 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# nft_concat_range.sh - Tests for sets with concatenation of ranged fields
+#
+# Copyright (c) 2019 Red Hat GmbH
+#
+# Author: Stefano Brivio <sbrivio@redhat.com>
+#
+# shellcheck disable=SC2154,SC2034,SC2016,SC2030,SC2031,SC2317
+# ^ Configuration and templates sourced with eval, counters reused in subshells
+
+source lib.sh
+
+# Available test groups:
+# - reported_issues: check for issues that were reported in the past
+# - correctness: check that packets match given entries, and only those
+# - concurrency: attempt races between insertion, deletion and lookup
+# - timeout: check that packets match entries until they expire
+# - performance: estimate matching rate, compare with rbtree and hash baselines
+TESTS="reported_issues correctness concurrency timeout"
+[ -n "$NFT_CONCAT_RANGE_TESTS" ] && TESTS="${NFT_CONCAT_RANGE_TESTS}"
+
+# Set types, defined by TYPE_ variables below
+TYPES="net_port port_net net6_port port_proto net6_port_mac net6_port_mac_proto
+ net_port_net net_mac mac_net net_mac_icmp net6_mac_icmp
+ net6_port_net6_port net_port_mac_proto_net"
+
+# Reported bugs, also described by TYPE_ variables below
+BUGS="flush_remove_add reload"
+
+# List of possible paths to pktgen script from kernel tree for performance tests
+PKTGEN_SCRIPT_PATHS="
+ ../../../../../samples/pktgen/pktgen_bench_xmit_mode_netif_receive.sh
+ pktgen/pktgen_bench_xmit_mode_netif_receive.sh"
+
+# Definition of set types:
+# display display text for test report
+# type_spec nftables set type specifier
+# chain_spec nftables type specifier for rules mapping to set
+# dst call sequence of format_*() functions for destination fields
+# src call sequence of format_*() functions for source fields
+# start initial integer used to generate addresses and ports
+# count count of entries to generate and match
+# src_delta number summed to destination generator for source fields
+# tools list of tools for correctness and timeout tests, any can be used
+# proto L4 protocol of test packets
+#
+# race_repeat race attempts per thread, 0 disables concurrency test for type
+# flood_tools list of tools for concurrency tests, any can be used
+# flood_proto L4 protocol of test packets for concurrency tests
+# flood_spec nftables type specifier for concurrency tests
+#
+# perf_duration duration of single pktgen injection test
+# perf_spec nftables type specifier for performance tests
+# perf_dst format_*() functions for destination fields in performance test
+# perf_src format_*() functions for source fields in performance test
+# perf_entries number of set entries for performance test
+# perf_proto L3 protocol of test packets
+TYPE_net_port="
+display net,port
+type_spec ipv4_addr . inet_service
+chain_spec ip daddr . udp dport
+dst addr4 port
+src
+start 1
+count 5
+src_delta 2000
+tools sendip bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto udp
+flood_spec ip daddr . udp dport
+
+perf_duration 5
+perf_spec ip daddr . udp dport
+perf_dst addr4 port
+perf_src
+perf_entries 1000
+perf_proto ipv4
+"
+
+TYPE_port_net="
+display port,net
+type_spec inet_service . ipv4_addr
+chain_spec udp dport . ip daddr
+dst port addr4
+src
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto udp
+flood_spec udp dport . ip daddr
+
+perf_duration 5
+perf_spec udp dport . ip daddr
+perf_dst port addr4
+perf_src
+perf_entries 100
+perf_proto ipv4
+"
+
+TYPE_net6_port="
+display net6,port
+type_spec ipv6_addr . inet_service
+chain_spec ip6 daddr . udp dport
+dst addr6 port
+src
+start 10
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp6
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp6
+flood_spec ip6 daddr . udp dport
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport
+perf_dst addr6 port
+perf_src
+perf_entries 1000
+perf_proto ipv6
+"
+
+TYPE_port_proto="
+display port,proto
+type_spec inet_service . inet_proto
+chain_spec udp dport . meta l4proto
+dst port proto
+src
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 5
+perf_spec udp dport . meta l4proto
+perf_dst port proto
+perf_src
+perf_entries 30000
+perf_proto ipv4
+"
+
+TYPE_net6_port_mac="
+display net6,port,mac
+type_spec ipv6_addr . inet_service . ether_addr
+chain_spec ip6 daddr . udp dport . ether saddr
+dst addr6 port
+src mac
+start 10
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp6
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport . ether daddr
+perf_dst addr6 port mac
+perf_src
+perf_entries 10
+perf_proto ipv6
+"
+
+TYPE_net6_port_mac_proto="
+display net6,port,mac,proto
+type_spec ipv6_addr . inet_service . ether_addr . inet_proto
+chain_spec ip6 daddr . udp dport . ether saddr . meta l4proto
+dst addr6 port
+src mac proto
+start 10
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp6
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip6 daddr . udp dport . ether daddr . meta l4proto
+perf_dst addr6 port mac proto
+perf_src
+perf_entries 1000
+perf_proto ipv6
+"
+
+TYPE_net_port_net="
+display net,port,net
+type_spec ipv4_addr . inet_service . ipv4_addr
+chain_spec ip daddr . udp dport . ip saddr
+dst addr4 port
+src addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp
+flood_spec ip daddr . udp dport . ip saddr
+
+perf_duration 0
+"
+
+TYPE_net6_port_net6_port="
+display net6,port,net6,port
+type_spec ipv6_addr . inet_service . ipv6_addr . inet_service
+chain_spec ip6 daddr . udp dport . ip6 saddr . udp sport
+dst addr6 port
+src addr6 port
+start 10
+count 5
+src_delta 2000
+tools sendip socat
+proto udp6
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp6
+flood_spec ip6 daddr . tcp dport . ip6 saddr . tcp sport
+
+perf_duration 0
+"
+
+TYPE_net_port_mac_proto_net="
+display net,port,mac,proto,net
+type_spec ipv4_addr . inet_service . ether_addr . inet_proto . ipv4_addr
+chain_spec ip daddr . udp dport . ether saddr . meta l4proto . ip saddr
+dst addr4 port
+src mac proto addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_mac="
+display net,mac
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 5
+perf_spec ip daddr . ether daddr
+perf_dst addr4 mac
+perf_src
+perf_entries 1000
+perf_proto ipv4
+"
+
+TYPE_mac_net="
+display mac,net
+type_spec ether_addr . ipv4_addr
+chain_spec ether saddr . ip saddr
+dst
+src mac addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_mac_icmp="
+display net,mac - ICMP
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 5
+src_delta 2000
+tools ping
+proto icmp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net6_mac_icmp="
+display net6,mac - ICMPv6
+type_spec ipv6_addr . ether_addr
+chain_spec ip6 daddr . ether saddr
+dst addr6
+src mac
+start 10
+count 50
+src_delta 2000
+tools ping
+proto icmp6
+
+race_repeat 0
+
+perf_duration 0
+"
+
+TYPE_net_port_proto_net="
+display net,port,proto,net
+type_spec ipv4_addr . inet_service . inet_proto . ipv4_addr
+chain_spec ip daddr . udp dport . meta l4proto . ip saddr
+dst addr4 port proto
+src addr4
+start 1
+count 5
+src_delta 2000
+tools sendip socat
+proto udp
+
+race_repeat 3
+flood_tools iperf3 iperf netperf
+flood_proto tcp
+flood_spec ip daddr . tcp dport . meta l4proto . ip saddr
+
+perf_duration 0
+"
+
+# Definition of tests for bugs reported in the past:
+# display display text for test report
+TYPE_flush_remove_add="
+display Add two elements, flush, re-add
+"
+
+TYPE_reload="
+display net,mac with reload
+type_spec ipv4_addr . ether_addr
+chain_spec ip daddr . ether saddr
+dst addr4
+src mac
+start 1
+count 1
+src_delta 2000
+tools sendip socat bash
+proto udp
+
+race_repeat 0
+
+perf_duration 0
+"
+
+# Set template for all tests, types and rules are filled in depending on test
+set_template='
+flush ruleset
+
+table inet filter {
+ counter test {
+ packets 0 bytes 0
+ }
+
+ set test {
+ type ${type_spec}
+ flags interval,timeout
+ }
+
+ chain input {
+ type filter hook prerouting priority 0; policy accept;
+ ${chain_spec} @test counter name \"test\"
+ }
+}
+
+table netdev perf {
+ counter test {
+ packets 0 bytes 0
+ }
+
+ counter match {
+ packets 0 bytes 0
+ }
+
+ set test {
+ type ${type_spec}
+ flags interval
+ }
+
+ set norange {
+ type ${type_spec}
+ }
+
+ set noconcat {
+ type ${type_spec%% *}
+ flags interval
+ }
+
+ chain test {
+ type filter hook ingress device veth_a priority 0;
+ }
+}
+'
+
+err_buf=
+info_buf=
+
+# Append string to error buffer
+err() {
+ err_buf="${err_buf}${1}
+"
+}
+
+# Append string to information buffer
+info() {
+ info_buf="${info_buf}${1}
+"
+}
+
+# Flush error buffer to stdout
+err_flush() {
+ printf "%s" "${err_buf}"
+ err_buf=
+}
+
+# Flush information buffer to stdout
+info_flush() {
+ printf "%s" "${info_buf}"
+ info_buf=
+}
+
+# Setup veth pair: this namespace receives traffic, B generates it
+setup_veth() {
+ ip netns add B
+ ip link add veth_a type veth peer name veth_b || return 1
+
+ ip link set veth_a up
+ ip link set veth_b netns B
+
+ ip -n B link set veth_b up
+
+ ip addr add dev veth_a 10.0.0.1
+ ip route add default dev veth_a
+
+ ip -6 addr add fe80::1/64 dev veth_a nodad
+ ip -6 addr add 2001:db8::1/64 dev veth_a nodad
+ ip -6 route add default dev veth_a
+
+ ip -n B route add default dev veth_b
+
+ ip -6 -n B addr add fe80::2/64 dev veth_b nodad
+ ip -6 -n B addr add 2001:db8::2/64 dev veth_b nodad
+ ip -6 -n B route add default dev veth_b
+
+ B() {
+ ip netns exec B "$@" >/dev/null 2>&1
+ }
+}
+
+# Fill in set template and initialise set
+setup_set() {
+ eval "echo \"${set_template}\"" | nft -f -
+}
+
+# Check that at least one of the needed tools is available
+check_tools() {
+ [ -z "${tools}" ] && return 0
+
+ __tools=
+ for tool in ${tools}; do
+ __tools="${__tools} ${tool}"
+
+ command -v "${tool}" >/dev/null && return 0
+ done
+ err "need one of:${__tools}, skipping" && return 1
+}
+
+# Set up function to send ICMP packets
+setup_send_icmp() {
+ send_icmp() {
+ B ping -c1 -W1 "${dst_addr4}" >/dev/null 2>&1
+ }
+}
+
+# Set up function to send ICMPv6 packets
+setup_send_icmp6() {
+ if command -v ping6 >/dev/null; then
+ send_icmp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ping6 -q -c1 -W1 "${dst_addr6}"
+ }
+ else
+ send_icmp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ping -q -6 -c1 -W1 "${dst_addr6}"
+ }
+ fi
+}
+
+# Set up function to send single UDP packets on IPv4
+setup_send_udp() {
+ if command -v sendip >/dev/null; then
+ send_udp() {
+ [ -n "${src_port}" ] && src_port="-us ${src_port}"
+ [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+ [ -n "${src_addr4}" ] && src_addr4="-is ${src_addr4}"
+
+ # shellcheck disable=SC2086 # sendip needs split options
+ B sendip -p ipv4 -p udp ${src_addr4} ${src_port} \
+ ${dst_port} "${dst_addr4}"
+
+ src_port=
+ dst_port=
+ src_addr4=
+ }
+ elif command -v socat -v >/dev/null; then
+ send_udp() {
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}" dev veth_b
+ __socatbind=",bind=${src_addr4}"
+ if [ -n "${src_port}" ];then
+ __socatbind="${__socatbind}:${src_port}"
+ fi
+ fi
+
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ [ -z "${dst_port}" ] && dst_port=12345
+
+ echo "test4" | B socat -t 0.01 STDIN UDP4-DATAGRAM:"$dst_addr4":"$dst_port""${__socatbind}"
+
+ src_addr4=
+ src_port=
+ }
+ elif [ -z "$(bash -c 'type -p')" ]; then
+ send_udp() {
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ B ip route add default dev veth_b
+ fi
+
+ B bash -c "echo > /dev/udp/${dst_addr4}/${dst_port}"
+
+ if [ -n "${src_addr4}" ]; then
+ B ip addr del "${src_addr4}/16" dev veth_b
+ fi
+ src_addr4=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send single UDP packets on IPv6
+setup_send_udp6() {
+ if command -v sendip >/dev/null; then
+ send_udp6() {
+ [ -n "${src_port}" ] && src_port="-us ${src_port}"
+ [ -n "${dst_port}" ] && dst_port="-ud ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ src_addr6="-6s ${src_addr6}"
+ else
+ src_addr6="-6s 2001:db8::2"
+ fi
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ B sendip -p ipv6 -p udp ${src_addr6} ${src_port} \
+ ${dst_port} "${dst_addr6}"
+
+ src_port=
+ dst_port=
+ src_addr6=
+ }
+ elif command -v socat -v >/dev/null; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ __socatbind6=
+
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+
+ __socatbind6=",bind=[${src_addr6}]"
+
+ if [ -n "${src_port}" ] ;then
+ __socatbind6="${__socatbind6}:${src_port}"
+ fi
+ fi
+
+ echo "test6" | B socat -t 0.01 STDIN UDP6-DATAGRAM:["$dst_addr6"]:"$dst_port""${__socatbind6}"
+ }
+ elif [ -z "$(bash -c 'type -p')" ]; then
+ send_udp6() {
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ B bash -c "echo > /dev/udp/${dst_addr6}/${dst_port}"
+ ip -6 addr del "${dst_addr6}" dev veth_a 2>/dev/null
+ }
+ else
+ return 1
+ fi
+}
+
+listener_ready()
+{
+ port="$1"
+ ss -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+# Set up function to send TCP traffic on IPv4
+setup_flood_tcp() {
+ if command -v iperf3 >/dev/null; then
+ flood_tcp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b 2>/dev/null
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -c "${dst_addr4}" ${dst_port} ${src_port} \
+ ${src_addr4} -l16 -t 1000
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_tcp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr4="${src_addr4}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -s -DB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -c "${dst_addr4}" ${dst_port} ${src_addr4} \
+ -l20 -t 1000
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_tcp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -4 ${dst_port} -L "${dst_addr4}" \
+ >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -4 -H "${dst_addr4}" ${dst_port} \
+ -L "${src_addr4}" -l 1000 -t TCP_STREAM
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send TCP traffic on IPv6
+setup_flood_tcp6() {
+ if command -v iperf3 >/dev/null; then
+ flood_tcp6() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ src_addr6="-B ${src_addr6}"
+ else
+ src_addr6="-B 2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "${n_port}"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -c "${dst_addr6}" ${dst_port} \
+ ${src_port} ${src_addr6} -l16 -t 1000
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_tcp6() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ src_addr6="-B ${src_addr6}"
+ else
+ src_addr6="-B 2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr6="${src_addr6}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -s -VDB "${dst_addr6}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -c "${dst_addr6}" -V ${dst_port} \
+ ${src_addr6} -l1 -t 1000
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_tcp6() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr6}" ]; then
+ B ip addr add "${src_addr6}" dev veth_b nodad
+ else
+ src_addr6="2001:db8::2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip -6 addr add "${dst_addr6}" dev veth_a nodad \
+ 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -6 ${dst_port} -L "${dst_addr6}" \
+ >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -6 -H "${dst_addr6}" ${dst_port} \
+ -L "${src_addr6}" -l 1000 -t TCP_STREAM
+
+ src_addr6=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Set up function to send UDP traffic on IPv4
+setup_flood_udp() {
+ if command -v iperf3 >/dev/null; then
+ flood_udp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2 2>/dev/null
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_port="--cport ${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf3 -s -DB "${dst_addr4}" ${dst_port}
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf3 -u -c "${dst_addr4}" -Z -b 100M -l16 -t1000 \
+ ${dst_port} ${src_port} ${src_addr4}
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v iperf >/dev/null; then
+ flood_udp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ src_addr4="-B ${src_addr4}"
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="-B 10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ src_addr4="${src_addr4}:${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ iperf -u -sDB "${dst_addr4}" ${dst_port} >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B iperf -u -c "${dst_addr4}" -b 100M -l1 -t1000 \
+ ${dst_port} ${src_addr4}
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ elif command -v netperf >/dev/null; then
+ flood_udp() {
+ local n_port="${dst_port}"
+ [ -n "${dst_port}" ] && dst_port="-p ${dst_port}"
+ if [ -n "${src_addr4}" ]; then
+ B ip addr add "${src_addr4}/16" dev veth_b
+ else
+ B ip addr add dev veth_b 10.0.0.2
+ src_addr4="10.0.0.2"
+ fi
+ if [ -n "${src_port}" ]; then
+ dst_port="${dst_port},${src_port}"
+ fi
+ B ip route add default dev veth_b
+ ip addr add "${dst_addr4}" dev veth_a 2>/dev/null
+
+ # shellcheck disable=SC2086 # this needs split options
+ netserver -4 ${dst_port} -L "${dst_addr4}" \
+ >/dev/null 2>&1
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$n_port"
+
+ # shellcheck disable=SC2086 # this needs split options
+ B netperf -4 -H "${dst_addr4}" ${dst_port} \
+ -L "${src_addr4}" -l 1000 -t UDP_STREAM
+
+ src_addr4=
+ src_port=
+ dst_port=
+ }
+ else
+ return 1
+ fi
+}
+
+# Find pktgen script and set up function to start pktgen injection
+setup_perf() {
+ for pktgen_script_path in ${PKTGEN_SCRIPT_PATHS} __notfound; do
+ command -v "${pktgen_script_path}" >/dev/null && break
+ done
+ [ "${pktgen_script_path}" = "__notfound" ] && return 1
+
+ perf_ipv4() {
+ ${pktgen_script_path} -s80 \
+ -i veth_a -d "${dst_addr4}" -p "${dst_port}" \
+ -m "${dst_mac}" \
+ -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+ perf_pid=$!
+ }
+ perf_ipv6() {
+ IP6=6 ${pktgen_script_path} -s100 \
+ -i veth_a -d "${dst_addr6}" -p "${dst_port}" \
+ -m "${dst_mac}" \
+ -t $(($(nproc) / 5 + 1)) -b10000 -n0 2>/dev/null &
+ perf_pid=$!
+ }
+}
+
+# Clean up before each test
+cleanup() {
+ nft reset counter inet filter test >/dev/null 2>&1
+ nft flush ruleset >/dev/null 2>&1
+ ip link del dummy0 2>/dev/null
+ ip route del default 2>/dev/null
+ ip -6 route del default 2>/dev/null
+ ip netns pids B 2>/dev/null | xargs kill 2>/dev/null
+ ip netns del B 2>/dev/null
+ ip link del veth_a 2>/dev/null
+ timeout=
+ killall iperf3 2>/dev/null
+ killall iperf 2>/dev/null
+ killall netperf 2>/dev/null
+ killall netserver 2>/dev/null
+}
+
+cleanup_exit() {
+ cleanup
+ rm -f "$tmp"
+}
+
+# Entry point for setup functions
+setup() {
+ if [ "$(id -u)" -ne 0 ]; then
+ echo " need to run as root"
+ exit ${ksft_skip}
+ fi
+
+ cleanup
+ check_tools || return 1
+ for arg do
+ if ! eval setup_"${arg}"; then
+ err " ${arg} not supported"
+ return 1
+ fi
+ done
+}
+
+# Format integer into IPv4 address, summing 10.0.0.5 (arbitrary) to it
+format_addr4() {
+ a=$((${1} + 16777216 * 10 + 5))
+ printf "%i.%i.%i.%i" \
+ "$((a / 16777216))" "$((a % 16777216 / 65536))" \
+ "$((a % 65536 / 256))" "$((a % 256))"
+}
+
+# Format integer into IPv6 address, summing 2001:db8:: to it
+format_addr6() {
+ printf "2001:db8::%04x:%04x" "$((${1} / 65536))" "$((${1} % 65536))"
+}
+
+# Format integer into EUI-48 address, summing 00:01:00:00:00:00 to it
+format_mac() {
+ printf "00:01:%02x:%02x:%02x:%02x" \
+ "$((${1} / 16777216))" "$((${1} % 16777216 / 65536))" \
+ "$((${1} % 65536 / 256))" "$((${1} % 256))"
+}
+
+# Format integer into port, avoid 0 port
+format_port() {
+ printf "%i" "$((${1} % 65534 + 1))"
+}
+
+# Drop suffixed '6' from L4 protocol, if any
+format_proto() {
+ printf "%s" "${proto}" | tr -d 6
+}
+
+# Format destination and source fields into nft concatenated type
+format() {
+ __start=
+ __end=
+ __expr="{ "
+
+ for f in ${dst}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __start="$(eval format_"${f}" "${start}")"
+ __end="$(eval format_"${f}" "${end}")"
+
+ if [ "${f}" = "proto" ]; then
+ __expr="${__expr}${__start}"
+ else
+ __expr="${__expr}${__start}-${__end}"
+ fi
+ done
+ for f in ${src}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __start="$(eval format_"${f}" "${srcstart}")"
+ __end="$(eval format_"${f}" "${srcend}")"
+
+ if [ "${f}" = "proto" ]; then
+ __expr="${__expr}${__start}"
+ else
+ __expr="${__expr}${__start}-${__end}"
+ fi
+ done
+
+ if [ -n "${timeout}" ]; then
+ echo "${__expr} timeout ${timeout}s }"
+ else
+ echo "${__expr} }"
+ fi
+}
+
+# Format destination and source fields into nft type, start element only
+format_norange() {
+ __expr="{ "
+
+ for f in ${dst}; do
+ [ "${__expr}" != "{ " ] && __expr="${__expr} . "
+
+ __expr="${__expr}$(eval format_"${f}" "${start}")"
+ done
+ for f in ${src}; do
+ __expr="${__expr} . $(eval format_"${f}" "${start}")"
+ done
+
+ echo "${__expr} }"
+}
+
+# Format first destination field into nft type
+format_noconcat() {
+ for f in ${dst}; do
+ __start="$(eval format_"${f}" "${start}")"
+ __end="$(eval format_"${f}" "${end}")"
+
+ if [ "${f}" = "proto" ]; then
+ echo "{ ${__start} }"
+ else
+ echo "{ ${__start}-${__end} }"
+ fi
+ return
+ done
+}
+
+# Add single entry to 'test' set in 'inet filter' table
+add() {
+ if ! nft add element inet filter test "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Format and output entries for sets in 'netdev perf' table
+add_perf() {
+ if [ "${1}" = "test" ]; then
+ echo "add element netdev perf test $(format)"
+ elif [ "${1}" = "norange" ]; then
+ echo "add element netdev perf norange $(format_norange)"
+ elif [ "${1}" = "noconcat" ]; then
+ echo "add element netdev perf noconcat $(format_noconcat)"
+ fi
+}
+
+# Add single entry to 'norange' set in 'netdev perf' table
+add_perf_norange() {
+ if ! nft add element netdev perf norange "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Add single entry to 'noconcat' set in 'netdev perf' table
+add_perf_noconcat() {
+ if ! nft add element netdev perf noconcat "${1}"; then
+ err "Failed to add ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Delete single entry from set
+del() {
+ if ! nft delete element inet filter test "${1}"; then
+ err "Failed to delete ${1} given ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Return packet count from 'test' counter in 'inet filter' table
+count_packets() {
+ found=0
+ for token in $(nft list counter inet filter test); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Return packet count from 'test' counter in 'netdev perf' table
+count_perf_packets() {
+ found=0
+ for token in $(nft list counter netdev perf test); do
+ [ ${found} -eq 1 ] && echo "${token}" && return
+ [ "${token}" = "packets" ] && found=1
+ done
+}
+
+# Set MAC addresses, send traffic according to specifier
+flood() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval flood_\$proto
+}
+
+# Set MAC addresses, start pktgen injection
+perf() {
+ dst_mac="$(format_mac "${1}")"
+ ip link set veth_a address "${dst_mac}"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval perf_\$perf_proto
+}
+
+# Set MAC addresses, send single packet, check that it matches, reset counter
+send_match() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval send_\$proto
+ if [ "$(count_packets)" != "1" ]; then
+ err "${proto} packet to:"
+ err " $(for f in ${dst}; do
+ eval format_\$f "${1}"; printf ' '; done)"
+ err "from:"
+ err " $(for f in ${src}; do
+ eval format_\$f "${2}"; printf ' '; done)"
+ err "should have matched ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+ nft reset counter inet filter test >/dev/null
+}
+
+# Set MAC addresses, send single packet, check that it doesn't match
+send_nomatch() {
+ ip link set veth_a address "$(format_mac "${1}")"
+ ip -n B link set veth_b address "$(format_mac "${2}")"
+
+ for f in ${dst}; do
+ eval dst_"$f"=\$\(format_\$f "${1}"\)
+ done
+ for f in ${src}; do
+ eval src_"$f"=\$\(format_\$f "${2}"\)
+ done
+ eval send_\$proto
+ if [ "$(count_packets)" != "0" ]; then
+ err "${proto} packet to:"
+ err " $(for f in ${dst}; do
+ eval format_\$f "${1}"; printf ' '; done)"
+ err "from:"
+ err " $(for f in ${src}; do
+ eval format_\$f "${2}"; printf ' '; done)"
+ err "should not have matched ruleset:"
+ err "$(nft -a list ruleset)"
+ return 1
+ fi
+}
+
+# Correctness test template:
+# - add ranged element, check that packets match it
+# - check that packets outside range don't match it
+# - remove some elements, check that packets don't match anymore
+test_correctness() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+ send_nomatch $((end + 1)) $((end + 1 + src_delta)) || return 1
+
+ # Delete elements now and then
+ if [ $((i % 3)) -eq 0 ]; then
+ del "$(format)" || return 1
+ for j in $(seq "$start" \
+ $((range_size / 2 + 1)) ${end}); do
+ send_nomatch "${j}" $((j + src_delta)) \
+ || return 1
+ done
+ fi
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+}
+
+# Concurrency test template:
+# - add all the elements
+# - start a thread for each physical thread that:
+# - adds all the elements
+# - flushes the set
+# - adds all the elements
+# - flushes the entire ruleset
+# - adds the set back
+# - adds all the elements
+# - delete all the elements
+test_concurrency() {
+ proto=${flood_proto}
+ tools=${flood_tools}
+ chain_spec=${flood_spec}
+ setup veth flood_"${proto}" set || return ${ksft_skip}
+
+ range_size=1
+ cstart=${start}
+ flood_pids=
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+
+ flood "${i}" $((i + src_delta)) & flood_pids="${flood_pids} $!"
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ sleep $((RANDOM%10))
+
+ pids=
+ for c in $(seq 1 "$(nproc)"); do (
+ for r in $(seq 1 "${race_repeat}"); do
+ range_size=1
+
+ # $start needs to be local to this subshell
+ # shellcheck disable=SC2030
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush inet filter test 2>/dev/null
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+ setup set 2>/dev/null
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ range_size=1
+ start=${cstart}
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ del "$(format)" 2>/dev/null
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+ done
+ ) & pids="${pids} $!"
+ done
+
+ # shellcheck disable=SC2046,SC2086 # word splitting wanted here
+ wait $(for pid in ${pids}; do echo ${pid}; done)
+ # shellcheck disable=SC2046,SC2086
+ kill $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+ # shellcheck disable=SC2046,SC2086
+ wait $(for pid in ${flood_pids}; do echo ${pid}; done) 2>/dev/null
+
+ return 0
+}
+
+# Timeout test template:
+# - add all the elements with 3s timeout while checking that packets match
+# - wait 3s after the last insertion, check that packets don't match any entry
+test_timeout() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+
+ timeout=3
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && timeout=8
+
+ range_size=1
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+ sleep $timeout
+ for i in $(seq "$start" $((start + count))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_nomatch "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+}
+
+# Performance test template:
+# - add concatenated ranged entries
+# - add non-ranged concatenated entries (for hash set matching rate baseline)
+# - add ranged entries with first field only (for rbhash baseline)
+# - start pktgen injection directly on device rx path of this namespace
+# - measure drop only rate, hash and rbtree baselines, then matching rate
+test_performance() {
+ chain_spec=${perf_spec}
+ dst="${perf_dst}"
+ src="${perf_src}"
+ setup veth perf set || return ${ksft_skip}
+
+ first=${start}
+ range_size=1
+ for set in test norange noconcat; do
+ start=${first}
+ for i in $(seq "$start" $((start + perf_entries))); do
+ end=$((start + range_size))
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ elif [ "$start" -eq "$end" ]; then
+ end=$((start + 1))
+ fi
+
+ add_perf ${set}
+
+ start=$((end + range_size))
+ done > "${tmp}"
+ nft -f "${tmp}"
+ done
+
+ perf $((end - 1)) "$srcstart"
+
+ sleep 2
+
+ nft add rule netdev perf test counter name \"test\" drop
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline (drop from netdev hook): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec} @norange \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline hash (non-ranged entries): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec%%. *} @noconcat \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ info " baseline rbtree (match on first field only): ${pps}pps"
+ handle="$(nft -a list chain netdev perf test | grep counter)"
+ handle="${handle##* }"
+ nft delete rule netdev perf test handle "${handle}"
+
+ nft add rule "netdev perf test ${chain_spec} @test \
+ counter name \"test\" drop"
+ nft reset counter netdev perf test >/dev/null 2>&1
+ sleep "${perf_duration}"
+ pps="$(printf %10s $(($(count_perf_packets) / perf_duration)))"
+ p5="$(printf %5s "${perf_entries}")"
+ info " set with ${p5} full, ranged entries: ${pps}pps"
+ kill "${perf_pid}"
+}
+
+test_bug_flush_remove_add() {
+ rounds=100
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && rounds=10
+
+ set_cmd='{ set s { type ipv4_addr . inet_service; flags interval; }; }'
+ elem1='{ 10.0.0.1 . 22-25, 10.0.0.1 . 10-20 }'
+ elem2='{ 10.0.0.1 . 10-20, 10.0.0.1 . 22-25 }'
+ for i in $(seq 1 $rounds); do
+ nft add table t "$set_cmd" || return ${ksft_skip}
+ nft add element t s "$elem1" 2>/dev/null || return 1
+ nft flush set t s 2>/dev/null || return 1
+ nft add element t s "$elem2" 2>/dev/null || return 1
+ done
+ nft flush ruleset
+}
+
+# - add ranged element, check that packets match it
+# - reload the set, check packets still match
+test_bug_reload() {
+ setup veth send_"${proto}" set || return ${ksft_skip}
+ rstart=${start}
+
+ range_size=1
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ add "$(format)" || return 1
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ # check kernel does allocate pcpu sctrach map
+ # for reload with no elemet add/delete
+ ( echo flush set inet filter test ;
+ nft list set inet filter test ) | nft -f -
+
+ start=${rstart}
+ range_size=1
+
+ for i in $(seq "${start}" $((start + count))); do
+ end=$((start + range_size))
+
+ # Avoid negative or zero-sized port ranges
+ if [ $((end / 65534)) -gt $((start / 65534)) ]; then
+ start=${end}
+ end=$((end + 1))
+ fi
+ srcstart=$((start + src_delta))
+ srcend=$((end + src_delta))
+
+ for j in $(seq "$start" $((range_size / 2 + 1)) ${end}); do
+ send_match "${j}" $((j + src_delta)) || return 1
+ done
+
+ range_size=$((range_size + 1))
+ start=$((end + range_size))
+ done
+
+ nft flush ruleset
+}
+
+test_reported_issues() {
+ eval test_bug_"${subtest}"
+}
+
+# Run everything in a separate network namespace
+[ "${1}" != "run" ] && { unshare -n "${0}" run; exit $?; }
+tmp="$(mktemp)"
+trap cleanup_exit EXIT
+
+# Entry point for test runs
+passed=0
+for name in ${TESTS}; do
+ printf "TEST: %s\n" "$(echo "$name" | tr '_' ' ')"
+ if [ "${name}" = "reported_issues" ]; then
+ SUBTESTS="${BUGS}"
+ else
+ SUBTESTS="${TYPES}"
+ fi
+
+ for subtest in ${SUBTESTS}; do
+ eval desc=\$TYPE_"${subtest}"
+ IFS='
+'
+ for __line in ${desc}; do
+ # shellcheck disable=SC2086
+ eval ${__line%% *}=\"${__line##* }\";
+ done
+ IFS='
+'
+
+ if [ "${name}" = "concurrency" ] && \
+ [ "${race_repeat}" = "0" ]; then
+ continue
+ fi
+ if [ "${name}" = "performance" ] && \
+ [ "${perf_duration}" = "0" ]; then
+ continue
+ fi
+
+ [ "$KSFT_MACHINE_SLOW" = "yes" ] && count=1
+
+ printf " %-32s " "${display}"
+ tthen=$(date +%s)
+ eval test_"${name}"
+ ret=$?
+
+ tnow=$(date +%s)
+ printf "%5ds%-30s" $((tnow-tthen))
+
+ if [ $ret -eq 0 ]; then
+ printf "[ OK ]\n"
+ info_flush
+ passed=$((passed + 1))
+ elif [ $ret -eq 1 ]; then
+ printf "[FAIL]\n"
+ err_flush
+ exit 1
+ elif [ $ret -eq ${ksft_skip} ]; then
+ printf "[SKIP]\n"
+ err_flush
+ fi
+ done
+done
+
+[ ${passed} -eq 0 ] && exit ${ksft_skip} || exit 0
diff --git a/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
new file mode 100755
index 000000000000..5d276995a5c5
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_concat_range_perf.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+
+source lib.sh
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && exit ${ksft_skip}
+
+NFT_CONCAT_RANGE_TESTS="performance" exec ./nft_concat_range.sh
diff --git a/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
new file mode 100755
index 000000000000..abcaa7337197
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_conntrack_helper.sh
@@ -0,0 +1,171 @@
+#!/bin/bash
+#
+# This tests connection tracking helper assignment:
+# 1. can attach ftp helper to a connection from nft ruleset.
+# 2. auto-assign still works.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+testipv6=1
+
+checktool "socat -h" "run test without socat"
+checktool "conntrack --version" "run test without conntrack"
+checktool "nft --version" "run test without nft"
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+
+ ip netns del "$ns1"
+ ip netns del "$ns2"
+}
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2
+
+if ! ip link add veth0 netns "$ns1" type veth peer name veth0 netns "$ns2" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+
+ip -net "$ns1" link set veth0 up
+ip -net "$ns2" link set veth0 up
+
+ip -net "$ns1" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns2" addr add 10.0.1.2/24 dev veth0
+ip -net "$ns2" addr add dead:1::2/64 dev veth0 nodad
+
+load_ruleset_family() {
+ local family=$1
+ local ns=$2
+
+ip netns exec "$ns" nft -f - <<EOF
+table $family raw {
+ ct helper ftp {
+ type "ftp" protocol tcp
+ }
+ chain pre {
+ type filter hook prerouting priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 2121 ct helper set "ftp"
+ }
+}
+EOF
+ return $?
+}
+
+check_for_helper()
+{
+ local netns=$1
+ local message=$2
+ local port=$3
+
+ if echo "$message" |grep -q 'ipv6';then
+ local family="ipv6"
+ else
+ local family="ipv4"
+ fi
+
+ if ! ip netns exec "$netns" conntrack -L -f $family -p tcp --dport "$port" 2> /dev/null |grep -q 'helper=ftp';then
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "FAIL: ${netns} did not show attached helper $message" 1>&2
+ ret=1
+ else
+ echo "PASS: ${netns} did not show attached helper $message" 1>&2
+ fi
+ else
+ if [ "$autoassign" -eq 0 ] ;then
+ echo "PASS: ${netns} connection on port $port has ftp helper attached" 1>&2
+ else
+ echo "FAIL: ${netns} connection on port $port has ftp helper attached" 1>&2
+ ret=1
+ fi
+ fi
+
+ return 0
+}
+
+listener_ready()
+{
+ ns="$1"
+ port="$2"
+ proto="$3"
+ ss -N "$ns" -lnt -o "sport = :$port" | grep -q "$port"
+}
+
+test_helper()
+{
+ local port=$1
+ local autoassign=$2
+
+ if [ "$autoassign" -eq 0 ] ;then
+ msg="set via ruleset"
+ else
+ msg="auto-assign"
+ fi
+
+ ip netns exec "$ns2" socat -t 3 -u -4 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2" "$port" "-4"
+
+ ip netns exec "$ns1" socat -u -4 STDIN TCP:10.0.1.2:"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ip $msg" "$port" "$autoassign"
+ check_for_helper "$ns2" "ip $msg" "$port" "$autoassign"
+
+ if [ $testipv6 -eq 0 ] ;then
+ return 0
+ fi
+
+ ip netns exec "$ns1" conntrack -F 2> /dev/null
+ ip netns exec "$ns2" conntrack -F 2> /dev/null
+
+ ip netns exec "$ns2" socat -t 3 -u -6 TCP-LISTEN:"$port",reuseaddr STDOUT > /dev/null &
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" "$port" "-6"
+
+ ip netns exec "$ns1" socat -t 3 -u -6 STDIN TCP:"[dead:1::2]":"$port" < /dev/null > /dev/null
+
+ check_for_helper "$ns1" "ipv6 $msg" "$port"
+ check_for_helper "$ns2" "ipv6 $msg" "$port"
+}
+
+if ! load_ruleset_family ip "$ns1"; then
+ echo "FAIL: ${ns1} cannot load ip ruleset" 1>&2
+ exit 1
+fi
+
+if ! load_ruleset_family ip6 "$ns1"; then
+ echo "SKIP: ${ns1} cannot load ip6 ruleset" 1>&2
+ testipv6=0
+fi
+
+if ! load_ruleset_family inet "${ns2}"; then
+ echo "SKIP: ${ns1} cannot load inet ruleset" 1>&2
+ if ! load_ruleset_family ip "${ns2}"; then
+ echo "FAIL: ${ns2} cannot load ip ruleset" 1>&2
+ exit 1
+ fi
+
+ if [ "$testipv6" -eq 1 ] ;then
+ if ! load_ruleset_family ip6 "$ns2"; then
+ echo "FAIL: ${ns2} cannot load ip6 ruleset" 1>&2
+ exit 1
+ fi
+ fi
+fi
+
+test_helper 2121 0
+ip netns exec "$ns1" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+ip netns exec "$ns2" sysctl -qe 'net.netfilter.nf_conntrack_helper=1'
+test_helper 21 1
+
+exit $ret
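Review bot: The header comment says the test checks that "auto-assign still works", but in `check_for_helper` the `autoassign` non-zero branch prints PASS when no `helper=ftp` entry is listed and FAIL (with `ret=1`) when one is, so the `test_helper 21 1` run asserts the opposite of what the header documents. If that is the intended behaviour, the header should say so, for example `# 2. auto-assign (nf_conntrack_helper=1) does not attach a helper.` The two `sysctl -qe` calls before that run use `-e`, which ignores unknown keys, so the output cannot distinguish a kernel without the sysctl from one where it was set and no helper was attached; a one-line comment stating which case is expected would help. Separately, the inet-ruleset SKIP message names `${ns1}` although the load was attempted in `${ns2}`; it should read `echo "SKIP: ${ns2} cannot load inet ruleset" 1>&2`.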
diff --git a/tools/testing/selftests/net/netfilter/nft_fib.sh b/tools/testing/selftests/net/netfilter/nft_fib.sh
new file mode 100755
index 000000000000..ce1451c275fd
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_fib.sh
@@ -0,0 +1,234 @@
+#!/bin/bash
+#
+# This tests the fib expression.
+#
+# Kselftest framework requirement - SKIP code is 4.
+
+source lib.sh
+
+ret=0
+
+timeout=4
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+cleanup()
+{
+ cleanup_all_ns
+
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns=$log_netns
+}
+
+checktool "nft --version" "run test without nft"
+
+setup_ns nsrouter ns1 ns2
+
+trap cleanup EXIT
+
+if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg -c | grep ' nft_rpfilter: '
+ echo "WARN: a previous test run has failed" 1>&2
+fi
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+load_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ fib saddr . iif oif missing counter log prefix "$netns nft_rpfilter: " drop
+ }
+}
+EOF
+}
+
+load_pbr_ruleset() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain forward {
+ type filter hook forward priority raw;
+ fib saddr . iif oif gt 0 accept
+ log drop
+ }
+}
+EOF
+}
+
+load_ruleset_count() {
+ local netns=$1
+
+ip netns exec "$netns" nft -f /dev/stdin <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority 0; policy accept;
+ ip daddr 1.1.1.1 fib saddr . iif oif missing counter drop
+ ip6 daddr 1c3::c01d fib saddr . iif oif missing counter drop
+ }
+}
+EOF
+}
+
+check_drops() {
+ if dmesg | grep -q ' nft_rpfilter: ';then
+ dmesg | grep ' nft_rpfilter: '
+ echo "FAIL: rpfilter did drop packets"
+ return 1
+ fi
+
+ return 0
+}
+
+check_fib_counter() {
+ local want=$1
+ local ns=$2
+ local address=$3
+
+ if ! ip netns exec "$ns" nft list table inet filter | grep 'fib saddr . iif' | grep "$address" | grep -q "packets $want";then
+ echo "Netns $ns fib counter doesn't match expected packet count of $want for $address" 1>&2
+ ip netns exec "$ns" nft list table inet filter
+ return 1
+ fi
+
+ if [ "$want" -gt 0 ]; then
+ echo "PASS: fib expression did drop packets for $address"
+ fi
+
+ return 0
+}
+
+load_ruleset "$nsrouter"
+load_ruleset "$ns1"
+load_ruleset "$ns2"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+test_ping() {
+ local daddr4=$1
+ local daddr6=$2
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr4" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr4, ret $ret" 1>&2
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q "$daddr6" > /dev/null; then
+ check_drops
+ echo "FAIL: ${ns1} cannot reach $daddr6, ret $ret" 1>&2
+ return 1
+ fi
+
+ return 0
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.all.rp_filter=0 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.rp_filter=0 > /dev/null
+
+test_ping 10.0.2.1 dead:2::1 || exit 1
+check_drops || exit 1
+
+test_ping 10.0.2.99 dead:2::99 || exit 1
+check_drops || exit 1
+
+echo "PASS: fib expression did not cause unwanted packet drops"
+
+ip netns exec "$nsrouter" nft flush table inet filter
+
+ip -net "$ns1" route del default
+ip -net "$ns1" -6 route del default
+
+ip -net "$ns1" addr del 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr del dead:1::99/64 dev eth0
+
+ip -net "$ns1" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr add dead:2::99/64 dev eth0 nodad
+
+ip -net "$ns1" route add default via 10.0.2.1
+ip -net "$ns1" -6 route add default via dead:2::1
+
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth0 nodad
+
+# switch to ruleset that doesn't log, this time
+# its expected that this does drop the packets.
+load_ruleset_count "$nsrouter"
+
+# ns1 has a default route, but nsrouter does not.
+# must not check return value, ping to 1.1.1.1 will
+# fail.
+check_fib_counter 0 "$nsrouter" 1.1.1.1 || exit 1
+check_fib_counter 0 "$nsrouter" 1c3::c01d || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -c 1 -q 1.1.1.1 > /dev/null
+check_fib_counter 1 "$nsrouter" 1.1.1.1 || exit 1
+
+ip netns exec "$ns1" ping -W 0.5 -i 0.1 -c 3 -q 1c3::c01d > /dev/null
+check_fib_counter 3 "$nsrouter" 1c3::c01d || exit 1
+
+# delete all rules
+ip netns exec "$ns1" nft flush ruleset
+ip netns exec "$ns2" nft flush ruleset
+ip netns exec "$nsrouter" nft flush ruleset
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+
+ip -net "$ns1" addr del 10.0.2.99/24 dev eth0
+ip -net "$ns1" addr del dead:2::99/64 dev eth0
+
+ip -net "$nsrouter" addr del dead:2::1/64 dev veth0
+
+# ... pbr ruleset for the router, check iif+oif.
+if ! load_pbr_ruleset "$nsrouter";then
+ echo "SKIP: Could not load fib forward ruleset"
+ exit $ksft_skip
+fi
+
+ip -net "$nsrouter" rule add from all table 128
+ip -net "$nsrouter" rule add from all iif veth0 table 129
+ip -net "$nsrouter" route add table 128 to 10.0.1.0/24 dev veth0
+ip -net "$nsrouter" route add table 129 to 10.0.2.0/24 dev veth1
+
+# drop main ipv4 table
+ip -net "$nsrouter" -4 rule delete table main
+
+if ! test_ping 10.0.2.99 dead:2::99;then
+ ip -net "$nsrouter" nft list ruleset
+ echo "FAIL: fib mismatch in pbr setup"
+ exit 1
+fi
+
+echo "PASS: fib expression forward check with policy based routing"
+exit 0
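Review bot: The failure path of the final policy-routing check runs `ip -net "$nsrouter" nft list ruleset`, which passes `nft` to `ip` as an object name instead of executing nft inside the namespace, so the ruleset is never dumped when the test fails. It should be `ip netns exec "$nsrouter" nft list ruleset`, matching the other nft calls in this file. The FAIL messages in `test_ping` print `ret $ret`, but `ret` is only assigned 0 at the top of the script and never updated, so that part of the message carries no information and could be dropped. The startup check uses `dmesg -c`, which clears the whole kernel ring buffer rather than only the ` nft_rpfilter: ` lines; a comment such as `# NOTE: dmesg -c discards all pending kernel messages, not just ours` would warn anyone running this on a host whose kernel log is needed.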
diff --git a/tools/testing/selftests/net/netfilter/nft_flowtable.sh b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
new file mode 100755
index 000000000000..b3995550856a
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_flowtable.sh
@@ -0,0 +1,671 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# This tests basic flowtable functionality.
+# Creates following default topology:
+#
+# Originator (MTU 9000) <-Router1-> MTU 1500 <-Router2-> Responder (MTU 2000)
+# Router1 is the one doing flow offloading, Router2 has no special
+# purpose other than having a link that is smaller than either Originator
+# and responder, i.e. TCPMSS announced values are too large and will still
+# result in fragmentation and/or PMTU discovery.
+#
+# You can check with different Orgininator/Link/Responder MTU eg:
+# nft_flowtable.sh -o8000 -l1500 -r2000
+#
+
+source lib.sh
+
+ret=0
+SOCAT_TIMEOUT=60
+
+nsin=""
+ns1out=""
+ns2out=""
+
+log_netns=$(sysctl -n net.netfilter.nf_log_all_netns)
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
+
+setup_ns ns1 ns2 nsr1 nsr2
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+
+ rm -f "$nsin" "$ns1out" "$ns2out"
+
+ [ "$log_netns" -eq 0 ] && sysctl -q net.netfilter.nf_log_all_netns="$log_netns"
+}
+
+trap cleanup EXIT
+
+sysctl -q net.netfilter.nf_log_all_netns=1
+
+ip link add veth0 netns "$nsr1" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr1" type veth peer name veth0 netns "$nsr2"
+
+ip link add veth1 netns "$nsr2" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+ ip -net "$nsr1" link set "$dev" up
+ ip -net "$nsr2" link set "$dev" up
+done
+
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsr2" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsr2" addr add dead:2::1/64 dev veth1 nodad
+
+# set different MTUs so we need to push packets coming from ns1 (large MTU)
+# to ns2 (smaller MTU) to stack either to perform fragmentation (ip_no_pmtu_disc=1),
+# or to do PTMU discovery (send ICMP error back to originator).
+# ns2 is going via nsr2 with a smaller mtu, so that TCPMSS announced by both peers
+# is NOT the lowest link mtu.
+
+omtu=9000
+lmtu=1500
+rmtu=2000
+
+usage(){
+ echo "nft_flowtable.sh [OPTIONS]"
+ echo
+ echo "MTU options"
+ echo " -o originator"
+ echo " -l link"
+ echo " -r responder"
+ exit 1
+}
+
+while getopts "o:l:r:" o
+do
+ case $o in
+ o) omtu=$OPTARG;;
+ l) lmtu=$OPTARG;;
+ r) rmtu=$OPTARG;;
+ *) usage;;
+ esac
+done
+
+if ! ip -net "$nsr1" link set veth0 mtu "$omtu"; then
+ exit 1
+fi
+
+ip -net "$ns1" link set eth0 mtu "$omtu"
+
+if ! ip -net "$nsr2" link set veth1 mtu "$rmtu"; then
+ exit 1
+fi
+
+if ! ip -net "$nsr1" link set veth1 mtu "$lmtu"; then
+ exit 1
+fi
+
+if ! ip -net "$nsr2" link set veth0 mtu "$lmtu"; then
+ exit 1
+fi
+
+ip -net "$ns2" link set eth0 mtu "$rmtu"
+
+# transfer-net between nsr1 and nsr2.
+# these addresses are not used for connections.
+ip -net "$nsr1" addr add 192.168.10.1/24 dev veth1
+ip -net "$nsr1" addr add fee1:2::1/64 dev veth1 nodad
+
+ip -net "$nsr2" addr add 192.168.10.2/24 dev veth0
+ip -net "$nsr2" addr add fee1:2::2/64 dev veth0 nodad
+
+for i in 0 1; do
+ ip netns exec "$nsr1" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+ ip netns exec "$nsr2" sysctl net.ipv4.conf.veth$i.forwarding=1 > /dev/null
+done
+
+for ns in "$ns1" "$ns2";do
+ ip -net "$ns" link set eth0 up
+
+ if ! ip netns exec "$ns" sysctl net.ipv4.tcp_no_metrics_save=1 > /dev/null; then
+ echo "ERROR: Check Originator/Responder values (problem during address addition)"
+ exit 1
+ fi
+ # don't set ip DF bit for first two tests
+ ip netns exec "$ns" sysctl net.ipv4.ip_no_pmtu_disc=1 > /dev/null
+done
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$ns2" route add default via dead:2::1
+
+ip -net "$nsr1" route add default via 192.168.10.2
+ip -net "$nsr2" route add default via 192.168.10.1
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table inet filter {
+ flowtable f1 {
+ hook ingress priority 0
+ devices = { veth0, veth1 }
+ }
+
+ counter routed_orig { }
+ counter routed_repl { }
+
+ chain forward {
+ type filter hook forward priority 0; policy drop;
+
+ # flow offloaded? Tag ct with mark 1, so we can detect when it fails.
+ meta oif "veth1" tcp dport 12345 ct mark set 1 flow add @f1 counter name routed_orig accept
+
+ # count packets supposedly offloaded as per direction.
+ ct mark 1 counter name ct direction map { original : routed_orig, reply : routed_repl } accept
+
+ ct state established,related accept
+
+ meta nfproto ipv4 meta l4proto icmp accept
+ meta nfproto ipv6 meta l4proto icmpv6 accept
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not load nft ruleset"
+ exit $ksft_skip
+fi
+
+ip netns exec "$ns2" nft -f - <<EOF
+table inet filter {
+ counter ip4dscp0 { }
+ counter ip4dscp3 { }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ meta l4proto tcp goto {
+ ip dscp cs3 counter name ip4dscp3 accept
+ ip dscp 0 counter name ip4dscp0 accept
+ }
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo -n "SKIP: Could not load ruleset: "
+ nft --version
+ exit $ksft_skip
+fi
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+nsin=$(mktemp)
+ns1out=$(mktemp)
+ns2out=$(mktemp)
+
+make_file()
+{
+ name=$1
+
+ SIZE=$((RANDOM % (1024 * 128)))
+ SIZE=$((SIZE + (1024 * 8)))
+ TSIZE=$((SIZE * 1024))
+
+ dd if=/dev/urandom of="$name" bs=1024 count=$SIZE 2> /dev/null
+
+ SIZE=$((RANDOM % 1024))
+ SIZE=$((SIZE + 128))
+ TSIZE=$((TSIZE + SIZE))
+ dd if=/dev/urandom conf=notrunc of="$name" bs=1 count=$SIZE 2> /dev/null
+}
+
+check_counters()
+{
+ local what=$1
+ local ok=1
+
+ local orig repl
+ orig=$(ip netns exec "$nsr1" nft reset counter inet filter routed_orig | grep packets)
+ repl=$(ip netns exec "$nsr1" nft reset counter inet filter routed_repl | grep packets)
+
+ local orig_cnt=${orig#*bytes}
+ local repl_cnt=${repl#*bytes}
+
+ local fs
+ fs=$(du -sb "$nsin")
+ local max_orig=${fs%%/*}
+ local max_repl=$((max_orig/4))
+
+ # flowtable fastpath should bypass normal routing one, i.e. the counters in forward hook
+ # should always be lower than the size of the transmitted file (max_orig).
+ if [ "$orig_cnt" -gt "$max_orig" ];then
+ echo "FAIL: $what: original counter $orig_cnt exceeds expected value $max_orig" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ "$repl_cnt" -gt $max_repl ];then
+ echo "FAIL: $what: reply counter $repl_cnt exceeds expected value $max_repl" 1>&2
+ ret=1
+ ok=0
+ fi
+
+ if [ $ok -eq 1 ]; then
+ echo "PASS: $what"
+ fi
+}
+
+check_dscp()
+{
+ local what=$1
+ local ok=1
+
+ local counter
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp3 | grep packets)
+
+ local pc4=${counter%*bytes*}
+ local pc4=${pc4#*packets}
+
+ counter=$(ip netns exec "$ns2" nft reset counter inet filter ip4dscp0 | grep packets)
+ local pc4z=${counter%*bytes*}
+ local pc4z=${pc4z#*packets}
+
+ case "$what" in
+ "dscp_none")
+ if [ "$pc4" -gt 0 ] || [ "$pc4z" -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 == 0, dscp0 > 0, but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_fwd")
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -eq 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 and dscp0 > 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_ingress")
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ "dscp_egress")
+ if [ "$pc4" -eq 0 ] || [ "$pc4z" -gt 0 ]; then
+ echo "FAIL: dscp counters do not match, expected dscp3 > 0, dscp0 == 0 but got $pc4,$pc4z" 1>&2
+ ret=1
+ ok=0
+ fi
+ ;;
+ *)
+ echo "FAIL: Unknown DSCP check" 1>&2
+ ret=1
+ ok=0
+ esac
+
+ if [ "$ok" -eq 1 ] ;then
+ echo "PASS: $what: dscp packet counters match"
+ fi
+}
+
+check_transfer()
+{
+ in=$1
+ out=$2
+ what=$3
+
+ if ! cmp "$in" "$out" > /dev/null 2>&1; then
+ echo "FAIL: file mismatch for $what" 1>&2
+ ls -l "$in"
+ ls -l "$out"
+ return 1
+ fi
+
+ return 0
+}
+
+listener_ready()
+{
+ ss -N "$nsb" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forwarding_ip()
+{
+ local nsa=$1
+ local nsb=$2
+ local dstip=$3
+ local dstport=$4
+ local lret=0
+
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsb" socat -4 TCP-LISTEN:12345,reuseaddr STDIO < "$nsin" > "$ns2out" &
+ lpid=$!
+
+ busywait 1000 listener_ready
+
+ timeout "$SOCAT_TIMEOUT" ip netns exec "$nsa" socat -4 TCP:"$dstip":"$dstport" STDIO < "$nsin" > "$ns1out"
+
+ wait $lpid
+
+ if ! check_transfer "$nsin" "$ns2out" "ns1 -> ns2"; then
+ lret=1
+ ret=1
+ fi
+
+ if ! check_transfer "$nsin" "$ns1out" "ns1 <- ns2"; then
+ lret=1
+ ret=1
+ fi
+
+ return $lret
+}
+
+test_tcp_forwarding()
+{
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+
+ return $?
+}
+
+test_tcp_forwarding_set_dscp()
+{
+ check_dscp "dscp_none"
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook ingress device "veth0" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_ingress"
+
+ ip netns exec "$nsr1" nft delete table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:ingress for veth0"
+fi
+
+ip netns exec "$nsr1" nft -f - <<EOF
+table netdev dscpmangle {
+ chain setdscp0 {
+ type filter hook egress device "veth1" priority 0; policy accept
+ ip dscp set cs3
+ }
+}
+EOF
+if [ $? -eq 0 ]; then
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_egress"
+
+ ip netns exec "$nsr1" nft flush table netdev dscpmangle
+else
+ echo "SKIP: Could not load netdev:egress for veth1"
+fi
+
+ # partial. If flowtable really works, then both dscp-is-0 and dscp-is-cs3
+ # counters should have seen packets (before and after ft offload kicks in).
+ ip netns exec "$nsr1" nft -a insert rule inet filter forward ip dscp set cs3
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ check_dscp "dscp_fwd"
+}
+
+test_tcp_forwarding_nat()
+{
+ local lret
+ local pmtu
+
+ test_tcp_forwarding_ip "$1" "$2" 10.0.2.99 12345
+ lret=$?
+
+ pmtu=$3
+ what=$4
+
+ if [ "$lret" -eq 0 ] ; then
+ if [ "$pmtu" -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with masquerade and pmtu discovery $what"
+ else
+ echo "PASS: flow offload for ns1/ns2 with masquerade $what"
+ fi
+
+ test_tcp_forwarding_ip "$1" "$2" 10.6.6.6 1666
+ lret=$?
+ if [ "$pmtu" -eq 1 ] ;then
+ check_counters "flow offload for ns1/ns2 with dnat and pmtu discovery $what"
+ elif [ "$lret" -eq 0 ] ; then
+ echo "PASS: flow offload for ns1/ns2 with dnat $what"
+ fi
+ fi
+
+ return $lret
+}
+
+make_file "$nsin"
+
+# First test:
+# No PMTU discovery, nsr1 is expected to fragment packets from ns1 to ns2 as needed.
+# Due to MTU mismatch in both directions, all packets (except small packets like pure
+# acks) have to be handled by normal forwarding path. Therefore, packet counters
+# are not checked.
+if test_tcp_forwarding "$ns1" "$ns2"; then
+ echo "PASS: flow offloaded for ns1/ns2"
+else
+ echo "FAIL: flow offload for ns1/ns2:" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# delete default route, i.e. ns2 won't be able to reach ns1 and
+# will depend on ns1 being masqueraded in nsr1.
+# expect ns1 has nsr1 address.
+ip -net "$ns2" route del default via 10.0.2.1
+ip -net "$ns2" route del default via dead:2::1
+ip -net "$ns2" route add 192.168.10.1 via 10.0.2.1
+
+# Second test:
+# Same, but with NAT enabled. Same as in first test: we expect normal forward path
+# to handle most packets.
+ip netns exec "$nsr1" nft -f - <<EOF
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "veth0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if ! test_tcp_forwarding_set_dscp "$ns1" "$ns2" 0 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with dscp update" 1>&2
+ exit 0
+fi
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 0 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with NAT" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# Third test:
+# Same as second test, but with PMTU discovery enabled. This
+# means that we expect the fastpath to handle packets as soon
+# as the endpoints adjust the packet size.
+ip netns exec "$ns1" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+ip netns exec "$ns2" sysctl net.ipv4.ip_no_pmtu_disc=0 > /dev/null
+
+# reset counters.
+# With pmtu in-place we'll also check that nft counters
+# are lower than file size and packets were forwarded via flowtable layer.
+# For earlier tests (large mtus), packets cannot be handled via flowtable
+# (except pure acks and other small packets).
+ip netns exec "$nsr1" nft reset counters table inet filter >/dev/null
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 ""; then
+ echo "FAIL: flow offload for ns1/ns2 with NAT and pmtu discovery" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+fi
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT enabled.
+test_bridge() {
+if ! ip -net "$nsr1" link add name br0 type bridge 2>/dev/null;then
+ echo "SKIP: could not add bridge br0"
+ [ "$ret" -eq 0 ] && ret=$ksft_skip
+ return
+fi
+ip -net "$nsr1" addr flush dev veth0
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set veth0 master br0
+ip -net "$nsr1" addr add 10.0.1.1/24 dev br0
+ip -net "$nsr1" addr add dead:1::1/64 dev br0 nodad
+ip -net "$nsr1" link set up dev br0
+
+ip netns exec "$nsr1" sysctl net.ipv4.conf.br0.forwarding=1 > /dev/null
+
+# br0 with NAT enabled.
+ip netns exec "$nsr1" nft -f - <<EOF
+flush table ip nat
+table ip nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif "br0" ip daddr 10.6.6.6 tcp dport 1666 counter dnat ip to 10.0.2.99:12345
+ }
+
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oifname "veth1" counter masquerade
+ }
+}
+EOF
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "on bridge"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+
+# Another test:
+# Add bridge interface br0 to Router1, with NAT and VLAN.
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set down dev veth0
+ip -net "$nsr1" link add link veth0 name veth0.10 type vlan id 10
+ip -net "$nsr1" link set up dev veth0
+ip -net "$nsr1" link set up dev veth0.10
+ip -net "$nsr1" link set veth0.10 master br0
+
+ip -net "$ns1" addr flush dev eth0
+ip -net "$ns1" link add link eth0 name eth0.10 type vlan id 10
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" link set eth0.10 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0.10
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0.10 nodad
+
+if ! test_tcp_forwarding_nat "$ns1" "$ns2" 1 "bridge and VLAN"; then
+ echo "FAIL: flow offload for ns1/ns2 with bridge NAT and VLAN" 1>&2
+ ip netns exec "$nsr1" nft list ruleset
+ ret=1
+fi
+
+# restore test topology (remove bridge and VLAN)
+ip -net "$nsr1" link set veth0 nomaster
+ip -net "$nsr1" link set veth0 down
+ip -net "$nsr1" link set veth0.10 down
+ip -net "$nsr1" link delete veth0.10 type vlan
+ip -net "$nsr1" link delete br0 type bridge
+ip -net "$ns1" addr flush dev eth0.10
+ip -net "$ns1" link set eth0.10 down
+ip -net "$ns1" link set eth0 down
+ip -net "$ns1" link delete eth0.10 type vlan
+
+# restore address in ns1 and nsr1
+ip -net "$ns1" link set eth0 up
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via dead:1::1
+ip -net "$nsr1" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr1" addr add dead:1::1/64 dev veth0 nodad
+ip -net "$nsr1" link set up dev veth0
+}
+
+test_bridge
+
+KEY_SHA="0x"$(ps -af | sha1sum | cut -d " " -f 1)
+KEY_AES="0x"$(ps -af | md5sum | cut -d " " -f 1)
+SPI1=$RANDOM
+SPI2=$RANDOM
+
+if [ $SPI1 -eq $SPI2 ]; then
+ SPI2=$((SPI2+1))
+fi
+
+do_esp() {
+ local ns=$1
+ local me=$2
+ local remote=$3
+ local lnet=$4
+ local rnet=$5
+ local spi_out=$6
+ local spi_in=$7
+
+ ip -net "$ns" xfrm state add src "$remote" dst "$me" proto esp spi "$spi_in" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$rnet" dst "$lnet"
+ ip -net "$ns" xfrm state add src "$me" dst "$remote" proto esp spi "$spi_out" enc aes "$KEY_AES" auth sha1 "$KEY_SHA" mode tunnel sel src "$lnet" dst "$rnet"
+
+ # to encrypt packets as they go out (includes forwarded packets that need encapsulation)
+ ip -net "$ns" xfrm policy add src "$lnet" dst "$rnet" dir out tmpl src "$me" dst "$remote" proto esp mode tunnel priority 1 action allow
+ # to fwd decrypted packets after esp processing:
+ ip -net "$ns" xfrm policy add src "$rnet" dst "$lnet" dir fwd tmpl src "$remote" dst "$me" proto esp mode tunnel priority 1 action allow
+}
+
+do_esp "$nsr1" 192.168.10.1 192.168.10.2 10.0.1.0/24 10.0.2.0/24 "$SPI1" "$SPI2"
+
+do_esp "$nsr2" 192.168.10.2 192.168.10.1 10.0.2.0/24 10.0.1.0/24 "$SPI2" "$SPI1"
+
+ip netns exec "$nsr1" nft delete table ip nat
+
+# restore default routes
+ip -net "$ns2" route del 192.168.10.1 via 10.0.2.1
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+if test_tcp_forwarding "$ns1" "$ns2"; then
+ check_counters "ipsec tunnel mode for ns1/ns2"
+else
+ echo "FAIL: ipsec tunnel mode for ns1/ns2"
+ ip netns exec "$nsr1" nft list ruleset 1>&2
+ ip netns exec "$nsr1" cat /proc/net/xfrm_stat 1>&2
+fi
+
+if [ "$1" = "" ]; then
+ low=1280
+ mtu=$((65536 - low))
+ o=$(((RANDOM%mtu) + low))
+ l=$(((RANDOM%mtu) + low))
+ r=$(((RANDOM%mtu) + low))
+
+ echo "re-run with random mtus: -o $o -l $l -r $r"
+ $0 -o "$o" -l "$l" -r "$r"
+fi
+
+exit $ret
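Review bot: The failure branch after `test_tcp_forwarding_set_dscp` prints FAIL and then runs `exit 0`, so if it were taken the selftest would report success to the harness; it should set `ret=1` like the neighbouring NAT checks, or `exit 1`. As written the branch looks unreachable: the function's status is that of its final `check_dscp` call, which ends in an `if` without `else` and so returns 0 even after setting `ret=1`. Tracking a local status and ending with `return $lret`, as `test_tcp_forwarding_nat` does, would make the check meaningful. Separately, the second `dd` in `make_file` passes `conf=notrunc`, presumably a typo for `conv=notrunc`; dd's error is hidden by `2> /dev/null`, so the extra tail is likely never written.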
diff --git a/tools/testing/selftests/net/netfilter/nft_meta.sh b/tools/testing/selftests/net/netfilter/nft_meta.sh
new file mode 100755
index 000000000000..71505b6cb252
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_meta.sh
@@ -0,0 +1,142 @@
+#!/bin/bash
+
+# check iif/iifname/oifgroup/iiftype match.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+sfx=$(mktemp -u "XXXXXXXX")
+ns0="ns0-$sfx"
+
+if ! nft --version > /dev/null 2>&1; then
+ echo "SKIP: Could not run test without nft tool"
+ exit $ksft_skip
+fi
+
+cleanup()
+{
+ ip netns del "$ns0"
+}
+
+ip netns add "$ns0"
+ip -net "$ns0" link set lo up
+ip -net "$ns0" addr add 127.0.0.1 dev lo
+
+trap cleanup EXIT
+
+currentyear=$(date +%Y)
+lastyear=$((currentyear-1))
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter iifcount {}
+ counter iifnamecount {}
+ counter iifgroupcount {}
+ counter iiftypecount {}
+ counter infproto4count {}
+ counter il4protocounter {}
+ counter imarkcounter {}
+ counter icpu0counter {}
+ counter ilastyearcounter {}
+ counter icurrentyearcounter {}
+
+ counter oifcount {}
+ counter oifnamecount {}
+ counter oifgroupcount {}
+ counter oiftypecount {}
+ counter onfproto4count {}
+ counter ol4protocounter {}
+ counter oskuidcounter {}
+ counter oskgidcounter {}
+ counter omarkcounter {}
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+
+ meta iif lo counter name "iifcount"
+ meta iifname "lo" counter name "iifnamecount"
+ meta iifgroup "default" counter name "iifgroupcount"
+ meta iiftype "loopback" counter name "iiftypecount"
+ meta nfproto ipv4 counter name "infproto4count"
+ meta l4proto icmp counter name "il4protocounter"
+ meta mark 42 counter name "imarkcounter"
+ meta cpu 0 counter name "icpu0counter"
+ meta time "$lastyear-01-01" - "$lastyear-12-31" counter name ilastyearcounter
+ meta time "$currentyear-01-01" - "$currentyear-12-31" counter name icurrentyearcounter
+ }
+
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oif lo counter name "oifcount" counter
+ meta oifname "lo" counter name "oifnamecount"
+ meta oifgroup "default" counter name "oifgroupcount"
+ meta oiftype "loopback" counter name "oiftypecount"
+ meta nfproto ipv4 counter name "onfproto4count"
+ meta l4proto icmp counter name "ol4protocounter"
+ meta skuid 0 counter name "oskuidcounter"
+ meta skgid 0 counter name "oskgidcounter"
+ meta mark 42 counter name "omarkcounter"
+ }
+}
+EOF
+
+if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add test ruleset"
+ exit $ksft_skip
+fi
+
+ret=0
+
+check_one_counter()
+{
+ local cname="$1"
+ local want="packets $2"
+ local verbose="$3"
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "$cname" | grep -q "$want"; then
+ echo "FAIL: $cname, want \"$want\", got"
+ ret=1
+ ip netns exec "$ns0" nft list counter inet filter "$cname"
+ fi
+}
+
+check_lo_counters()
+{
+ local want="$1"
+ local verbose="$2"
+ local counter
+
+ for counter in iifcount iifnamecount iifgroupcount iiftypecount infproto4count \
+ oifcount oifnamecount oifgroupcount oiftypecount onfproto4count \
+ il4protocounter icurrentyearcounter ol4protocounter \
+ ; do
+ check_one_counter "$counter" "$want" "$verbose"
+ done
+}
+
+check_lo_counters "0" false
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 -m 42 > /dev/null
+
+check_lo_counters "2" true
+
+check_one_counter oskuidcounter "1" true
+check_one_counter oskgidcounter "1" true
+check_one_counter imarkcounter "1" true
+check_one_counter omarkcounter "1" true
+check_one_counter ilastyearcounter "0" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta iif/oif counters at expected values"
+else
+ exit $ret
+fi
+
+#First CPU execution and counter
+taskset -p 01 $$ > /dev/null
+ip netns exec "$ns0" nft reset counters > /dev/null
+ip netns exec "$ns0" ping -q -c 1 127.0.0.1 > /dev/null
+check_one_counter icpu0counter "2" true
+
+if [ $ret -eq 0 ];then
+ echo "OK: nftables meta cpu counter at expected values"
+fi
+
+exit $ret
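Review bot: `check_one_counter` compares the counter with an unanchored `grep -q "$want"`, so an expected `packets 2` is also satisfied by 20 or 200 packets, and `packets 1` by 10–19. A rule that matches more often than intended would therefore still pass. Since `nft list counter` prints `packets N bytes M`, anchoring on the next field closes the gap: `local want="packets $2 bytes"`. The `verbose` local in `check_one_counter` and `check_lo_counters` is assigned but never read and could be dropped.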
diff --git a/tools/testing/selftests/net/netfilter/nft_nat.sh b/tools/testing/selftests/net/netfilter/nft_nat.sh
new file mode 100755
index 000000000000..9e39de26455f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_nat.sh
@@ -0,0 +1,1156 @@
+#!/bin/bash
+#
+# This test is for basic NAT functionality: snat, dnat, redirect, masquerade.
+#
+
+source lib.sh
+
+ret=0
+test_inet_nat=true
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -h" "run test without socat"
+
+cleanup()
+{
+ ip netns pids "$ns0" | xargs kill 2>/dev/null
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ rm -f "$INFILE" "$OUTFILE"
+
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+INFILE=$(mktemp)
+OUTFILE=$(mktemp)
+
+setup_ns ns0 ns1 ns2
+
+if ! ip link add veth0 netns "$ns0" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$ns0" type veth peer name eth0 netns "$ns2"
+
+ip -net "$ns0" link set veth0 up
+ip -net "$ns0" addr add 10.0.1.1/24 dev veth0
+ip -net "$ns0" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$ns0" link set veth1 up
+ip -net "$ns0" addr add 10.0.2.1/24 dev veth1
+ip -net "$ns0" addr add dead:2::1/64 dev veth1 nodad
+
+do_config()
+{
+ ns="$1"
+ subnet="$2"
+
+ ip -net "$ns" link set eth0 up
+ ip -net "$ns" addr add "10.0.$subnet.99/24" dev eth0
+ ip -net "$ns" route add default via "10.0.$subnet.1"
+ ip -net "$ns" addr add "dead:$subnet::99/64" dev eth0 nodad
+ ip -net "$ns" route add default via "dead:$subnet::1"
+}
+
+do_config "$ns1" 1
+do_config "$ns2" 2
+
+bad_counter()
+{
+ local ns=$1
+ local counter=$2
+ local expect=$3
+ local tag=$4
+
+ echo "ERROR: $counter counter in $ns has unexpected value (expected $expect) at $tag" 1>&2
+ ip netns exec "$ns" nft list counter inet filter "$counter" 1>&2
+}
+
+check_counters()
+{
+ ns=$1
+ local lret=0
+
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0in "packets 1 bytes 84" "check_counters 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out | grep -q "packets 1 bytes 84";then
+ bad_counter "$ns" ns0out "packets 1 bytes 84" "check_counters 2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ if ! ip netns exec "$ns" nft list counter inet filter ns0in6 | grep -q "$expect";then
+ bad_counter "$ns" ns0in6 "$expect" "check_counters 3"
+ lret=1
+ fi
+ if ! ip netns exec "$ns" nft list counter inet filter ns0out6 | grep -q "$expect";then
+ bad_counter "$ns" ns0out6 "$expect" "check_counters 4"
+ lret=1
+ fi
+
+ return $lret
+}
+
+check_ns0_counters()
+{
+ local ns=$1
+ local lret=0
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0in "packets 0 bytes 0" "check_ns0_counters 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0in6 | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0in6 "packets 0 bytes 0"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0out "packets 0 bytes 0" "check_ns0_counters 2"
+ lret=1
+ fi
+ if ! ip netns exec "$ns0" nft list counter inet filter ns0out6 | grep -q "packets 0 bytes 0";then
+ bad_counter "$ns0" ns0out6 "packets 0 bytes 0" "check_ns0_counters3 "
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ expect="packets 1 bytes 84"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}" "$expect" "check_ns0_counters 4"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ if ! ip netns exec "$ns0" nft list counter inet filter "${ns}${dir}6" | grep -q "$expect";then
+ bad_counter "$ns0" "$ns${dir}6" "$expect" "check_ns0_counters 5"
+ lret=1
+ fi
+ done
+
+ return $lret
+}
+
+reset_counters()
+{
+ for i in "$ns0" "$ns1" "$ns2" ;do
+ ip netns exec "$i" nft reset counters inet > /dev/null
+ done
+}
+
+test_local_dnat6()
+{
+ local family=$1
+ local lret=0
+ local IPF=""
+
+ if [ "$family" = "inet" ];then
+ IPF="ip6"
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip6 daddr dead:1::99 dnat $IPF to dead:2::99
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family dnat hook"
+ return $ksft_skip
+ fi
+
+ # ping netns1, expect rewrite to netns2
+ if ! ip netns exec "$ns0" ping -q -c 1 dead:1::99 > /dev/null;then
+ lret=1
+ echo "ERROR: ping6 failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns1$dir "$expect" "test_local_dnat6 1"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat6 2"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_local_dnat6 3"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat6 4"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ipv6 ping to $ns1 was $family NATted to $ns2"
+ ip netns exec "$ns0" nft flush chain ip6 nat output
+
+ return $lret
+}
+
+test_local_dnat()
+{
+ local family=$1
+ local lret=0
+ local IPF=""
+
+ if [ "$family" = "inet" ];then
+ IPF="ip"
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF 2>/dev/null
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ ip daddr 10.0.1.99 dnat $IPF to 10.0.2.99
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ "$family" = "inet" ];then
+ echo "SKIP: inet nat tests"
+ test_inet_nat=false
+ return $ksft_skip
+ fi
+ echo "SKIP: Could not add add $family dnat hook"
+ return $ksft_skip
+ fi
+
+ # ping netns1, expect rewrite to netns2
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_local_dnat 1"
+ lret=1
+ fi
+ done
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns2$dir" "$expect" "test_local_dnat 2"
+ lret=1
+ fi
+ done
+
+ # expect 0 count in ns1
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_local_dnat 3"
+ lret=1
+ fi
+ done
+
+ # expect 1 packet in ns2
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter ns0${dir} | grep -q "$expect";then
+ bad_counter "$ns2" "ns0$dir" "$expect" "test_local_dnat 4"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to $ns1 was $family NATted to $ns2"
+
+ ip netns exec "$ns0" nft flush chain "$family" nat output
+
+ reset_counters
+ if ! ip netns exec "$ns0" ping -q -c 1 10.0.1.99 > /dev/null;then
+ lret=1
+ echo "ERROR: ping failed"
+ return $lret
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns1$dir "$expect" "test_local_dnat 5"
+ lret=1
+ fi
+ done
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns2$dir "$expect" "test_local_dnat 6"
+ lret=1
+ fi
+ done
+
+ # expect 1 count in ns1
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns0$dir "$expect" "test_local_dnat 7"
+ lret=1
+ fi
+ done
+
+ # expect 0 packet in ns2
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns0$dir "$expect" "test_local_dnat 8"
+ lret=1
+ fi
+ done
+
+ test $lret -eq 0 && echo "PASS: ping to $ns1 OK after $family nat output chain flush"
+
+ return $lret
+}
+
+listener_ready()
+{
+ local ns="$1"
+ local port="$2"
+ local proto="$3"
+ ss -N "$ns" -ln "$proto" -o "sport = :$port" | grep -q "$port"
+}
+
+test_local_dnat_portonly()
+{
+ local family=$1
+ local daddr=$2
+ local lret=0
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain output {
+ type nat hook output priority 0; policy accept;
+ meta l4proto tcp dnat to :2000
+
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ if [ "$family" = "inet" ];then
+ echo "SKIP: inet port test"
+ test_inet_nat=false
+ return
+ fi
+ echo "SKIP: Could not add $family dnat hook"
+ return
+ fi
+
+ echo "SERVER-$family" | ip netns exec "$ns1" timeout 3 socat -u STDIN TCP-LISTEN:2000 &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 2000 "-t"
+
+ result=$(ip netns exec "$ns0" timeout 1 socat -u TCP:"$daddr":2000 STDOUT)
+
+ if [ "$result" = "SERVER-inet" ];then
+ echo "PASS: inet port rewrite without l3 address"
+ else
+ echo "ERROR: inet port rewrite without l3 address, got $result"
+ ret=1
+ fi
+}
+
+test_masquerade6()
+{
+ local family=$1
+ local natflags=$2
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6"
+ return 1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade6 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade6 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade $natflags
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 3"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_masquerade6 4"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_masquerade6 5"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade6 6"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ipv6 masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting;then
+ echo "ERROR: Could not flush $family nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IPv6 masquerade $natflags for $ns2"
+
+ return $lret
+}
+
+test_masquerade()
+{
+ local family=$1
+ local natflags=$2
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 $natflags"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns2$dir" "$expect" "test_masquerade 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add masquerading rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade $natflags
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family masquerade $natflags"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from ns0, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns0${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 3"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" "ns1$dir" "$expect" "test_masquerade 4"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "ns0$dir" "$expect" "test_masquerade 5"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" "ns1$dir" "$expect" "test_masquerade 6"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active ip masquerade $natflags (attempt 2)"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft flush chain "$family" nat postrouting; then
+ echo "ERROR: Could not flush $family nat postrouting" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IP masquerade $natflags for $ns2"
+
+ return $lret
+}
+
+test_redirect6()
+{
+ local family=$1
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannnot ping $ns1 from $ns2 via ipv6"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns2$dir "$expect" "test_redirect6 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter "ns1${dir}" | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect6 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 meta l4proto icmpv6 ip6 saddr dead:2::99 ip6 daddr dead:1::99 redirect
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family redirect hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 dead:1::99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 via ipv6 with active $family redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 3"
+ lret=1
+ fi
+ done
+
+ # ns0 should have seen packets from ns2, due to masquerade
+ expect="packets 1 bytes 104"
+ for dir in "in6" "out6" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect6 4"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
+ echo "ERROR: Could not delete $family nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IPv6 redirection for $ns2"
+
+ return $lret
+}
+
+test_redirect()
+{
+ local family=$1
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2"
+ lret=1
+ fi
+
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" "$ns2$dir" "$expect" "test_redirect 1"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_redirect 2"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+# add redirect rule
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain prerouting {
+ type nat hook prerouting priority 0; policy accept;
+ meta iif veth1 ip protocol icmp ip saddr 10.0.2.99 ip daddr 10.0.1.99 redirect
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family redirect hook"
+ return $ksft_skip
+ fi
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 with active $family ip redirect"
+ lret=1
+ fi
+
+ # ns1 should have seen no packets from ns2, due to redirection
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+
+ if ! ip netns exec "$ns1" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_redirect 3"
+ lret=1
+ fi
+ done
+
+ # ns0 should have seen packets from ns2, due to masquerade
+ expect="packets 1 bytes 84"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns0" nft list counter inet filter "ns2${dir}" | grep -q "$expect";then
+ bad_counter "$ns0" ns0$dir "$expect" "test_redirect 4"
+ lret=1
+ fi
+ done
+
+ if ! ip netns exec "$ns0" nft delete table "$family" nat;then
+ echo "ERROR: Could not delete $family nat table" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: $family IP redirection for $ns2"
+
+ return $lret
+}
+
+# test port shadowing.
+# create two listening services, one on router (ns0), one
+# on client (ns2), which is masqueraded from ns1 point of view.
+# ns2 sends udp packet coming from service port to ns1, on a highport.
+# Later, if n1 uses same highport to connect to ns0:service, packet
+# might be port-forwarded to ns2 instead.
+
+# second argument tells if we expect the 'fake-entry' to take effect
+# (CLIENT) or not (ROUTER).
+test_port_shadow()
+{
+ local test=$1
+ local expect=$2
+ local daddrc="10.0.1.99"
+ local daddrs="10.0.1.1"
+ local result=""
+ local logmsg=""
+
+ # make shadow entry, from client (ns2), going to (ns1), port 41404, sport 1405.
+ echo "fake-entry" | ip netns exec "$ns2" timeout 1 socat -u STDIN UDP:"$daddrc":41404,sourceport=1405
+
+ echo ROUTER | ip netns exec "$ns0" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405 2>/dev/null &
+ local sc_r=$!
+ echo CLIENT | ip netns exec "$ns2" timeout 3 socat -T 3 -u STDIN UDP4-LISTEN:1405,reuseport 2>/dev/null &
+ local sc_c=$!
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns0" 1405 "-u"
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns2" 1405 "-u"
+
+ # ns1 tries to connect to ns0:1405. With default settings this should connect
+ # to client, it matches the conntrack entry created above.
+
+ result=$(echo "data" | ip netns exec "$ns1" timeout 1 socat - UDP:"$daddrs":1405,sourceport=41404)
+
+ if [ "$result" = "$expect" ] ;then
+ echo "PASS: portshadow test $test: got reply from ${expect}${logmsg}"
+ else
+ echo "ERROR: portshadow test $test: got reply from \"$result\", not $expect as intended"
+ ret=1
+ fi
+
+ kill $sc_r $sc_c 2>/dev/null
+
+ # flush udp entries for next test round, if any
+ ip netns exec "$ns0" conntrack -F >/dev/null 2>&1
+}
+
+# This prevents port shadow of router service via packet filter,
+# packets claiming to originate from service port from internal
+# network are dropped.
+test_port_shadow_filter()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family filter {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ meta iif veth1 udp sport 1405 drop
+ }
+}
+EOF
+ test_port_shadow "port-filter" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table "$family" filter
+}
+
+# This prevents port shadow of router service via notrack.
+test_port_shadow_notrack()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family raw {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 udp dport 1405 notrack
+ }
+ chain output {
+ type filter hook output priority -300; policy accept;
+ meta oif veth0 udp sport 1405 notrack
+ }
+}
+EOF
+ test_port_shadow "port-notrack" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table "$family" raw
+}
+
+# This prevents port shadow of router service via sport remap.
+test_port_shadow_pat()
+{
+ local family=$1
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family pat {
+ chain postrouting {
+ type nat hook postrouting priority -1; policy accept;
+ meta iif veth1 udp sport <= 1405 masquerade to : 1406-65535 random
+ }
+}
+EOF
+ test_port_shadow "pat" "ROUTER"
+
+ ip netns exec "$ns0" nft delete table "$family" pat
+}
+
+test_port_shadowing()
+{
+ local family="ip"
+
+ if ! conntrack -h >/dev/null 2>&1;then
+ echo "SKIP: Could not run nat port shadowing test without conntrack tool"
+ return
+ fi
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run nat port shadowing test without socat tool"
+ return
+ fi
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table $family nat {
+ chain postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ meta oif veth0 masquerade
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add add $family masquerade hook"
+ return $ksft_skip
+ fi
+
+ # test default behaviour. Packet from ns1 to ns0 is redirected to ns2.
+ test_port_shadow "default" "CLIENT"
+
+ # test packet filter based mitigation: prevent forwarding of
+ # packets claiming to come from the service port.
+ test_port_shadow_filter "$family"
+
+ # test conntrack based mitigation: connections going or coming
+ # from router:service bypass connection tracking.
+ test_port_shadow_notrack "$family"
+
+ # test nat based mitigation: fowarded packets coming from service port
+ # are masqueraded with random highport.
+ test_port_shadow_pat "$family"
+
+ ip netns exec "$ns0" nft delete table $family nat
+}
+
+test_stateless_nat_ip()
+{
+ local lret=0
+
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec "$ns0" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null;then
+ echo "ERROR: cannot ping $ns1 from $ns2 before loading stateless rules"
+ return 1
+ fi
+
+ip netns exec "$ns0" nft -f /dev/stdin <<EOF
+table ip stateless {
+ map xlate_in {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth1" . 10.0.2.99 . 10.0.1.99 : 10.0.2.2,
+ }
+ }
+ map xlate_out {
+ typeof meta iifname . ip saddr . ip daddr : ip daddr
+ elements = {
+ "veth0" . 10.0.1.99 . 10.0.2.2 : 10.0.2.99
+ }
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr set meta iifname . ip saddr . ip daddr map @xlate_in
+ ip daddr set meta iifname . ip saddr . ip daddr map @xlate_out
+ }
+}
+EOF
+ if [ $? -ne 0 ]; then
+ echo "SKIP: Could not add ip statless rules"
+ return $ksft_skip
+ fi
+
+ reset_counters
+
+ if ! ip netns exec "$ns2" ping -q -c 1 10.0.1.99 > /dev/null; then
+ echo "ERROR: cannot ping $ns1 from $ns2 with stateless rules"
+ lret=1
+ fi
+
+ # ns1 should have seen packets from .2.2, due to stateless rewrite.
+ expect="packets 1 bytes 84"
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 1"
+ lret=1
+ fi
+
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns2" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+ bad_counter "$ns2" ns1$dir "$expect" "test_stateless 2"
+ lret=1
+ fi
+ done
+
+ # ns1 should not have seen packets from ns2, due to masquerade
+ expect="packets 0 bytes 0"
+ for dir in "in" "out" ; do
+ if ! ip netns exec "$ns1" nft list counter inet filter ns2${dir} | grep -q "$expect";then
+ bad_counter "$ns1" ns0$dir "$expect" "test_stateless 3"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft list counter inet filter ns1${dir} | grep -q "$expect";then
+ bad_counter "$ns0" ns1$dir "$expect" "test_stateless 4"
+ lret=1
+ fi
+ done
+
+ reset_counters
+
+ if ! socat -h > /dev/null 2>&1;then
+ echo "SKIP: Could not run stateless nat frag test without socat tool"
+ if [ $lret -eq 0 ]; then
+ return $ksft_skip
+ fi
+
+ ip netns exec "$ns0" nft delete table ip stateless
+ return $lret
+ fi
+
+ dd if=/dev/urandom of="$INFILE" bs=4096 count=1 2>/dev/null
+
+ ip netns exec "$ns1" timeout 3 socat -u UDP4-RECV:4233 OPEN:"$OUTFILE" < /dev/null 2>/dev/null &
+
+ busywait $BUSYWAIT_TIMEOUT listener_ready "$ns1" 4233 "-u"
+
+ # re-do with large ping -> ip fragmentation
+ if ! ip netns exec "$ns2" timeout 3 socat -u STDIN UDP4-SENDTO:"10.0.1.99:4233" < "$INFILE" > /dev/null;then
+ echo "ERROR: failed to test udp $ns1 to $ns2 with stateless ip nat" 1>&2
+ lret=1
+ fi
+
+ wait
+
+ if ! cmp "$INFILE" "$OUTFILE";then
+ ls -l "$INFILE" "$OUTFILE"
+ echo "ERROR: in and output file mismatch when checking udp with stateless nat" 1>&2
+ lret=1
+ fi
+
+ :> "$OUTFILE"
+
+ # ns1 should have seen packets from 2.2, due to stateless rewrite.
+ expect="packets 3 bytes 4164"
+ if ! ip netns exec "$ns1" nft list counter inet filter ns0insl | grep -q "$expect";then
+ bad_counter "$ns1" ns0insl "$expect" "test_stateless 5"
+ lret=1
+ fi
+
+ if ! ip netns exec "$ns0" nft delete table ip stateless; then
+ echo "ERROR: Could not delete table ip stateless" 1>&2
+ lret=1
+ fi
+
+ test $lret -eq 0 && echo "PASS: IP statless for $ns2"
+
+ return $lret
+}
+
+# ip netns exec "$ns0" ping -c 1 -q 10.0.$i.99
+for i in "$ns0" "$ns1" "$ns2" ;do
+ip netns exec "$i" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0in {}
+ counter ns1in {}
+ counter ns2in {}
+
+ counter ns0out {}
+ counter ns1out {}
+ counter ns2out {}
+
+ counter ns0in6 {}
+ counter ns1in6 {}
+ counter ns2in6 {}
+
+ counter ns0out6 {}
+ counter ns1out6 {}
+ counter ns2out6 {}
+
+ map nsincounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0in",
+ 10.0.2.1 : "ns0in",
+ 10.0.1.99 : "ns1in",
+ 10.0.2.99 : "ns2in" }
+ }
+
+ map nsincounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0in6",
+ dead:2::1 : "ns0in6",
+ dead:1::99 : "ns1in6",
+ dead:2::99 : "ns2in6" }
+ }
+
+ map nsoutcounter {
+ type ipv4_addr : counter
+ elements = { 10.0.1.1 : "ns0out",
+ 10.0.2.1 : "ns0out",
+ 10.0.1.99: "ns1out",
+ 10.0.2.99: "ns2out" }
+ }
+
+ map nsoutcounter6 {
+ type ipv6_addr : counter
+ elements = { dead:1::1 : "ns0out6",
+ dead:2::1 : "ns0out6",
+ dead:1::99 : "ns1out6",
+ dead:2::99 : "ns2out6" }
+ }
+
+ chain input {
+ type filter hook input priority 0; policy accept;
+ counter name ip saddr map @nsincounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 saddr map @nsincounter6
+ }
+ chain output {
+ type filter hook output priority 0; policy accept;
+ counter name ip daddr map @nsoutcounter
+ icmpv6 type { "echo-request", "echo-reply" } counter name ip6 daddr map @nsoutcounter6
+ }
+}
+EOF
+done
+
+# special case for stateless nat check, counter needs to
+# be done before (input) ip defragmentation
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+table inet filter {
+ counter ns0insl {}
+
+ chain pre {
+ type filter hook prerouting priority -400; policy accept;
+ ip saddr 10.0.2.2 counter name "ns0insl"
+ }
+}
+EOF
+
+ping_basic()
+{
+ i="$1"
+ if ! ip netns exec "$ns0" ping -c 1 -q 10.0."$i".99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s)" 1>&2
+ ret=1
+ fi
+
+ if ! ip netns exec "$ns0" ping -c 1 -q dead:"$i"::99 > /dev/null;then
+ echo "ERROR: Could not reach other namespace(s) via ipv6" 1>&2
+ ret=1
+ fi
+}
+
+test_basic_conn()
+{
+ local nsexec
+ name="$1"
+
+ nsexec=$(eval echo \$"$1")
+
+ ping_basic 1
+ ping_basic 2
+
+ if ! check_counters "$nsexec";then
+ return 1
+ fi
+
+ if ! check_ns0_counters "$name";then
+ return 1
+ fi
+
+ reset_counters
+ return 0
+}
+
+if ! test_basic_conn "ns1" ; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+ exit 1
+fi
+if ! test_basic_conn "ns2"; then
+ echo "ERROR: basic test for ns1 failed" 1>&2
+fi
+
+if [ $ret -eq 0 ];then
+ echo "PASS: netns routing/connectivity: $ns0 can reach $ns1 and $ns2"
+fi
+
+reset_counters
+test_local_dnat ip
+test_local_dnat6 ip6
+
+reset_counters
+test_local_dnat_portonly inet 10.0.1.99
+
+reset_counters
+$test_inet_nat && test_local_dnat inet
+$test_inet_nat && test_local_dnat6 inet
+
+for flags in "" "fully-random"; do
+reset_counters
+test_masquerade ip $flags
+test_masquerade6 ip6 $flags
+reset_counters
+$test_inet_nat && test_masquerade inet $flags
+$test_inet_nat && test_masquerade6 inet $flags
+done
+
+reset_counters
+test_redirect ip
+test_redirect6 ip6
+reset_counters
+$test_inet_nat && test_redirect inet
+$test_inet_nat && test_redirect6 inet
+
+test_port_shadowing
+test_stateless_nat_ip
+
+if [ $ret -ne 0 ];then
+ echo -n "FAIL: "
+ nft --version
+fi
+
+exit $ret
diff --git a/tools/testing/selftests/net/netfilter/nft_nat_zones.sh b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
new file mode 100755
index 000000000000..3b81d88bdde3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_nat_zones.sh
@@ -0,0 +1,267 @@
+#!/bin/bash
+#
+# Test connection tracking zone and NAT source port reallocation support.
+#
+
+source lib.sh
+
+# Don't increase too much, 2000 clients should work
+# just fine but script can then take several minutes with
+# KASAN/debug builds.
+maxclients=100
+
+have_socat=0
+ret=0
+
+[ "$KSFT_MACHINE_SLOW" = yes ] && maxclients=40
+# client1---.
+# veth1-.
+# |
+# NAT Gateway --veth0--> Server
+# | |
+# veth2-' |
+# client2---' |
+# .... |
+# clientX----vethX---'
+
+# All clients share identical IP address.
+# NAT Gateway uses policy routing and conntrack zones to isolate client
+# namespaces. Each client connects to Server, each with colliding tuples:
+# clientsaddr:10000 -> serveraddr:dport
+# NAT Gateway is supposed to do port reallocation for each of the
+# connections.
+
+v4gc1=$(sysctl -n net.ipv4.neigh.default.gc_thresh1 2>/dev/null)
+v4gc2=$(sysctl -n net.ipv4.neigh.default.gc_thresh2 2>/dev/null)
+v4gc3=$(sysctl -n net.ipv4.neigh.default.gc_thresh3 2>/dev/null)
+v6gc1=$(sysctl -n net.ipv6.neigh.default.gc_thresh1 2>/dev/null)
+v6gc2=$(sysctl -n net.ipv6.neigh.default.gc_thresh2 2>/dev/null)
+v6gc3=$(sysctl -n net.ipv6.neigh.default.gc_thresh3 2>/dev/null)
+
+cleanup()
+{
+ cleanup_all_ns
+
+ sysctl -q net.ipv4.neigh.default.gc_thresh1="$v4gc1" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh2="$v4gc2" 2>/dev/null
+ sysctl -q net.ipv4.neigh.default.gc_thresh3="$v4gc3" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh1="$v6gc1" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh2="$v6gc2" 2>/dev/null
+ sysctl -q net.ipv6.neigh.default.gc_thresh3="$v6gc3" 2>/dev/null
+}
+
+checktool "nft --version" echo "run test without nft tool"
+checktool "conntrack -V" "run test without conntrack tool"
+
+if socat -h >/dev/null 2>&1; then
+ have_socat=1
+fi
+
+setup_ns gw srv
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$gw" type veth peer name eth0 netns "$srv"
+ip -net "$gw" link set veth0 up
+ip -net "$srv" link set eth0 up
+
+sysctl -q net.ipv6.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv6.neigh.default.gc_thresh3=4096 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh1=512 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh2=1024 2>/dev/null
+sysctl -q net.ipv4.neigh.default.gc_thresh3=4096 2>/dev/null
+
+for i in $(seq 1 "$maxclients");do
+ setup_ns "cl$i"
+
+ cl=$(eval echo \$cl"$i")
+ if ! ip link add veth"$i" netns "$gw" type veth peer name eth0 netns "$cl" > /dev/null 2>&1;then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+ fi
+done
+
+for i in $(seq 1 "$maxclients");do
+ cl=$(eval echo \$cl"$i")
+ echo netns exec "$cl" ip link set eth0 up
+ echo netns exec "$cl" sysctl -q net.ipv4.tcp_syn_retries=2
+ echo netns exec "$gw" ip link set "veth$i" up
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".arp_ignore=2
+ echo netns exec "$gw" sysctl -q net.ipv4.conf.veth"$i".rp_filter=0
+
+ # clients have same IP addresses.
+ echo netns exec "$cl" ip addr add 10.1.0.3/24 dev eth0
+ echo netns exec "$cl" ip addr add dead:1::3/64 dev eth0 nodad
+ echo netns exec "$cl" ip route add default via 10.1.0.2 dev eth0
+ echo netns exec "$cl" ip route add default via dead:1::2 dev eth0
+
+ # NB: same addresses on client-facing interfaces.
+ echo netns exec "$gw" ip addr add 10.1.0.2/24 dev "veth$i"
+ echo netns exec "$gw" ip addr add dead:1::2/64 dev "veth$i" nodad
+
+ # gw: policy routing
+ echo netns exec "$gw" ip route add 10.1.0.0/24 dev "veth$i" table $((1000+i))
+ echo netns exec "$gw" ip route add dead:1::0/64 dev "veth$i" table $((1000+i))
+ echo netns exec "$gw" ip route add 10.3.0.0/24 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip route add dead:3::0/64 dev veth0 table $((1000+i))
+ echo netns exec "$gw" ip rule add fwmark "$i" lookup $((1000+i))
+done | ip -batch /dev/stdin
+
+ip -net "$gw" addr add 10.3.0.1/24 dev veth0
+ip -net "$gw" addr add dead:3::1/64 dev veth0 nodad
+
+ip -net "$srv" addr add 10.3.0.99/24 dev eth0
+ip -net "$srv" addr add dead:3::99/64 dev eth0 nodad
+
+ip netns exec "$gw" nft -f /dev/stdin<<EOF
+table inet raw {
+ map iiftomark {
+ type ifname : mark
+ }
+
+ map iiftozone {
+ typeof iifname : ct zone
+ }
+
+ set inicmp {
+ flags dynamic
+ type ipv4_addr . ifname . ipv4_addr
+ }
+ set inflows {
+ flags dynamic
+ type ipv4_addr . inet_service . ifname . ipv4_addr . inet_service
+ }
+
+ set inflows6 {
+ flags dynamic
+ type ipv6_addr . inet_service . ifname . ipv6_addr . inet_service
+ }
+
+ chain prerouting {
+ type filter hook prerouting priority -64000; policy accept;
+ ct original zone set meta iifname map @iiftozone
+ meta mark set meta iifname map @iiftomark
+
+ tcp flags & (syn|ack) == ack add @inflows { ip saddr . tcp sport . meta iifname . ip daddr . tcp dport counter }
+ add @inflows6 { ip6 saddr . tcp sport . meta iifname . ip6 daddr . tcp dport counter }
+ ip protocol icmp add @inicmp { ip saddr . meta iifname . ip daddr counter }
+ }
+
+ chain nat_postrouting {
+ type nat hook postrouting priority 0; policy accept;
+ ct mark set meta mark meta oifname veth0 masquerade
+ }
+
+ chain mangle_prerouting {
+ type filter hook prerouting priority -100; policy accept;
+ ct direction reply meta mark set ct mark
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Could not add nftables rules"
+ exit $ksft_skip
+fi
+
+( echo add element inet raw iiftomark \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth"$i"\" : "$i",
+ done
+ echo \"veth"$maxclients"\" : "$maxclients" \}
+ echo add element inet raw iiftozone \{
+ for i in $(seq 1 $((maxclients-1))); do
+ echo \"veth"$i"\" : "$i",
+ done
+ echo \"veth$maxclients\" : $maxclients \}
+) | ip netns exec "$gw" nft -f /dev/stdin
+
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv4.conf.all.rp_filter=0 >/dev/null
+
+# useful for debugging: allows to use 'ping' from clients to gateway.
+ip netns exec "$gw" sysctl -q net.ipv4.fwmark_reflect=1 > /dev/null
+ip netns exec "$gw" sysctl -q net.ipv6.fwmark_reflect=1 > /dev/null
+
+for i in $(seq 1 "$maxclients"); do
+ cl=$(eval echo \$cl"$i")
+ ip netns exec "$cl" ping -i 0.5 -q -c 3 10.3.0.99 > /dev/null 2>&1 &
+done
+
+wait || ret=1
+
+[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2
+
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" | grep -q "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 counter packets 3 bytes 252 }"; then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth$i" 1>&2
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.1.0.3 . \"veth$i\" . 10.3.0.99 }" 1>&2
+ break
+ fi
+done
+
+if ! ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 }" | grep -q "{ 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"; then
+ ret=1
+ echo "FAIL: counter icmp mismatch for veth0: { 10.3.0.99 . \"veth0\" . 10.3.0.1 counter packets $((3 * maxclients)) bytes $((252 * maxclients)) }"
+ ip netns exec "$gw" nft get element inet raw inicmp "{ 10.3.99 . \"veth0\" . 10.3.0.1 }" 1>&2
+fi
+
+if [ $ret -eq 0 ]; then
+ echo "PASS: ping test from all $maxclients namespaces"
+fi
+
+if [ $have_socat -eq 0 ];then
+ echo "SKIP: socat not installed"
+ if [ $ret -ne 0 ];then
+ exit $ret
+ fi
+ exit $ksft_skip
+fi
+
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201
+}
+
+ip netns exec "$srv" socat -u TCP-LISTEN:5201,fork STDOUT > /dev/null 2>/dev/null &
+socatpid=$!
+
+busywait 1000 listener_ready "$srv"
+
+for i in $(seq 1 "$maxclients"); do
+ if [ $ret -ne 0 ]; then
+ break
+ fi
+ cl=$(eval echo \$cl"$i")
+ if ! ip netns exec "$cl" socat -4 -u STDIN TCP:10.3.0.99:5201,sourceport=10000 < /dev/null > /dev/null; then
+ echo "FAIL: Failure to connect for $cl" 1>&2
+ ip netns exec "$gw" conntrack -S 1>&2
+ ret=1
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: socat connections for all $maxclients net namespaces"
+fi
+
+kill $socatpid
+wait
+
+for i in $(seq 1 "$maxclients"); do
+ if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.1.0.3 . 10000 . \"veth$i\" . 10.3.0.99 . 5201 }" > /dev/null;then
+ ret=1
+ echo "FAIL: can't find expected tcp entry for veth$i" 1>&2
+ break
+ fi
+done
+if [ $ret -eq 0 ];then
+ echo "PASS: Found client connection for all $maxclients net namespaces"
+fi
+
+if ! ip netns exec "$gw" nft get element inet raw inflows "{ 10.3.0.99 . 5201 . \"veth0\" . 10.3.0.1 . 10000 }" > /dev/null;then
+ ret=1
+ echo "FAIL: cannot find return entry on veth0" 1>&2
+fi
+
+exit $ret
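Review bot: The ping-failure report `[ "$ret" -ne 0 ] && "FAIL: Ping failure from $cl" 1>&2` has no `echo`, so on failure the shell would try to run the message as a command instead of printing it. It should read `[ "$ret" -ne 0 ] && echo "FAIL: Ping failure from $cl" 1>&2`. The preceding `wait || ret=1` also cannot set `ret`, because bash's `wait` without operands returns 0 whatever the children's exit status, so a failed ping is only caught by the later `inicmp` counter check; recording each `$!` in the ping loop and waiting on them individually would make the check effective. Two smaller slips: `checktool "nft --version" echo "run test without nft tool"` carries a stray `echo` argument (compare the `conntrack -V` call on the next line; `checktool` comes from lib.sh, not shown here). The veth0 failure dump queries `10.3.99` where the check itself uses `10.3.0.99`, so it would print a different element than the one that mismatched.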
diff --git a/tools/testing/selftests/net/netfilter/nft_queue.sh b/tools/testing/selftests/net/netfilter/nft_queue.sh
new file mode 100755
index 000000000000..8538f08c64c2
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_queue.sh
@@ -0,0 +1,417 @@
+#!/bin/bash
+#
+# This tests nf_queue:
+# 1. can process packets from all hooks
+# 2. support running nfqueue from more than one base chain
+#
+# shellcheck disable=SC2162,SC2317
+
+source lib.sh
+ret=0
+timeout=2
+
+cleanup()
+{
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+ ip netns pids "$nsrouter" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+
+ rm -f "$TMPINPUT"
+ rm -f "$TMPFILE0"
+ rm -f "$TMPFILE1"
+ rm -f "$TMPFILE2" "$TMPFILE3"
+}
+
+checktool "nft --version" "test without nft tool"
+
+trap cleanup EXIT
+
+setup_ns ns1 ns2 nsrouter
+
+TMPFILE0=$(mktemp)
+TMPFILE1=$(mktemp)
+TMPFILE2=$(mktemp)
+TMPFILE3=$(mktemp)
+
+TMPINPUT=$(mktemp)
+dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+
+if ! ip link add veth0 netns "$nsrouter" type veth peer name eth0 netns "$ns1" > /dev/null 2>&1; then
+ echo "SKIP: No virtual ethernet pair device support in kernel"
+ exit $ksft_skip
+fi
+ip link add veth1 netns "$nsrouter" type veth peer name eth0 netns "$ns2"
+
+ip -net "$nsrouter" link set veth0 up
+ip -net "$nsrouter" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsrouter" addr add dead:1::1/64 dev veth0 nodad
+
+ip -net "$nsrouter" link set veth1 up
+ip -net "$nsrouter" addr add 10.0.2.1/24 dev veth1
+ip -net "$nsrouter" addr add dead:2::1/64 dev veth1 nodad
+
+ip -net "$ns1" link set eth0 up
+ip -net "$ns2" link set eth0 up
+
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns1" addr add dead:1::99/64 dev eth0 nodad
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns1" route add default via dead:1::1
+
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns2" addr add dead:2::99/64 dev eth0 nodad
+ip -net "$ns2" route add default via 10.0.2.1
+ip -net "$ns2" route add default via dead:2::1
+
+load_ruleset() {
+ local name=$1
+ local prio=$2
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet $name {
+ chain nfq {
+ ip protocol icmp queue bypass
+ icmpv6 type { "echo-request", "echo-reply" } queue num 1 bypass
+ }
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ jump nfq
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ jump nfq
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ tcp dport 12345 queue num 2
+ jump nfq
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ tcp dport 12345 queue num 3
+ tcp sport 23456 queue num 3
+ jump nfq
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ jump nfq
+ }
+}
+EOF
+}
+
+load_counter_ruleset() {
+ local prio=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table inet countrules {
+ chain pre {
+ type filter hook prerouting priority $prio; policy accept;
+ counter
+ }
+ chain input {
+ type filter hook input priority $prio; policy accept;
+ counter
+ }
+ chain forward {
+ type filter hook forward priority $prio; policy accept;
+ counter
+ }
+ chain output {
+ type filter hook output priority $prio; policy accept;
+ counter
+ }
+ chain post {
+ type filter hook postrouting priority $prio; policy accept;
+ counter
+ }
+}
+EOF
+}
+
+test_ping() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ return 1
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::99 > /dev/null; then
+ return 2
+ fi
+
+ return 0
+}
+
+test_ping_router() {
+ if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.1 > /dev/null; then
+ return 3
+ fi
+
+ if ! ip netns exec "$ns1" ping -c 1 -q dead:2::1 > /dev/null; then
+ return 4
+ fi
+
+ return 0
+}
+
+test_queue_blackhole() {
+ local proto=$1
+
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+table $proto blackh {
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+ queue num 600
+ }
+}
+EOF
+ if [ "$proto" = "ip" ] ;then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q 10.0.2.99 > /dev/null
+ lret=$?
+ elif [ "$proto" = "ip6" ]; then
+ ip netns exec "$ns1" ping -W 2 -c 1 -q dead:2::99 > /dev/null
+ lret=$?
+ else
+ lret=111
+ fi
+
+ # queue without bypass keyword should drop traffic if no listener exists.
+ if [ "$lret" -eq 0 ];then
+ echo "FAIL: $proto expected failure, got $lret" 1>&2
+ exit 1
+ fi
+
+ if ! ip netns exec "$nsrouter" nft delete table "$proto" blackh; then
+ echo "FAIL: $proto: Could not delete blackh table"
+ exit 1
+ fi
+
+ echo "PASS: $proto: statement with no listener results in packet drop"
+}
+
+nf_queue_wait()
+{
+ local procfile="/proc/self/net/netfilter/nfnetlink_queue"
+ local netns id
+
+ netns="$1"
+ id="$2"
+
+ # if this file doesn't exist, nfnetlink_module isn't loaded.
+ # rather than loading it ourselves, wait for kernel module autoload
+ # completion, nfnetlink should do so automatically because nf_queue
+ # helper program, spawned in the background, asked for this functionality.
+ test -f "$procfile" &&
+ ip netns exec "$netns" cat "$procfile" | grep -q "^ *$id "
+}
+
+test_queue()
+{
+ local expected="$1"
+ local last=""
+
+ # spawn nf_queue listeners
+ ip netns exec "$nsrouter" ./nf_queue -c -q 0 -t $timeout > "$TMPFILE0" &
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t $timeout > "$TMPFILE1" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 0
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 1
+
+ if ! test_ping;then
+ echo "FAIL: netns routing/connectivity with active listener on queues 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ if ! test_ping_router;then
+ echo "FAIL: netns router unreachable listener on queue 0 and 1: $ret" 1>&2
+ exit $ret
+ fi
+
+ wait
+ ret=$?
+
+ for file in $TMPFILE0 $TMPFILE1; do
+ last=$(tail -n1 "$file")
+ if [ x"$last" != x"$expected packets total" ]; then
+ echo "FAIL: Expected $expected packets total, but got $last" 1>&2
+ ip netns exec "$nsrouter" nft list ruleset
+ exit 1
+ fi
+ done
+
+ echo "PASS: Expected and received $last"
+}
+
+listener_ready()
+{
+ ss -N "$1" -lnt -o "sport = :12345" | grep -q 12345
+}
+
+test_tcp_forward()
+{
+ ip netns exec "$nsrouter" ./nf_queue -q 2 -t "$timeout" &
+ local nfqpid=$!
+
+ timeout 5 ip netns exec "$ns2" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"
+
+ ip netns exec "$ns1" socat -u STDIN TCP:10.0.2.99:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp and nfqueue in forward chain"
+}
+
+test_tcp_localhost()
+{
+ dd conv=sparse status=none if=/dev/zero bs=1M count=200 of="$TMPINPUT"
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" >/dev/null
+
+ wait "$rpid" && echo "PASS: tcp via loopback"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_connectclose()
+{
+ ip netns exec "$nsrouter" ./connect_close -p 23456 -t "$timeout" &
+ ip netns exec "$nsrouter" ./nf_queue -q 3 -t "$timeout" &
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$nsrouter" 3
+
+ wait && echo "PASS: tcp via loopback with connect/close"
+ wait 2>/dev/null
+}
+
+test_tcp_localhost_requeue()
+{
+ip netns exec "$nsrouter" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ tcp dport 12345 limit rate 1/second burst 1 packets counter queue num 0
+ }
+}
+EOF
+ timeout 5 ip netns exec "$nsrouter" socat -u TCP-LISTEN:12345 STDOUT >/dev/null &
+ local rpid=$!
+
+ ip netns exec "$nsrouter" ./nf_queue -c -q 1 -t "$timeout" > "$TMPFILE2" &
+
+ # nfqueue 1 will be called via output hook. But this time,
+ # re-queue the packet to nfqueue program on queue 2.
+ ip netns exec "$nsrouter" ./nf_queue -G -d 150 -c -q 0 -Q 1 -t "$timeout" > "$TMPFILE3" &
+
+ busywait "$BUSYWAIT_TIMEOUT" listener_ready "$nsrouter"
+ ip netns exec "$nsrouter" socat -u STDIN TCP:127.0.0.1:12345 <"$TMPINPUT" > /dev/null
+
+ wait
+
+ if ! diff -u "$TMPFILE2" "$TMPFILE3" ; then
+ echo "FAIL: lost packets during requeue?!" 1>&2
+ return
+ fi
+
+ echo "PASS: tcp via loopback and re-queueing"
+}
+
+test_icmp_vrf() {
+ if ! ip -net "$ns1" link add tvrf type vrf table 9876;then
+ echo "SKIP: Could not add vrf device"
+ return
+ fi
+
+ ip -net "$ns1" li set eth0 master tvrf
+ ip -net "$ns1" li set tvrf up
+
+ ip -net "$ns1" route add 10.0.2.0/24 via 10.0.1.1 dev eth0 table 9876
+ip netns exec "$ns1" nft -f /dev/stdin <<EOF
+flush ruleset
+table inet filter {
+ chain output {
+ type filter hook output priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+ chain post {
+ type filter hook postrouting priority 0; policy accept;
+ meta oifname "tvrf" icmp type echo-request counter queue num 1
+ meta oifname "eth0" icmp type echo-request counter queue num 1
+ }
+}
+EOF
+ ip netns exec "$ns1" ./nf_queue -q 1 -t "$timeout" &
+ local nfqpid=$!
+
+ busywait "$BUSYWAIT_TIMEOUT" nf_queue_wait "$ns1" 1
+
+ ip netns exec "$ns1" ip vrf exec tvrf ping -c 1 10.0.2.99 > /dev/null
+
+ for n in output post; do
+ for d in tvrf eth0; do
+ if ! ip netns exec "$ns1" nft list chain inet filter "$n" | grep -q "oifname \"$d\" icmp type echo-request counter packets 1"; then
+ echo "FAIL: chain $n: icmp packet counter mismatch for device $d" 1>&2
+ ip netns exec "$ns1" nft list ruleset
+ ret=1
+ return
+ fi
+ done
+ done
+
+ wait "$nfqpid" && echo "PASS: icmp+nfqueue via vrf"
+ wait 2>/dev/null
+}
+
+ip netns exec "$nsrouter" sysctl net.ipv6.conf.all.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth0.forwarding=1 > /dev/null
+ip netns exec "$nsrouter" sysctl net.ipv4.conf.veth1.forwarding=1 > /dev/null
+
+load_ruleset "filter" 0
+
+if test_ping; then
+ # queue bypass works (rules were skipped, no listener)
+ echo "PASS: ${ns1} can reach ${ns2}"
+else
+ echo "FAIL: ${ns1} cannot reach ${ns2}: $ret" 1>&2
+ exit $ret
+fi
+
+test_queue_blackhole ip
+test_queue_blackhole ip6
+
+# dummy ruleset to add base chains between the
+# queueing rules. We don't want the second reinject
+# to re-execute the old hooks.
+load_counter_ruleset 10
+
+# we are hooking all: prerouting/input/forward/output/postrouting.
+# we ping ${ns2} from ${ns1} via ${nsrouter} using ipv4 and ipv6, so:
+# 1x icmp prerouting,forward,postrouting -> 3 queue events (6 incl. reply).
+# 1x icmp prerouting,input,output postrouting -> 4 queue events incl. reply.
+# so we expect that userspace program receives 10 packets.
+test_queue 10
+
+# same. We queue to a second program as well.
+load_ruleset "filter2" 20
+test_queue 20
+
+test_tcp_forward
+test_tcp_localhost
+test_tcp_localhost_connectclose
+test_tcp_localhost_requeue
+test_icmp_vrf
+
+exit $ret
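Review bot: A failed connectivity check can end this script with status 0, because `exit $ret` in test_queue (after `test_ping` and `test_ping_router`) and in the top-level `if test_ping` block runs while `ret` still holds 0; nothing visible assigns the function's return code to it. Use `exit 1` on those three paths, or capture the status first with `test_ping; ret=$?`. test_tcp_localhost_requeue has the same gap: its `lost packets during requeue` branch returns without `ret=1`, and the `wait "$rpid" && echo "PASS: ..."` lines print nothing and leave `ret` untouched when the transfer fails. The comment above the `-G -d 150 -c -q 0 -Q 1` listener also names queues 1 and 2 where the commands use 0 and 1.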
diff --git a/tools/testing/selftests/net/netfilter/nft_synproxy.sh b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
new file mode 100755
index 000000000000..293f667a6aec
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_synproxy.sh
@@ -0,0 +1,96 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+source lib.sh
+
+ret=0
+
+checktool "nft --version" "run test without nft tool"
+checktool "iperf3 --version" "run test without iperf3"
+
+setup_ns nsr ns1 ns2
+
+modprobe -q nf_conntrack
+
+cleanup() {
+ ip netns pids "$ns1" | xargs kill 2>/dev/null
+ ip netns pids "$ns2" | xargs kill 2>/dev/null
+
+ cleanup_all_ns
+}
+
+trap cleanup EXIT
+
+ip link add veth0 netns "$nsr" type veth peer name eth0 netns "$ns1"
+ip link add veth1 netns "$nsr" type veth peer name eth0 netns "$ns2"
+
+for dev in veth0 veth1; do
+ ip -net "$nsr" link set "$dev" up
+done
+
+ip -net "$nsr" addr add 10.0.1.1/24 dev veth0
+ip -net "$nsr" addr add 10.0.2.1/24 dev veth1
+
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth0.forwarding=1
+ip netns exec "$nsr" sysctl -q net.ipv4.conf.veth1.forwarding=1
+ip netns exec "$nsr" sysctl -q net.netfilter.nf_conntrack_tcp_loose=0
+
+for n in $ns1 $ns2; do
+ ip -net "$n" link set eth0 up
+done
+ip -net "$ns1" addr add 10.0.1.99/24 dev eth0
+ip -net "$ns2" addr add 10.0.2.99/24 dev eth0
+ip -net "$ns1" route add default via 10.0.1.1
+ip -net "$ns2" route add default via 10.0.2.1
+
+# test basic connectivity
+if ! ip netns exec "$ns1" ping -c 1 -q 10.0.2.99 > /dev/null; then
+ echo "ERROR: $ns1 cannot reach $ns2" 1>&2
+ exit 1
+fi
+
+if ! ip netns exec "$ns2" ping -c 1 -q 10.0.1.99 > /dev/null; then
+ echo "ERROR: $ns2 cannot reach $ns1" 1>&2
+ exit 1
+fi
+
+ip netns exec "$ns2" iperf3 -s > /dev/null 2>&1 &
+# ip netns exec $nsr tcpdump -vvv -n -i veth1 tcp | head -n 10 &
+
+sleep 1
+
+ip netns exec "$nsr" nft -f - <<EOF
+table inet filter {
+ chain prerouting {
+ type filter hook prerouting priority -300; policy accept;
+ meta iif veth0 tcp flags syn counter notrack
+ }
+
+ chain forward {
+ type filter hook forward priority 0; policy accept;
+
+ ct state new,established counter accept
+
+ meta iif veth0 meta l4proto tcp ct state untracked,invalid synproxy mss 1460 sack-perm timestamp
+
+ ct state invalid counter drop
+
+ # make ns2 unreachable w.o. tcp synproxy
+ tcp flags syn counter drop
+ }
+}
+EOF
+if [ $? -ne 0 ]; then
+ echo "SKIP: Cannot add nft synproxy"
+ exit $ksft_skip
+fi
+
+if ! ip netns exec "$ns1" timeout 5 iperf3 -c 10.0.2.99 -n $((1 * 1024 * 1024)) > /dev/null; then
+ echo "FAIL: iperf3 returned an error" 1>&2
+ ret=1
+ ip netns exec "$nsr" nft list ruleset
+else
+ echo "PASS: synproxy connection successful"
+fi
+
+exit $ret
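Review bot: The fixed `sleep 1` after starting `iperf3 -s` in `$ns2` is a race on slow machines: if the server is not listening yet, the client run fails and the test reports a synproxy failure that is really a startup delay. Other scripts in this series poll for the listener instead. A helper `listener_ready() { ss -N "$1" -lnt -o "sport = :5201" | grep -q 5201; }` followed by `busywait "$BUSYWAIT_TIMEOUT" listener_ready "$ns2"` would replace the sleep and keep a FAIL here attributable to the ruleset under test.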
diff --git a/tools/testing/selftests/net/netfilter/nft_zones_many.sh b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
new file mode 100755
index 000000000000..7db9982ba5a6
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/nft_zones_many.sh
@@ -0,0 +1,164 @@
+#!/bin/bash
+
+# Test insertion speed for packets with identical addresses/ports
+# that are all placed in distinct conntrack zones.
+
+source lib.sh
+
+zones=2000
+[ "$KSFT_MACHINE_SLOW" = yes ] && zones=500
+
+have_ct_tool=0
+ret=0
+
+cleanup()
+{
+ cleanup_all_ns
+}
+
+checktool "nft --version" "run test without nft tool"
+checktool "socat -V" "run test without socat tool"
+
+setup_ns ns1
+
+trap cleanup EXIT
+
+if conntrack -V > /dev/null 2>&1; then
+ have_ct_tool=1
+fi
+
+test_zones() {
+ local max_zones=$1
+
+ip netns exec "$ns1" nft -f /dev/stdin<<EOF
+flush ruleset
+table inet raw {
+ map rndzone {
+ typeof numgen inc mod $max_zones : ct zone
+ }
+
+ chain output {
+ type filter hook output priority -64000; policy accept;
+ udp dport 12345 ct zone set numgen inc mod 65536 map @rndzone
+ }
+}
+EOF
+if [ "$?" -ne 0 ];then
+ echo "SKIP: Cannot add nftables rules"
+ exit $ksft_skip
+fi
+
+ ip netns exec "$ns1" sysctl -q net.netfilter.nf_conntrack_udp_timeout=3600
+
+ (
+ echo "add element inet raw rndzone {"
+ for i in $(seq 1 "$max_zones");do
+ echo -n "$i : $i"
+ if [ "$i" -lt "$max_zones" ]; then
+ echo ","
+ else
+ echo "}"
+ fi
+ done
+ ) | ip netns exec "$ns1" nft -f /dev/stdin
+
+ local i=0
+ local j=0
+ local outerstart
+ local stop
+ outerstart=$(date +%s%3N)
+ stop=$outerstart
+
+ while [ "$i" -lt "$max_zones" ]; do
+ local start
+ start=$(date +%s%3N)
+ i=$((i + 1000))
+ j=$((j + 1))
+ # nft rule in output places each packet in a different zone.
+ dd if=/dev/zero bs=8k count=1000 2>/dev/null | ip netns exec "$ns1" socat -u STDIN UDP:127.0.0.1:12345,sourceport=12345
+ if [ $? -ne 0 ] ;then
+ ret=1
+ break
+ fi
+
+ stop=$(date +%s%3N)
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total, loop $j)"
+ done
+
+ if [ "$have_ct_tool" -eq 1 ]; then
+ local count duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
+
+ if [ "$count" -ge "$max_zones" ]; then
+ echo "PASS: inserted $count entries from packet path in $duration ms total"
+ else
+ ip netns exec "$ns1" conntrack -S 1>&2
+ echo "FAIL: inserted $count entries from packet path in $duration ms total, expected $max_zones entries"
+ ret=1
+ fi
+ fi
+
+ if [ $ret -ne 0 ];then
+ echo "FAIL: insert $max_zones entries from packet path" 1>&2
+ fi
+}
+
+test_conntrack_tool() {
+ local max_zones=$1
+
+ ip netns exec "$ns1" conntrack -F >/dev/null 2>/dev/null
+
+ local outerstart start stop i
+ outerstart=$(date +%s%3N)
+ start=$(date +%s%3N)
+ stop="$start"
+ i=0
+ while [ "$i" -lt "$max_zones" ]; do
+ i=$((i + 1))
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i >/dev/null 2>&1
+ if [ $? -ne 0 ];then
+ ip netns exec "$ns1" conntrack -I -s 1.1.1.1 -d 2.2.2.2 --protonum 6 \
+ --timeout 3600 --state ESTABLISHED --sport 12345 --dport 1000 --zone $i > /dev/null
+ echo "FAIL: conntrack -I returned an error"
+ ret=1
+ break
+ fi
+
+ if [ $((i%1000)) -eq 0 ];then
+ stop=$(date +%s%3N)
+
+ local duration=$((stop-start))
+ echo "PASS: added 1000 entries in $duration ms (now $i total)"
+ start=$stop
+ fi
+ done
+
+ local count
+ local duration
+ count=$(ip netns exec "$ns1" conntrack -C)
+ duration=$((stop-outerstart))
+
+ if [ "$count" -eq "$max_zones" ]; then
+ echo "PASS: inserted $count entries via ctnetlink in $duration ms"
+ else
+ ip netns exec "$ns1" conntrack -S 1>&2
+ echo "FAIL: inserted $count entries via ctnetlink in $duration ms, expected $max_zones entries ($duration ms)"
+ ret=1
+ fi
+}
+
+test_zones $zones
+
+if [ "$have_ct_tool" -eq 1 ];then
+ test_conntrack_tool $zones
+else
+ echo "SKIP: Could not run ctnetlink insertion test without conntrack tool"
+ if [ $ret -eq 0 ];then
+ exit $ksft_skip
+ fi
+fi
+
+exit $ret
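Review bot: In test_conntrack_tool the total time is computed from `stop`, which is only refreshed when `i % 1000` is 0, so with `zones=500` (the KSFT_MACHINE_SLOW case) it keeps its initial value and the reported total is close to zero; for any count that is not a multiple of 1000 the tail of the run is not measured. Taking `stop=$(date +%s%3N)` once after the loop, before `duration=$((stop-outerstart))`, fixes both cases. The FAIL message in the same function also prints `$duration ms` twice.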
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/common.sh b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
new file mode 100755
index 000000000000..ed36d535196d
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/common.sh
@@ -0,0 +1,33 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# for debugging set net.netfilter.nf_log_all_netns=1 in init_net
+# or do not use net namespaces.
+modprobe -q nf_conntrack
+sysctl -q net.netfilter.nf_conntrack_log_invalid=6
+
+# Flush old cached data (fastopen cookies).
+ip tcp_metrics flush all > /dev/null 2>&1
+
+# TCP min, default, and max receive and send buffer sizes.
+sysctl -q net.ipv4.tcp_rmem="4096 540000 $((15*1024*1024))"
+sysctl -q net.ipv4.tcp_wmem="4096 $((256*1024)) 4194304"
+
+# TCP congestion control.
+sysctl -q net.ipv4.tcp_congestion_control=cubic
+
+# TCP slow start after idle.
+sysctl -q net.ipv4.tcp_slow_start_after_idle=0
+
+# TCP Explicit Congestion Notification (ECN)
+sysctl -q net.ipv4.tcp_ecn=0
+
+sysctl -q net.ipv4.tcp_notsent_lowat=4294967295 > /dev/null 2>&1
+
+# Override the default qdisc on the tun device.
+# Many tests fail with timing errors if the default
+# is FQ and that paces their flows.
+tc qdisc add dev tun0 root pfifo
+
+# Enable conntrack
+$xtables -A INPUT -m conntrack --ctstate NEW -p tcp --syn
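Review bot: The last line runs `$xtables` unquoted and unchecked, and nothing in this file sets it; if the caller does not provide it, the line expands to `-A INPUT ...`, fails as a missing command, and the rule that the comment says enables conntrack is never added. A guard near the top such as `: "${xtables:?xtables must be set by the caller}"` makes that failure explicit instead of surfacing later as state mismatches in the .pkt scripts. The header comment allows running without a net namespace, so it also deserves a comment that the sysctl writes (`tcp_rmem`, `tcp_wmem`, `tcp_ecn`, `nf_conntrack_log_invalid`) and the appended INPUT rule are not restored and then persist on the host.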
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
new file mode 100644
index 000000000000..d755bd64c54f
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_ack_loss_stall.pkt
@@ -0,0 +1,118 @@
+// check that already-acked (retransmitted) packet is let through rather
+// than tagged as INVALID.
+
+`packetdrill/common.sh`
+
+// should set -P DROP but it disconnects VM w.o. extra netns
++0 `$xtables -A INPUT -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000>
++0 > S. 0:0(0) ack 1 <mss 1460>
++.01 < . 1:1(0) ack 1 win 65535
++0 accept(3, ..., ...) = 4
+
++0.0001 < P. 1:1461(1460) ack 1 win 257
++.0 > . 1:1(0) ack 1461 win 65535
++0.0001 < P. 1461:2921(1460) ack 1 win 257
++.0 > . 1:1(0) ack 2921 win 65535
++0.0001 < P. 2921:4381(1460) ack 1 win 257
++.0 > . 1:1(0) ack 4381 win 65535
++0.0001 < P. 4381:5841(1460) ack 1 win 257
++.0 > . 1:1(0) ack 5841 win 65535
++0.0001 < P. 5841:7301(1460) ack 1 win 257
++.0 > . 1:1(0) ack 7301 win 65535
++0.0001 < P. 7301:8761(1460) ack 1 win 257
++.0 > . 1:1(0) ack 8761 win 65535
++0.0001 < P. 8761:10221(1460) ack 1 win 257
++.0 > . 1:1(0) ack 10221 win 65535
++0.0001 < P. 10221:11681(1460) ack 1 win 257
++.0 > . 1:1(0) ack 11681 win 65535
++0.0001 < P. 11681:13141(1460) ack 1 win 257
++.0 > . 1:1(0) ack 13141 win 65535
++0.0001 < P. 13141:14601(1460) ack 1 win 257
++.0 > . 1:1(0) ack 14601 win 65535
++0.0001 < P. 14601:16061(1460) ack 1 win 257
++.0 > . 1:1(0) ack 16061 win 65535
++0.0001 < P. 16061:17521(1460) ack 1 win 257
++.0 > . 1:1(0) ack 17521 win 65535
++0.0001 < P. 17521:18981(1460) ack 1 win 257
++.0 > . 1:1(0) ack 18981 win 65535
++0.0001 < P. 18981:20441(1460) ack 1 win 257
++.0 > . 1:1(0) ack 20441 win 65535
++0.0001 < P. 20441:21901(1460) ack 1 win 257
++.0 > . 1:1(0) ack 21901 win 65535
++0.0001 < P. 21901:23361(1460) ack 1 win 257
++.0 > . 1:1(0) ack 23361 win 65535
++0.0001 < P. 23361:24821(1460) ack 1 win 257
+0.055 > . 1:1(0) ack 24821 win 65535
++0.0001 < P. 24821:26281(1460) ack 1 win 257
++.0 > . 1:1(0) ack 26281 win 65535
++0.0001 < P. 26281:27741(1460) ack 1 win 257
++.0 > . 1:1(0) ack 27741 win 65535
++0.0001 < P. 27741:29201(1460) ack 1 win 257
++.0 > . 1:1(0) ack 29201 win 65535
++0.0001 < P. 29201:30661(1460) ack 1 win 257
++.0 > . 1:1(0) ack 30661 win 65535
++0.0001 < P. 30661:32121(1460) ack 1 win 257
++.0 > . 1:1(0) ack 32121 win 65535
++0.0001 < P. 32121:33581(1460) ack 1 win 257
++.0 > . 1:1(0) ack 33581 win 65535
++0.0001 < P. 33581:35041(1460) ack 1 win 257
++.0 > . 1:1(0) ack 35041 win 65535
++0.0001 < P. 35041:36501(1460) ack 1 win 257
++.0 > . 1:1(0) ack 36501 win 65535
++0.0001 < P. 36501:37961(1460) ack 1 win 257
++.0 > . 1:1(0) ack 37961 win 65535
++0.0001 < P. 37961:39421(1460) ack 1 win 257
++.0 > . 1:1(0) ack 39421 win 65535
++0.0001 < P. 39421:40881(1460) ack 1 win 257
++.0 > . 1:1(0) ack 40881 win 65535
++0.0001 < P. 40881:42341(1460) ack 1 win 257
++.0 > . 1:1(0) ack 42341 win 65535
++0.0001 < P. 42341:43801(1460) ack 1 win 257
++.0 > . 1:1(0) ack 43801 win 65535
++0.0001 < P. 43801:45261(1460) ack 1 win 257
++.0 > . 1:1(0) ack 45261 win 65535
++0.0001 < P. 45261:46721(1460) ack 1 win 257
++.0 > . 1:1(0) ack 46721 win 65535
++0.0001 < P. 46721:48181(1460) ack 1 win 257
++.0 > . 1:1(0) ack 48181 win 65535
++0.0001 < P. 48181:49641(1460) ack 1 win 257
++.0 > . 1:1(0) ack 49641 win 65535
++0.0001 < P. 49641:51101(1460) ack 1 win 257
++.0 > . 1:1(0) ack 51101 win 65535
++0.0001 < P. 51101:52561(1460) ack 1 win 257
++.0 > . 1:1(0) ack 52561 win 65535
++0.0001 < P. 52561:54021(1460) ack 1 win 257
++.0 > . 1:1(0) ack 54021 win 65535
++0.0001 < P. 54021:55481(1460) ack 1 win 257
++.0 > . 1:1(0) ack 55481 win 65535
++0.0001 < P. 55481:56941(1460) ack 1 win 257
++.0 > . 1:1(0) ack 56941 win 65535
++0.0001 < P. 56941:58401(1460) ack 1 win 257
++.0 > . 1:1(0) ack 58401 win 65535
++0.0001 < P. 58401:59861(1460) ack 1 win 257
++.0 > . 1:1(0) ack 59861 win 65535
++0.0001 < P. 59861:61321(1460) ack 1 win 257
++.0 > . 1:1(0) ack 61321 win 65535
++0.0001 < P. 61321:62781(1460) ack 1 win 257
++.0 > . 1:1(0) ack 62781 win 65535
++0.0001 < P. 62781:64241(1460) ack 1 win 257
++.0 > . 1:1(0) ack 64241 win 65535
++0.0001 < P. 64241:65701(1460) ack 1 win 257
++.0 > . 1:1(0) ack 65701 win 65535
++0.0001 < P. 65701:67161(1460) ack 1 win 257
++.0 > . 1:1(0) ack 67161 win 65535
+
+// nf_ct_proto_6: SEQ is under the lower bound (already ACKed data retransmitted) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.24.72 LEN=1500 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=34375 DPT=8080 SEQ=1 ACK=4162510439 WINDOW=257 RES=0x00 ACK PSH URGP=0
++0.0001 < P. 1:1461(1460) ack 1 win 257
+
+// only sent if above packet isn't flagged as invalid
++.0 > . 1:1(0) ack 67161 win 65535
+
++0 `$xtables -D INPUT -m conntrack --ctstate INVALID -j DROP`
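Review bot: The expected ACK for 24821 is written with an absolute time, `0.055 > . 1:1(0) ack 24821 win 65535`, while every other ACK in the run uses the relative `+.0`, and no comment says whether that is deliberate. If it models a delayed ACK, a line such as `// this ACK is delayed; the absolute timestamp is intentional` above it would stop a later cleanup from normalising it. Separately, the closing `$xtables -D INPUT ...` only runs when every earlier step matched, so a failing run leaves the INVALID drop rule in place unless the runner (not visible here) discards the namespace.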
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
new file mode 100644
index 000000000000..dccdd4c009c6
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_inexact_rst.pkt
@@ -0,0 +1,62 @@
+// check RST packet that doesn't exactly match expected next sequence
+// number still transitions conntrack state to CLOSE iff its already in
+// FIN/CLOSE_WAIT.
+
+`packetdrill/common.sh`
+
+// 5.771921 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.771994 server_ip > client_ip TLSv1.2 337 [Packet size limited during capture]
+// 5.772212 client_ip > server_ip TCP 66 45020 > 443 [ACK] Seq=1905874048 Ack=781810658 Win=36352 Len=0 TSval=3317842872 TSecr=675936334
+// 5.787924 server_ip > client_ip TLSv1.2 1300 [Packet size limited during capture]
+// 5.788126 server_ip > client_ip TLSv1.2 90 Application Data
+// 5.788207 server_ip > client_ip TCP 66 443 > 45020 [FIN, ACK] Seq=781811916 Ack=1905874048 Win=31104 Len=0 TSval=675936350 TSecr=3317842872
+// 5.788447 client_ip > server_ip TLSv1.2 90 Application Data
+// 5.788479 client_ip > server_ip TCP 66 45020 > 443 [RST, ACK] Seq=1905874072 Ack=781811917 Win=39040 Len=0 TSval=3317842889 TSecr=675936350
+// 5.788581 server_ip > client_ip TCP 54 8443 > 45020 [RST] Seq=781811892 Win=0 Len=0
+
++0 `iptables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `iptables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
+
+// Conntrack should move to FIN_WAIT, then CLOSE_WAIT.
++0 < F. 3001:3001(0) ack 1001 win 65535
++0 > . 1001:1001(0) ack 3002 win 65535
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE_WAIT`
+
++1 close(3) = 0
+// RST: unread data. FIN was seen, hence ack + 1
++0 > R. 1001:1001(0) ack 3002 win 65535
+// ... and then, CLOSE.
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
+
+// Spurious RST from peer -- no sk state. Should NOT get
+// marked INVALID, because conntrack is already closing.
++0.1 < R 2001:2001(0) win 0
+
+// No packets should have been marked INVALID
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `iptables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
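Review bot: This script calls `iptables` directly on all four rule and counter lines, while its conntrack queries use `$NFCT_IP_VERSION` and the other packetdrill scripts in this series use `$xtables`. If the runner also executes it for IPv6 (inferred from those variables; the runner is not in view), the IPv4 rules never see the traffic and the closing `grep -q -- "-c 0 0"` checks pass without testing anything. Replacing `iptables` with `$xtables` on those four lines keeps the INVALID counters tied to the address family under test.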
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
new file mode 100644
index 000000000000..686f18a3d9ef
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_rst_invalid.pkt
@@ -0,0 +1,59 @@
+// check that out of window resets are marked as INVALID and conntrack remains
+// in ESTABLISHED state.
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
++0.1 < S. 1:1(0) ack 1 win 65535 <mss 1460>
+
++0 > . 1:1(0) ack 1 win 65535
++0 < . 1:1001(1000) ack 1 win 65535
++0 < . 1001:2001(1000) ack 1 win 65535
++0 < . 2001:3001(1000) ack 1 win 65535
+
++0 > . 1:1(0) ack 1001 win 65535
++0 > . 1:1(0) ack 2001 win 65535
++0 > . 1:1(0) ack 3001 win 65535
+
++0 write(3, ..., 1000) = 1000
+
+// out of window
++0.0 < R 0:0(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// out of window
++0.0 < R 1000000:1000000(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// in-window but not exact match
++0.0 < R 42:42(0) win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0.0 > P. 1:1001(1000) ack 3001 win 65535
+
++0.1 read(3, ..., 1000) = 1000
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
++0 < . 3001:3001(0) ack 1001 win 65535
+
++0.0 < R. 3000:3000(0) ack 1001 win 0
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q ESTABLISHED`
+
+// exact next sequence
++0.0 < R. 3001:3001(0) ack 1001 win 0
+// Conntrack should move to CLOSE
+
+// Expect four invalid RSTs
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 4 "`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null |grep -q CLOSE\ `
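Review bot: The two INVALID drop rules appended at the top are never deleted, unlike conntrack_ack_loss_stall.pkt, which ends with a matching `-D`; conntrack_inexact_rst.pkt and conntrack_syn_challenge_ack.pkt have the same omission. common.sh explicitly allows running without a net namespace, and in that case each run leaves another pair of drop rules in the INPUT and OUTPUT chains, which can also skew the `-c` counter greps of later runs because `grep INVALID` then sees several rules. Ending the script with `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP` and the OUTPUT equivalent, after the final checks, avoids that; if the runner always uses a throwaway namespace, a comment saying so would be enough.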
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
new file mode 100644
index 000000000000..3442cd29bc93
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_syn_challenge_ack.pkt
@@ -0,0 +1,44 @@
+// Check connection re-use, i.e. peer that receives the SYN answers with
+// a challenge-ACK.
+// Check that conntrack lets all packets pass, including the challenge ack,
+// and that a new connection is established.
+
+`packetdrill/common.sh`
+
+// S >
+// . < (challnge-ack)
+// R. >
+// S >
+// S. <
+// Expected outcome: established connection.
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// Challenge ACK, old incarnation.
+0.1 < . 145824453:145824453(0) ack 643160523 win 240 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
++0.01 > R 643160523:643160523(0) win 0
+
++0.01 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// Must go through.
++0.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// correct synack
++0.1 < S. 0:0(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// 3whs completes.
++0.01 > . 1:1(0) ack 1 win 256 <nop,nop,TS val 1 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
+// No packets should have been marked INVALID
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
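Review bot: The flow sketch in the header does not match the script: it lists the reset as `R. >`, but the packet the script expects is `> R 643160523:643160523(0) win 0`, a plain RST without ACK, and `challnge-ack` is misspelled. These tests are about how conntrack classifies individual segments, so the sketch should name the flags exactly: `// R >` and `// . < (challenge-ack)`.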
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
new file mode 100644
index 000000000000..3047160c4bf3
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_old.pkt
@@ -0,0 +1,51 @@
+// Check conntrack copes with syn/ack reply for a previous, old incarnation.
+
+// tcpdump with buggy sequence
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083107423 ecr 0,nop,wscale 7], length 0
+// OLD synack, for old/previous S
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 145824453, ack 643160523, win 65535, options [mss 8952,nop,wscale 5,TS val 3215437785 ecr 2082921663,nop,nop], length 0
+// This reset never makes it to the endpoint, elided in the packetdrill script
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [R.], seq 1, ack 1, win 65535, options [mss 8952,nop,wscale 5,TS val 3215443451 ecr 2082921663,nop,nop], length 0
+// Syn retransmit, no change
+// 10.176.25.8.829 > 10.192.171.30.2049: Flags [S], seq 2375731741, win 29200, options [mss 1460,sackOK,TS val 2083115583 ecr 0,nop,wscale 7], length 0
+// CORRECT synack, should be accepted, but conntrack classified this as INVALID:
+// SEQ is over the upper bound (over the window of the receiver) IN=tun0 OUT= MAC= SRC=192.0.2.1 DST=192.168.37.78 LEN=40 TOS=0x00 PREC=0x00 TTL=255 ID=0 PROTO=TCP SPT=8080 DPT=34500 SEQ=162602411 ACK=2124350315 ..
+// 10.192.171.30.2049 > 10.176.25.8.829: Flags [S.], seq 162602410, ack 2375731742, win 65535, options [mss 8952,nop,wscale 5,TS val 3215445754 ecr 2083115583,nop,nop], length 0
+
+`packetdrill/common.sh`
+
++0 `$xtables -A INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -A OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 fcntl(3, F_SETFL, O_RDWR|O_NONBLOCK) = 0
+
+0.1 connect(3, ..., ...) = -1 EINPROGRESS (Operation now in progress)
+0.1 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1 ecr 0,nop,wscale 8>
+
+// bogus/outdated synack, invalid ack value
+0.1 < S. 145824453:145824453(0) ack 643160523 win 240 <mss 1440,nop,nop,TS val 1 ecr 1,nop,wscale 0>
+
+// syn retransmitted
+1.01 > S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep UNREPLIED | grep -q SYN_SENT`
+
+// correct synack
++0 < S. 145758918:145758918(0) ack 1 win 250 <mss 1460,nop,nop,TS val 1 ecr 1,nop,wscale 0>
++0 write(3, ..., 1) = 1
+
+// with buggy conntrack above packet is dropped, so SYN rtx is seen:
+// script packet: 1.054007 . 1:1(0) ack 16777958 win 256 <nop,nop,TS val 1033 ecr 1>
+// actual packet: 3.010000 S 0:0(0) win 65535 <mss 1460,sackOK,TS val 1015 ecr 0,nop,wscale 8>
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ESTABLISHED | grep -q ASSURED`
+
++0 > P. 1:2(1) ack 4294901762 win 256 <nop,nop,TS val 1067 ecr 1>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep ASSURED | grep -q ESTABLISHED`
+
+// No packets should have been marked INVALID in OUTPUT direction, 1 in INPUT
++0 `$xtables -v -S OUTPUT | grep INVALID | grep -q -- "-c 0 0"`
++0 `$xtables -v -S INPUT | grep INVALID | grep -q -- "-c 1 "`
+
++0 `$xtables -D INPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
++0 `$xtables -D OUTPUT -p tcp -m conntrack --ctstate INVALID -j DROP`
diff --git a/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
new file mode 100644
index 000000000000..842242f8ccf7
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/packetdrill/conntrack_synack_reuse.pkt
@@ -0,0 +1,34 @@
+// Check reception of another SYN while we have an established conntrack state.
+// Challenge ACK is supposed to pass through, RST reply should clear conntrack
+// state and SYN retransmit should give us new 'SYN_RECV' connection state.
+
+`packetdrill/common.sh`
+
+// should show a match if bug is present:
++0 `iptables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK`
+
++0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3
++0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0
++0 bind(3, ..., ...) = 0
++0 listen(3, 10) = 0
+
++0 < S 0:0(0) win 32792 <mss 1000,nop,wscale 7, TS val 1 ecr 0,nop,nop>
++0 > S. 0:0(0) ack 1 <mss 1460,nop,nop,TS val 100 ecr 1,nop,wscale 8>
++.01 < . 1:1(0) ack 1 win 257 <TS val 1 ecr 100,nop,nop>
++0 accept(3, ..., ...) = 4
+
++0 < P. 1:101(100) ack 1 win 257 <TS val 2 ecr 100,nop,nop>
++.001 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 110 ecr 2>
++0 read(4, ..., 101) = 100
+
+1.0 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 7, TS val 233 ecr 0,nop,nop>
+// Won't expect this: challenge ack.
+
++0 > . 1:1(0) ack 101 win 256 <nop,nop,TS val 112 ecr 2>
++0 < R. 101:101(0) ack 1 win 257
++0 close(4) = 0
+
+1.5 < S 2000:2000(0) win 32792 <mss 1000,nop,wscale 0, TS val 233 ecr 0,nop,nop>
+
++0 `conntrack -f $NFCT_IP_VERSION -L -p tcp --dport 8080 2>/dev/null | grep -q SYN_RECV`
++0 `iptables -v -S INPUT | grep INVALID | grep -q -- "-c 0 0"`
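Review bot: The counting rule and the final check call `iptables` literally, while the other two scripts in this patch use `$xtables` and this script itself passes `$NFCT_IP_VERSION` to conntrack. If the script is also run for IPv6 (that depends on the runner, which is not visible here), the rule sits in the IPv4 table and never sees the test traffic, so `grep -q -- "-c 0 0"` passes without testing anything. Suggested: `$xtables -A INPUT -m conntrack --ctstate INVALID -p tcp --tcp-flags SYN,ACK SYN,ACK` for the rule and `$xtables -v -S INPUT` in the check.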
diff --git a/tools/testing/selftests/net/netfilter/rpath.sh b/tools/testing/selftests/net/netfilter/rpath.sh
new file mode 100755
index 000000000000..4485fd7675ed
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/rpath.sh
@@ -0,0 +1,175 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+
+# search for legacy iptables (it uses the xtables extensions
+if iptables-legacy --version >/dev/null 2>&1; then
+ iptables='iptables-legacy'
+elif iptables --version >/dev/null 2>&1; then
+ iptables='iptables'
+else
+ iptables=''
+fi
+
+if ip6tables-legacy --version >/dev/null 2>&1; then
+ ip6tables='ip6tables-legacy'
+elif ip6tables --version >/dev/null 2>&1; then
+ ip6tables='ip6tables'
+else
+ ip6tables=''
+fi
+
+if nft --version >/dev/null 2>&1; then
+ nft='nft'
+else
+ nft=''
+fi
+
+if [ -z "$iptables$ip6tables$nft" ]; then
+ echo "SKIP: Test needs iptables, ip6tables or nft"
+ exit $ksft_skip
+fi
+
+sfx=$(mktemp -u "XXXXXXXX")
+ns1="ns1-$sfx"
+ns2="ns2-$sfx"
+trap "ip netns del $ns1; ip netns del $ns2" EXIT
+
+# create two netns, disable rp_filter in ns2 and
+# keep IPv6 address when moving into VRF
+ip netns add "$ns1"
+ip netns add "$ns2"
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.all.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv4.conf.default.rp_filter=0
+ip netns exec "$ns2" sysctl -q net.ipv6.conf.all.keep_addr_on_down=1
+
+# a standard connection between the netns, should not trigger rp filter
+ip -net "$ns1" link add v0 type veth peer name v0 netns "$ns2"
+ip -net "$ns1" link set v0 up; ip -net "$ns2" link set v0 up
+ip -net "$ns1" a a 192.168.23.2/24 dev v0
+ip -net "$ns2" a a 192.168.23.1/24 dev v0
+ip -net "$ns1" a a fec0:23::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:23::1/64 dev v0 nodad
+
+# rp filter testing: ns1 sends packets via v0 which ns2 would route back via d0
+ip -net "$ns2" link add d0 type dummy
+ip -net "$ns2" link set d0 up
+ip -net "$ns1" a a 192.168.42.2/24 dev v0
+ip -net "$ns2" a a 192.168.42.1/24 dev d0
+ip -net "$ns1" a a fec0:42::2/64 dev v0 nodad
+ip -net "$ns2" a a fec0:42::1/64 dev d0 nodad
+
+# firewall matches to test
+[ -n "$iptables" ] && {
+ common='-t raw -A PREROUTING -s 192.168.0.0/16'
+ if ! ip netns exec "$ns2" "$iptables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
+ ip netns exec "$ns2" "$iptables" $common -m rpfilter --invert
+}
+[ -n "$ip6tables" ] && {
+ common='-t raw -A PREROUTING -s fec0::/16'
+ if ! ip netns exec "$ns2" "$ip6tables" $common -m rpfilter;then
+ echo "Cannot add rpfilter rule"
+ exit $ksft_skip
+ fi
+ ip netns exec "$ns2" "$ip6tables" $common -m rpfilter --invert
+}
+[ -n "$nft" ] && ip netns exec "$ns2" $nft -f - <<EOF
+table inet t {
+ chain c {
+ type filter hook prerouting priority raw;
+ ip saddr 192.168.0.0/16 fib saddr . iif oif exists counter
+ ip6 saddr fec0::/16 fib saddr . iif oif exists counter
+ }
+}
+EOF
+
+die() {
+ echo "FAIL: $*"
+ #ip netns exec "$ns2" "$iptables" -t raw -vS
+ #ip netns exec "$ns2" "$ip6tables" -t raw -vS
+ #ip netns exec "$ns2" nft list ruleset
+ exit 1
+}
+
+# check rule counters, return true if rule did not match
+ipt_zero_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | grep -q -- "-m rpfilter -c 0 0"
+}
+ipt_zero_reverse_rule() { # (command)
+ [ -n "$1" ] || return 0
+ ip netns exec "$ns2" "$1" -t raw -vS | \
+ grep -q -- "-m rpfilter --invert -c 0 0"
+}
+nft_zero_rule() { # (family)
+ [ -n "$nft" ] || return 0
+ ip netns exec "$ns2" "$nft" list chain inet t c | \
+ grep -q "$1 saddr .* counter packets 0 bytes 0"
+}
+
+netns_ping() { # (netns, args...)
+ local netns="$1"
+ shift
+ ip netns exec "$netns" ping -q -c 1 -W 1 "$@" >/dev/null
+}
+
+clear_counters() {
+ [ -n "$iptables" ] && ip netns exec "$ns2" "$iptables" -t raw -Z
+ [ -n "$ip6tables" ] && ip netns exec "$ns2" "$ip6tables" -t raw -Z
+ if [ -n "$nft" ]; then
+ (
+ echo "delete table inet t";
+ ip netns exec "$ns2" $nft -s list table inet t;
+ ) | ip netns exec "$ns2" $nft -f -
+ fi
+}
+
+testrun() {
+ clear_counters
+
+ # test 1: martian traffic should fail rpfilter matches
+ netns_ping "$ns1" -I v0 192.168.42.1 && \
+ die "martian ping 192.168.42.1 succeeded"
+ netns_ping "$ns1" -I v0 fec0:42::1 && \
+ die "martian ping fec0:42::1 succeeded"
+
+ ipt_zero_rule "$iptables" || die "iptables matched martian"
+ ipt_zero_rule "$ip6tables" || die "ip6tables matched martian"
+ ipt_zero_reverse_rule "$iptables" && die "iptables not matched martian"
+ ipt_zero_reverse_rule "$ip6tables" && die "ip6tables not matched martian"
+ nft_zero_rule ip || die "nft IPv4 matched martian"
+ nft_zero_rule ip6 || die "nft IPv6 matched martian"
+
+ clear_counters
+
+ # test 2: rpfilter match should pass for regular traffic
+ netns_ping "$ns1" 192.168.23.1 || \
+ die "regular ping 192.168.23.1 failed"
+ netns_ping "$ns1" fec0:23::1 || \
+ die "regular ping fec0:23::1 failed"
+
+ ipt_zero_rule "$iptables" && die "iptables match not effective"
+ ipt_zero_rule "$ip6tables" && die "ip6tables match not effective"
+ ipt_zero_reverse_rule "$iptables" || die "iptables match over-effective"
+ ipt_zero_reverse_rule "$ip6tables" || die "ip6tables match over-effective"
+ nft_zero_rule ip && die "nft IPv4 match not effective"
+ nft_zero_rule ip6 && die "nft IPv6 match not effective"
+
+}
+
+testrun
+
+# repeat test with vrf device in $ns2
+ip -net "$ns2" link add vrf0 type vrf table 10
+ip -net "$ns2" link set vrf0 up
+ip -net "$ns2" link set v0 master vrf0
+
+testrun
+
+echo "PASS: netfilter reverse path match works as intended"
+exit 0
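Review bot: `ipt_zero_rule`, `ipt_zero_reverse_rule` and `nft_zero_rule` return 0 when their tool is absent, but testrun uses them in both polarities, so every `helper && die ...` line fires for a missing tool. The start-up check only requires one of iptables, ip6tables or nft, so a host with nft alone passes the skip check and then stops at `die "iptables not matched martian"`. Guarding the `&& die` call sites on the tool variable fixes this and leaves the `|| die` lines as they are. The comment `# search for legacy iptables (it uses the xtables extensions` is also missing its closing parenthesis.

```shell
# … unchanged: test 1 pings and the `|| die` counter checks …
	[ -n "$iptables" ] && ipt_zero_reverse_rule "$iptables" && \
		die "iptables not matched martian"
	[ -n "$ip6tables" ] && ipt_zero_reverse_rule "$ip6tables" && \
		die "ip6tables not matched martian"
# … unchanged: nft martian checks, clear_counters, test 2 pings …
	[ -n "$iptables" ] && ipt_zero_rule "$iptables" && \
		die "iptables match not effective"
	[ -n "$ip6tables" ] && ipt_zero_rule "$ip6tables" && \
		die "ip6tables match not effective"
# … unchanged: reverse-rule `|| die` checks …
	[ -n "$nft" ] && nft_zero_rule ip && die "nft IPv4 match not effective"
	[ -n "$nft" ] && nft_zero_rule ip6 && die "nft IPv6 match not effective"
```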
diff --git a/tools/testing/selftests/net/netfilter/sctp_collision.c b/tools/testing/selftests/net/netfilter/sctp_collision.c
new file mode 100644
index 000000000000..21bb1cfd8a85
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/sctp_collision.c
@@ -0,0 +1,99 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <string.h>
+#include <unistd.h>
+#include <arpa/inet.h>
+
+int main(int argc, char *argv[])
+{
+ struct sockaddr_in saddr = {}, daddr = {};
+ int sd, ret, len = sizeof(daddr);
+ struct timeval tv = {25, 0};
+ char buf[] = "hello";
+
+ if (argc != 6 || (strcmp(argv[1], "server") && strcmp(argv[1], "client"))) {
+ printf("%s <server|client> <LOCAL_IP> <LOCAL_PORT> <REMOTE_IP> <REMOTE_PORT>\n",
+ argv[0]);
+ return -1;
+ }
+
+ sd = socket(AF_INET, SOCK_SEQPACKET, IPPROTO_SCTP);
+ if (sd < 0) {
+ printf("Failed to create sd\n");
+ return -1;
+ }
+
+ saddr.sin_family = AF_INET;
+ saddr.sin_addr.s_addr = inet_addr(argv[2]);
+ saddr.sin_port = htons(atoi(argv[3]));
+
+ ret = bind(sd, (struct sockaddr *)&saddr, sizeof(saddr));
+ if (ret < 0) {
+ printf("Failed to bind to address\n");
+ goto out;
+ }
+
+ ret = listen(sd, 5);
+ if (ret < 0) {
+ printf("Failed to listen on port\n");
+ goto out;
+ }
+
+ daddr.sin_family = AF_INET;
+ daddr.sin_addr.s_addr = inet_addr(argv[4]);
+ daddr.sin_port = htons(atoi(argv[5]));
+
+ /* make test shorter than 25s */
+ ret = setsockopt(sd, SOL_SOCKET, SO_RCVTIMEO, &tv, sizeof(tv));
+ if (ret < 0) {
+ printf("Failed to setsockopt SO_RCVTIMEO\n");
+ goto out;
+ }
+
+ if (!strcmp(argv[1], "server")) {
+ sleep(1); /* wait a bit for client's INIT */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ printf("Server: sent! %d\n", ret);
+ }
+
+ if (!strcmp(argv[1], "client")) {
+ usleep(300000); /* wait a bit for server's listening */
+ ret = connect(sd, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to connect to peer\n");
+ goto out;
+ }
+ sleep(1); /* wait a bit for server's delayed INIT_ACK to reproduce the issue */
+ ret = sendto(sd, buf, strlen(buf) + 1, 0, (struct sockaddr *)&daddr, len);
+ if (ret < 0) {
+ printf("Failed to send msg %d\n", ret);
+ goto out;
+ }
+ ret = recvfrom(sd, buf, sizeof(buf), 0, (struct sockaddr *)&daddr, &len);
+ if (ret < 0) {
+ printf("Failed to recv msg %d\n", ret);
+ goto out;
+ }
+ printf("Client: rcvd! %d\n", ret);
+ }
+ ret = 0;
+out:
+ close(sd);
+ return ret;
+}
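Review bot: In the server branch the reply length is `strlen(buf) + 1`, computed on a buffer that `recvfrom()` has just filled with up to `sizeof(buf)` bytes from the peer. Nothing guarantees a terminating NUL, so strlen can read past the 6-byte array. Echoing what was received avoids this: `ret = sendto(sd, buf, ret, 0, (struct sockaddr *)&daddr, len);`. Separately, `len` is declared `int` but its address is passed to `recvfrom()`, which takes a `socklen_t *`; declaring it as `socklen_t len = sizeof(daddr);` fixes the type. The `inet_addr()` and `atoi()` results are used unchecked, so a malformed argument only shows up later as a bind or connect failure.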
diff --git a/tools/testing/selftests/net/netfilter/settings b/tools/testing/selftests/net/netfilter/settings
new file mode 100644
index 000000000000..abc5648b59ab
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/settings
@@ -0,0 +1 @@
+timeout=1800
diff --git a/tools/testing/selftests/net/netfilter/xt_string.sh b/tools/testing/selftests/net/netfilter/xt_string.sh
new file mode 100755
index 000000000000..8d401c69e317
--- /dev/null
+++ b/tools/testing/selftests/net/netfilter/xt_string.sh
@@ -0,0 +1,133 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+# return code to signal skipped test
+ksft_skip=4
+rc=0
+
+source lib.sh
+
+checktool "socat -h" "run test without socat"
+checktool "iptables --version" "test needs iptables"
+
+infile=$(mktemp)
+
+cleanup()
+{
+ ip netns del "$netns"
+ rm -f "$infile"
+}
+
+trap cleanup EXIT
+
+setup_ns netns
+
+ip -net "$netns" link add d0 type dummy
+ip -net "$netns" link set d0 up
+ip -net "$netns" addr add 10.1.2.1/24 dev d0
+
+pattern="foo bar baz"
+patlen=11
+hdrlen=$((20 + 8)) # IPv4 + UDP
+
+#ip netns exec "$netns" tcpdump -npXi d0 &
+#tcpdump_pid=$!
+#trap 'kill $tcpdump_pid; ip netns del $netns' EXIT
+
+add_rule() { # (alg, from, to)
+ ip netns exec "$netns" \
+ iptables -A OUTPUT -o d0 -m string \
+ --string "$pattern" --algo "$1" --from "$2" --to "$3"
+}
+showrules() { # ()
+ ip netns exec "$netns" iptables -v -S OUTPUT | grep '^-A'
+}
+zerorules() {
+ ip netns exec "$netns" iptables -Z OUTPUT
+}
+countrule() { # (pattern)
+ showrules | grep -c -- "$*"
+}
+send() { # (offset)
+ ( for ((i = 0; i < $1 - hdrlen; i++)); do
+ echo -n " "
+ done
+ echo -n "$pattern"
+ ) > "$infile"
+
+ ip netns exec "$netns" socat -t 1 -u STDIN UDP-SENDTO:10.1.2.2:27374 < "$infile"
+}
+
+add_rule bm 1000 1500
+add_rule bm 1400 1600
+add_rule kmp 1000 1500
+add_rule kmp 1400 1600
+
+zerorules
+send 0
+send $((1000 - patlen))
+if [ "$(countrule -c 0 0)" -ne 4 ]; then
+ echo "FAIL: rules match data before --from"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1000
+send $((1400 - patlen))
+if [ "$(countrule -c 2)" -ne 2 ]; then
+ echo "FAIL: only two rules should match at low offset"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1500 - patlen))
+if [ "$(countrule -c 1)" -ne 4 ]; then
+ echo "FAIL: all rules should match at end of packet"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1495
+if [ "$(countrule -c 1)" -ne 1 ]; then
+ echo "FAIL: only kmp with proper --to should match pattern spanning fragments"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1500
+if [ "$(countrule -c 1)" -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at start of second fragment"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - patlen))
+if [ "$(countrule -c 1)" -ne 2 ]; then
+ echo "FAIL: two rules should match pattern at end of largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send $((1600 - patlen + 1))
+if [ "$(countrule -c 1)" -ne 0 ]; then
+ echo "FAIL: no rules should match pattern extending largest --to"
+ showrules
+ ((rc--))
+fi
+
+zerorules
+send 1600
+if [ "$(countrule -c 1)" -ne 0 ]; then
+ echo "FAIL: no rule should match pattern past largest --to"
+ showrules
+ ((rc--))
+fi
+
+[ $rc -eq 0 ] && echo "PASS: string match tests"
+exit $rc
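Review bot: `countrule` matches a substring, so `countrule -c 1` also counts a rule whose packet counter reads 10 or 17, and `countrule -c 2` one that reads 20. The counters are zeroed before each step, so this is unlikely to bite today, but a trailing space pins the packet count exactly: `countrule "-c 1 "` works unchanged because the function joins its arguments with `"$*"`. `send()` also leaves its loop variable `i` global and ignores the socat exit status; `local i` and a checked socat call would turn a failed send into an explicit failure instead of a counter mismatch.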
diff --git a/tools/testing/selftests/net/netns-name.sh b/tools/testing/selftests/net/netns-name.sh
index 7d3d3fc99461..6974474c26f3 100755
--- a/tools/testing/selftests/net/netns-name.sh
+++ b/tools/testing/selftests/net/netns-name.sh
@@ -1,9 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+source lib.sh
set -o pipefail
-NS=netns-name-test
DEV=dummy-dev0
DEV2=dummy-dev1
ALT_NAME=some-alt-name
@@ -11,7 +11,7 @@ ALT_NAME=some-alt-name
RET_CODE=0
cleanup() {
- ip netns del $NS
+ cleanup_ns $NS $test_ns
}
trap cleanup EXIT
@@ -21,50 +21,50 @@ fail() {
RET_CODE=1
}
-ip netns add $NS
+setup_ns NS test_ns
#
# Test basic move without a rename
#
ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns 1 ||
+ip -netns $NS link set dev $DEV netns $test_ns ||
fail "Can't perform a netns move"
-ip link show dev $DEV >> /dev/null || fail "Device not found after move"
-ip link del $DEV || fail
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $test_ns link del $DEV || fail
#
# Test move with a conflict
#
-ip link add name $DEV type dummy
+ip -netns $test_ns link add name $DEV type dummy
ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns 1 2> /dev/null &&
+ip -netns $NS link set dev $DEV netns $test_ns 2> /dev/null &&
fail "Performed a netns move with a name conflict"
-ip link show dev $DEV >> /dev/null || fail "Device not found after move"
+ip -netns $test_ns link show dev $DEV >> /dev/null || fail "Device not found after move"
ip -netns $NS link del $DEV || fail
-ip link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
#
# Test move with a conflict and rename
#
-ip link add name $DEV type dummy
+ip -netns $test_ns link add name $DEV type dummy
ip -netns $NS link add name $DEV type dummy || fail
-ip -netns $NS link set dev $DEV netns 1 name $DEV2 ||
+ip -netns $NS link set dev $DEV netns $test_ns name $DEV2 ||
fail "Can't perform a netns move with rename"
-ip link del $DEV2 || fail
-ip link del $DEV || fail
+ip -netns $test_ns link del $DEV2 || fail
+ip -netns $test_ns link del $DEV || fail
#
# Test dup alt-name with netns move
#
-ip link add name $DEV type dummy || fail
-ip link property add dev $DEV altname $ALT_NAME || fail
+ip -netns $test_ns link add name $DEV type dummy || fail
+ip -netns $test_ns link property add dev $DEV altname $ALT_NAME || fail
ip -netns $NS link add name $DEV2 type dummy || fail
ip -netns $NS link property add dev $DEV2 altname $ALT_NAME || fail
-ip -netns $NS link set dev $DEV2 netns 1 2> /dev/null &&
+ip -netns $NS link set dev $DEV2 netns $test_ns 2> /dev/null &&
fail "Moved with alt-name dup"
-ip link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
ip -netns $NS link del $DEV2 || fail
#
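Review bot: `setup_ns NS test_ns` is called without checking its result. The `setup_namespaces()` hunk in pmtu.sh further down does the same, and it drops the `ip netns add ${n} || return 1` it had before. What setup_ns does on failure is defined in lib.sh and not visible here. If it can return non-zero, `$NS` and `$test_ns` stay empty and each unquoted `ip -netns $NS ...` below takes the next word as the namespace name. Suggested: `setup_ns NS test_ns || exit 1` here and `setup_ns NS_A NS_B NS_C NS_R1 NS_R2 || return 1` in pmtu.sh.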
@@ -72,11 +72,11 @@ ip -netns $NS link del $DEV2 || fail
#
ip -netns $NS link add name $DEV type dummy || fail
ip -netns $NS link property add dev $DEV altname $ALT_NAME || fail
-ip -netns $NS link set dev $DEV netns 1 || fail
-ip link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move"
-ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
+ip -netns $NS link set dev $DEV netns $test_ns || fail
+ip -netns $test_ns link show dev $ALT_NAME >> /dev/null || fail "Can't find alt-name after move"
+ip -netns $NS link show dev $ALT_NAME 2> /dev/null &&
fail "Can still find alt-name after move"
-ip link del $DEV || fail
+ip -netns $test_ns link del $DEV || fail
echo -ne "$(basename $0) \t\t\t\t"
if [ $RET_CODE -eq 0 ]; then
diff --git a/tools/testing/selftests/net/nl_netdev.py b/tools/testing/selftests/net/nl_netdev.py
new file mode 100755
index 000000000000..93d9d914529b
--- /dev/null
+++ b/tools/testing/selftests/net/nl_netdev.py
@@ -0,0 +1,98 @@
+#!/usr/bin/env python3
+# SPDX-License-Identifier: GPL-2.0
+
+import time
+from lib.py import ksft_run, ksft_exit, ksft_pr
+from lib.py import ksft_eq, ksft_ge, ksft_busy_wait
+from lib.py import NetdevFamily, NetdevSimDev, ip
+
+
+def empty_check(nf) -> None:
+ devs = nf.dev_get({}, dump=True)
+ ksft_ge(len(devs), 1)
+
+
+def lo_check(nf) -> None:
+ lo_info = nf.dev_get({"ifindex": 1})
+ ksft_eq(len(lo_info['xdp-features']), 0)
+ ksft_eq(len(lo_info['xdp-rx-metadata-features']), 0)
+
+
+def page_pool_check(nf) -> None:
+ with NetdevSimDev() as nsimdev:
+ nsim = nsimdev.nsims[0]
+
+ def up():
+ ip(f"link set dev {nsim.ifname} up")
+
+ def down():
+ ip(f"link set dev {nsim.ifname} down")
+
+ def get_pp():
+ pp_list = nf.page_pool_get({}, dump=True)
+ return [pp for pp in pp_list if pp.get("ifindex") == nsim.ifindex]
+
+ # No page pools when down
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Up, empty page pool appears
+ up()
+ pp_list = get_pp()
+ ksft_ge(len(pp_list), 0)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 0)
+
+ # Down, it disappears, again
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 0)
+
+ # Up, allocate a page
+ up()
+ nsim.dfs_write("pp_hold", "y")
+ pp_list = nf.page_pool_get({}, dump=True)
+ refs = sum([pp["inflight"] for pp in pp_list if pp.get("ifindex") == nsim.ifindex])
+ ksft_ge(refs, 1)
+
+ # Now let's leak a page
+ down()
+ pp_list = get_pp()
+ ksft_eq(len(pp_list), 1)
+ refs = sum([pp["inflight"] for pp in pp_list])
+ ksft_eq(refs, 1)
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ ksft_eq(len(attached), 0)
+
+ # New pp can get created, and we'll have two
+ up()
+ pp_list = get_pp()
+ attached = [pp for pp in pp_list if "detach-time" not in pp]
+ detached = [pp for pp in pp_list if "detach-time" in pp]
+ ksft_eq(len(attached), 1)
+ ksft_eq(len(detached), 1)
+
+ # Free the old page and the old pp is gone
+ nsim.dfs_write("pp_hold", "n")
+ # Freeing check is once a second so we may need to retry
+ ksft_busy_wait(lambda: len(get_pp()) == 1, deadline=2)
+
+ # And down...
+ down()
+ ksft_eq(len(get_pp()), 0)
+
+ # Last, leave the page hanging for destroy, nothing to check
+ # we're trying to exercise the orphaning path in the kernel
+ up()
+ nsim.dfs_write("pp_hold", "y")
+
+
+def main() -> None:
+ nf = NetdevFamily()
+ ksft_run([empty_check, lo_check, page_pool_check],
+ args=(nf, ))
+ ksft_exit()
+
+
+if __name__ == "__main__":
+ main()
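Review bot: `ksft_ge(len(pp_list), 0)` after the first `up()` in page_pool_check can never fail, because a list length is always at least 0. The comment above it says an empty page pool appears, so if a pool is expected at that point the check should be `ksft_ge(len(pp_list), 1)`; if zero pools is acceptable, the line can go. `lo_check` looks up ifindex 1 without saying why, and a one-line comment stating that it is assumed to be loopback would help. `import time` is unused.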
diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh
index f8499d4c87f3..5cae53543849 100755
--- a/tools/testing/selftests/net/openvswitch/openvswitch.sh
+++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh
@@ -17,6 +17,7 @@ tests="
ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct
connect_v4 ip4-xon: Basic ipv4 ping between two NS
nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT
+ nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT
netlink_checks ovsnl: validate netlink attrs and settings
upcall_interfaces ovs: test the upcall interfaces
drop_reason drop: test drop reasons are emitted"
@@ -473,6 +474,67 @@ test_nat_connect_v4 () {
return 0
}
+# nat_related_v4 test
+# - client->server ip packets go via SNAT
+# - client solicits ICMP destination unreachable packet from server
+# - undo NAT for ICMP reply and test dst ip has been updated
+test_nat_related_v4 () {
+ which nc >/dev/null 2>/dev/null || return $ksft_skip
+
+ sbx_add "test_nat_related_v4" || return $?
+
+ ovs_add_dp "test_nat_related_v4" natrelated4 || return 1
+ info "create namespaces"
+ for ns in client server; do
+ ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \
+ "${ns:0:1}0" "${ns:0:1}1" || return 1
+ done
+
+ ip netns exec client ip addr add 172.31.110.10/24 dev c1
+ ip netns exec client ip link set c1 up
+ ip netns exec server ip addr add 172.31.110.20/24 dev s1
+ ip netns exec server ip link set s1 up
+
+ ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10
+
+ # Allow ARP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1
+
+ # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \
+ "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \
+ "2" || return 1
+
+ # Allow related ICMP responses back from server and undo NAT to restore original IP
+ # Drop any ICMP related packets where dst ip hasn't been restored back to original IP
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \
+ "ct(commit,nat),recirc(0x2)" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \
+ "1" || return 1
+ ovs_add_flow "test_nat_related_v4" natrelated4 \
+ "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \
+ "drop" || return 1
+
+ # Solicit destination unreachable response from server
+ ovs_sbx "test_nat_related_v4" ip netns exec client \
+ bash -c "echo a | nc -u -w 1 172.31.110.20 10000"
+
+ # Check to make sure no packets matched the drop rule with incorrect dst ip
+ python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \
+ | grep "drop" | grep "packets:0" >/dev/null || return 1
+
+ info "done..."
+ return 0
+}
+
# netlink_validation
# - Create a dp
# - check no warning with "old version" simulation
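Review bot: The only assertion in test_nat_related_v4 is that the drop flow shows `packets:0`, which also holds when no ICMP error came back at all (nc failed, the server emitted nothing, or the un-NAT flow was never hit). The test would be stronger if it also required a non-zero packet count on the `recirc_id(0x2),ct_state(+rel+trk)` flow whose action is `1`. The dump-flows output format beyond `packets:` is not visible here, so the exact grep needs checking. The exit status of the `nc` step and of the `ip addr add` / `ip route add` setup lines is ignored as well, and `command -v nc` is the portable form of `which nc`.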
@@ -502,9 +564,22 @@ test_netlink_checks () {
wc -l) == 2 ] || \
return 1
+ info "Checking clone depth"
ERR_MSG="Flow actions may not be safe on all matching packets"
PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
ovs_add_flow "test_netlink_checks" nv0 \
+ 'in_port(1),eth(),eth_type(0x800),ipv4()' \
+ 'clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(clone(drop)))))))))))))))))' \
+ >/dev/null 2>&1 && return 1
+ POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
+
+ if [ "$PRE_TEST" == "$POST_TEST" ]; then
+ info "failed - clone depth too large"
+ return 1
+ fi
+
+ PRE_TEST=$(dmesg | grep -c "${ERR_MSG}")
+ ovs_add_flow "test_netlink_checks" nv0 \
'in_port(1),eth(),eth_type(0x0806),arp()' 'drop(0),2' \
&> /dev/null && return 1
POST_TEST=$(dmesg | grep -c "${ERR_MSG}")
diff --git a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
index b97e621face9..1dd057afd3fb 100644
--- a/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
+++ b/tools/testing/selftests/net/openvswitch/ovs-dpctl.py
@@ -299,7 +299,7 @@ class ovsactions(nla):
("OVS_ACTION_ATTR_PUSH_NSH", "none"),
("OVS_ACTION_ATTR_POP_NSH", "flag"),
("OVS_ACTION_ATTR_METER", "none"),
- ("OVS_ACTION_ATTR_CLONE", "none"),
+ ("OVS_ACTION_ATTR_CLONE", "recursive"),
("OVS_ACTION_ATTR_CHECK_PKT_LEN", "none"),
("OVS_ACTION_ATTR_ADD_MPLS", "none"),
("OVS_ACTION_ATTR_DEC_TTL", "none"),
@@ -465,33 +465,46 @@ class ovsactions(nla):
print_str += "pop_mpls"
else:
datum = self.get_attr(field[0])
- print_str += datum.dpstr(more)
+ if field[0] == "OVS_ACTION_ATTR_CLONE":
+ print_str += "clone("
+ print_str += datum.dpstr(more)
+ print_str += ")"
+ else:
+ print_str += datum.dpstr(more)
return print_str
def parse(self, actstr):
+ totallen = len(actstr)
while len(actstr) != 0:
parsed = False
+ parencount = 0
if actstr.startswith("drop"):
# If no reason is provided, the implicit drop is used (i.e no
# action). If some reason is given, an explicit action is used.
- actstr, reason = parse_extract_field(
- actstr,
- "drop(",
- "([0-9]+)",
- lambda x: int(x, 0),
- False,
- None,
- )
+ reason = None
+ if actstr.startswith("drop("):
+ parencount += 1
+
+ actstr, reason = parse_extract_field(
+ actstr,
+ "drop(",
+ r"([0-9]+)",
+ lambda x: int(x, 0),
+ False,
+ None,
+ )
+
if reason is not None:
self["attrs"].append(["OVS_ACTION_ATTR_DROP", reason])
parsed = True
else:
- return
+ actstr = actstr[len("drop"): ]
+ return (totallen - len(actstr))
- elif parse_starts_block(actstr, "^(\d+)", False, True):
+ elif parse_starts_block(actstr, r"^(\d+)", False, True):
actstr, output = parse_extract_field(
- actstr, None, "(\d+)", lambda x: int(x), False, "0"
+ actstr, None, r"(\d+)", lambda x: int(x), False, "0"
)
self["attrs"].append(["OVS_ACTION_ATTR_OUTPUT", output])
parsed = True
@@ -499,11 +512,12 @@ class ovsactions(nla):
actstr, recircid = parse_extract_field(
actstr,
"recirc(",
- "([0-9a-fA-Fx]+)",
+ r"([0-9a-fA-Fx]+)",
lambda x: int(x, 0),
False,
0,
)
+ parencount += 1
self["attrs"].append(["OVS_ACTION_ATTR_RECIRC", recircid])
parsed = True
@@ -516,12 +530,22 @@ class ovsactions(nla):
for flat_act in parse_flat_map:
if parse_starts_block(actstr, flat_act[0], False):
- actstr += len(flat_act[0])
+ actstr = actstr[len(flat_act[0]):]
self["attrs"].append([flat_act[1]])
actstr = actstr[strspn(actstr, ", ") :]
parsed = True
- if parse_starts_block(actstr, "ct(", False):
+ if parse_starts_block(actstr, "clone(", False):
+ parencount += 1
+ subacts = ovsactions()
+ actstr = actstr[len("clone("):]
+ parsedLen = subacts.parse(actstr)
+ lst = []
+ self["attrs"].append(("OVS_ACTION_ATTR_CLONE", subacts))
+ actstr = actstr[parsedLen:]
+ parsed = True
+ elif parse_starts_block(actstr, "ct(", False):
+ parencount += 1
actstr = actstr[len("ct(") :]
ctact = ovsactions.ctact()
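Review bot: The `clone(` branch recurses into `subacts.parse(actstr)` with no bound on nesting, and the last hunk of this file raises the interpreter ceiling with `sys.setrecursionlimit(100000)` in main(). At that setting a deeply nested action string is more likely to exhaust the C stack and kill the interpreter than to raise RecursionError. A nesting counter in parse() that raises ValueError past a small fixed depth would make the limit explicit and let the default recursion limit stay. Whether the encoder needs the higher limit for some other reason is not visible here. `lst = []` is never used, and this branch appends a tuple where the other branches append a list. parse() begins in an earlier hunk.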
@@ -553,6 +577,7 @@ class ovsactions(nla):
natact = ovsactions.ctact.natattr()
if actstr.startswith("("):
+ parencount += 1
t = None
actstr = actstr[1:]
if actstr.startswith("src"):
@@ -563,17 +588,17 @@ class ovsactions(nla):
actstr = actstr[3:]
actstr, ip_block_min = parse_extract_field(
- actstr, "=", "([0-9a-fA-F\.]+)", str, False
+ actstr, "=", r"([0-9a-fA-F\.]+)", str, False
)
actstr, ip_block_max = parse_extract_field(
- actstr, "-", "([0-9a-fA-F\.]+)", str, False
+ actstr, "-", r"([0-9a-fA-F\.]+)", str, False
)
actstr, proto_min = parse_extract_field(
- actstr, ":", "(\d+)", int, False
+ actstr, ":", r"(\d+)", int, False
)
actstr, proto_max = parse_extract_field(
- actstr, "-", "(\d+)", int, False
+ actstr, "-", r"(\d+)", int, False
)
if t is not None:
@@ -607,15 +632,29 @@ class ovsactions(nla):
actstr = actstr[strspn(actstr, ", ") :]
ctact["attrs"].append(["OVS_CT_ATTR_NAT", natact])
- actstr = actstr[strspn(actstr, ",) ") :]
+ actstr = actstr[strspn(actstr, ", ") :]
self["attrs"].append(["OVS_ACTION_ATTR_CT", ctact])
parsed = True
- actstr = actstr[strspn(actstr, "), ") :]
+ actstr = actstr[strspn(actstr, ", ") :]
+ while parencount > 0:
+ parencount -= 1
+ actstr = actstr[strspn(actstr, " "):]
+ if len(actstr) and actstr[0] != ")":
+ raise ValueError("Action str: '%s' unbalanced" % actstr)
+ actstr = actstr[1:]
+
+ if len(actstr) and actstr[0] == ")":
+ return (totallen - len(actstr))
+
+ actstr = actstr[strspn(actstr, ", ") :]
+
if not parsed:
raise ValueError("Action str: '%s' not supported" % actstr)
+ return (totallen - len(actstr))
+
class ovskey(nla):
nla_flags = NLA_F_NESTED
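Review bot: The balance loop raises only when a character is present and is not `)`: `if len(actstr) and actstr[0] != ")"`. When the input ends early, `actstr` is empty, the condition is false and `actstr[1:]` silently stands in for the missing parenthesis, so an action string cut off right after `recirc(0x1` appears to be accepted. If that leniency is not intended (how parse_extract_field treats the closing parenthesis is outside this hunk), `if not actstr or actstr[0] != ")":` reports it. parse() starts in an earlier hunk.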
@@ -2111,6 +2150,8 @@ def main(argv):
ovsflow = OvsFlow()
ndb = NDB()
+ sys.setrecursionlimit(100000)
+
if hasattr(args, "showdp"):
found = False
for iface in ndb.interfaces:
diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index f838dd370f6a..cfc84958025a 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# Check that route PMTU values match expectations, and that initial device MTU
@@ -198,8 +198,8 @@
# - pmtu_ipv6_route_change
# Same as above but with IPv6
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
+source net_helper.sh
PAUSE_ON_FAIL=no
VERBOSE=0
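Review bot: `source lib.sh` and `source net_helper.sh` use bare file names, which bash looks up through `$PATH` before the current directory, so the script may load a different `lib.sh` than the one shipped beside it. These tests create namespaces and so presumably run privileged, which makes that worth pinning down. Anchoring the path fixes it, e.g. `source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"`, and likewise for `net_helper.sh`. The same bare `source lib.sh` is added in the rtnetlink.sh, sctp_vrf.sh and srv6_end_dt*_l3vpn_test.sh hunks of this diff.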
@@ -268,16 +268,6 @@ tests="
pmtu_ipv4_route_change ipv4: PMTU exception w/route replace 1
pmtu_ipv6_route_change ipv6: PMTU exception w/route replace 1"
-NS_A="ns-A"
-NS_B="ns-B"
-NS_C="ns-C"
-NS_R1="ns-R1"
-NS_R2="ns-R2"
-ns_a="ip netns exec ${NS_A}"
-ns_b="ip netns exec ${NS_B}"
-ns_c="ip netns exec ${NS_C}"
-ns_r1="ip netns exec ${NS_R1}"
-ns_r2="ip netns exec ${NS_R2}"
# Addressing and routing for tests with routers: four network segments, with
# index SEGMENT between 1 and 4, a common prefix (PREFIX4 or PREFIX6) and an
# identifier ID, which is 1 for hosts (A and B), 2 for routers (R1 and R2).
@@ -543,13 +533,17 @@ setup_ip6ip6() {
}
setup_namespaces() {
+ setup_ns NS_A NS_B NS_C NS_R1 NS_R2
for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
- ip netns add ${n} || return 1
-
# Disable DAD, so that we don't have to wait to use the
# configured IPv6 addresses
ip netns exec ${n} sysctl -q net/ipv6/conf/default/accept_dad=0
done
+ ns_a="ip netns exec ${NS_A}"
+ ns_b="ip netns exec ${NS_B}"
+ ns_c="ip netns exec ${NS_C}"
+ ns_r1="ip netns exec ${NS_R1}"
+ ns_r2="ip netns exec ${NS_R2}"
}
setup_veth() {
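Review bot: The result of `setup_ns NS_A NS_B NS_C NS_R1 NS_R2` is ignored in `setup_namespaces`, whereas the removed `ip netns add ${n} || return 1` stopped setup on the first failure. If `setup_ns` fails, the loop and the `ns_a`..`ns_r2` assignments continue with whatever the `NS_*` variables hold, possibly empty names. `setup_ns` is defined in lib.sh, outside this page, but the rtnetlink.sh hunks in this same diff test `$?` after calling it, so `setup_ns NS_A NS_B NS_C NS_R1 NS_R2 || return 1` looks like the intended form. The unchecked pattern also appears in `setup()` in the sctp_vrf.sh and srv6 hunks.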
@@ -714,23 +708,23 @@ setup_xfrm6() {
}
setup_xfrm4udp() {
- setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 4 4500
+ setup_xfrm 4 ${veth4_a_addr} ${veth4_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 4 4500
}
setup_xfrm6udp() {
- setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 6 4500
+ setup_xfrm 6 ${veth6_a_addr} ${veth6_b_addr} "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 6 4500
}
setup_xfrm4udprouted() {
- setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 4 4500
+ setup_xfrm 4 ${prefix4}.${a_r1}.1 ${prefix4}.${b_r1}.1 "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 4 4500
}
setup_xfrm6udprouted() {
- setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0"
- setup_nettest_xfrm 6 4500
+ setup_xfrm 6 ${prefix6}:${a_r1}::1 ${prefix6}:${b_r1}::1 "encap espinudp 4500 4500 0.0.0.0" && \
+ setup_nettest_xfrm 6 4500
}
setup_routing_old() {
@@ -839,7 +833,7 @@ setup_bridge() {
run_cmd ${ns_a} ip link set br0 up
run_cmd ${ns_c} ip link add veth_C-A type veth peer name veth_A-C
- run_cmd ${ns_c} ip link set veth_A-C netns ns-A
+ run_cmd ${ns_c} ip link set veth_A-C netns ${NS_A}
run_cmd ${ns_a} ip link set veth_A-C up
run_cmd ${ns_c} ip link set veth_C-A up
@@ -944,9 +938,7 @@ cleanup() {
done
socat_pids=
- for n in ${NS_A} ${NS_B} ${NS_C} ${NS_R1} ${NS_R2}; do
- ip netns del ${n} 2> /dev/null
- done
+ cleanup_all_ns
ip link del veth_A-C 2>/dev/null
ip link del veth_A-R1 2>/dev/null
@@ -1344,12 +1336,14 @@ test_pmtu_ipvX_over_bridged_vxlanY_or_geneveY_exception() {
else
TCPDST="TCP:[${dst}]:50000"
fi
- ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000 STDOUT > $tmpoutfile &
+ ${ns_b} socat -T 3 -u -6 TCP-LISTEN:50000,reuseaddr STDOUT > $tmpoutfile &
+ local socat_pid=$!
- sleep 1
+ wait_local_port_listen ${NS_B} 50000 tcp
- dd if=/dev/zero of=/dev/stdout status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+ dd if=/dev/zero status=none bs=1M count=1 | ${target} socat -T 3 -u STDIN $TCPDST,connect-timeout=3
+ wait ${socat_pid}
size=$(du -sb $tmpoutfile)
size=${size%%/tmp/*}
@@ -1963,6 +1957,13 @@ check_command() {
return 0
}
+check_running() {
+ pid=${1}
+ cmd=${2}
+
+ [ "$(cat /proc/${pid}/cmdline 2>/dev/null | tr -d '\0')" = "{cmd}" ]
+}
+
test_cleanup_vxlanX_exception() {
outer="${1}"
encap="vxlan"
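Review bot: `check_running` compares the process command line with the literal string `{cmd}` because the `$` is missing, so the test can never be true. The polling loop added in the next hunk, `check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0`, therefore returns success on its first iteration. A stuck `ip link del` is never detected and the leak check always passes. The comparison should end in `= "${cmd}" ]`. `pid` and `cmd` are also assigned as globals; `local pid=${1}` and `local cmd=${2}` keep them out of the callers' scope.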
@@ -1993,11 +1994,12 @@ test_cleanup_vxlanX_exception() {
${ns_a} ip link del dev veth_A-R1 &
iplink_pid=$!
- sleep 1
- if [ "$(cat /proc/${iplink_pid}/cmdline 2>/dev/null | tr -d '\0')" = "iplinkdeldevveth_A-R1" ]; then
- err " can't delete veth device in a timely manner, PMTU dst likely leaked"
- return 1
- fi
+ for i in $(seq 1 20); do
+ check_running ${iplink_pid} "iplinkdeldevveth_A-R1" || return 0
+ sleep 0.1
+ done
+ err " can't delete veth device in a timely manner, PMTU dst likely leaked"
+ return 1
}
test_cleanup_ipv6_exception() {
@@ -2048,7 +2050,7 @@ run_test() {
case $ret in
0)
all_skipped=false
- [ $exitcode=$ksft_skip ] && exitcode=0
+ [ $exitcode -eq $ksft_skip ] && exitcode=0
;;
$ksft_skip)
[ $all_skipped = true ] && exitcode=$ksft_skip
diff --git a/tools/testing/selftests/net/reuseaddr_conflict.c b/tools/testing/selftests/net/reuseaddr_conflict.c
index 7c5b12664b03..bfb07dc49518 100644
--- a/tools/testing/selftests/net/reuseaddr_conflict.c
+++ b/tools/testing/selftests/net/reuseaddr_conflict.c
@@ -109,6 +109,6 @@ int main(void)
fd1 = open_port(0, 1);
if (fd1 >= 0)
error(1, 0, "Was allowed to create an ipv4 reuseport on an already bound non-reuseport socket with no ipv6");
- fprintf(stderr, "Success");
+ fprintf(stderr, "Success\n");
return 0;
}
diff --git a/tools/testing/selftests/net/rps_default_mask.sh b/tools/testing/selftests/net/rps_default_mask.sh
index a26c5624429f..4287a8529890 100755
--- a/tools/testing/selftests/net/rps_default_mask.sh
+++ b/tools/testing/selftests/net/rps_default_mask.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
readonly ksft_skip=4
@@ -33,6 +33,10 @@ chk_rps() {
rps_mask=$($cmd /sys/class/net/$dev_name/queues/rx-0/rps_cpus)
printf "%-60s" "$msg"
+
+ # In case there is more than 32 CPUs we need to remove commas from masks
+ rps_mask=${rps_mask//,}
+ expected_rps_mask=${expected_rps_mask//,}
if [ $rps_mask -eq $expected_rps_mask ]; then
echo "[ ok ]"
else
diff --git a/tools/testing/selftests/net/rtnetlink.sh b/tools/testing/selftests/net/rtnetlink.sh
index 5f2b3f6c0d74..bdf6f10d0558 100755
--- a/tools/testing/selftests/net/rtnetlink.sh
+++ b/tools/testing/selftests/net/rtnetlink.sh
@@ -28,6 +28,7 @@ ALL_TESTS="
kci_test_neigh_get
kci_test_bridge_parent_id
kci_test_address_proto
+ kci_test_enslave_bonding
"
devdummy="test-dummy0"
@@ -35,8 +36,7 @@ VERBOSE=0
PAUSE=no
PAUSE_ON_FAIL=no
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# set global exit status, but never reset nonzero one.
check_err()
@@ -297,7 +297,7 @@ kci_test_addrlft()
done
sleep 5
- run_cmd_grep "10.23.11." ip addr show dev "$devdummy"
+ run_cmd_grep_fail "10.23.11." ip addr show dev "$devdummy"
if [ $? -eq 0 ]; then
check_err 1
end_test "FAIL: preferred_lft addresses remaining"
@@ -440,7 +440,6 @@ kci_test_encap_vxlan()
local ret=0
vxlan="test-vxlan0"
vlan="test-vlan0"
- testns="$1"
run_cmd ip -netns "$testns" link add "$vxlan" type vxlan id 42 group 239.1.1.1 \
dev "$devdummy" dstport 4789
if [ $? -ne 0 ]; then
@@ -485,7 +484,6 @@ kci_test_encap_fou()
{
local ret=0
name="test-fou"
- testns="$1"
run_cmd_grep 'Usage: ip fou' ip fou help
if [ $? -ne 0 ];then
end_test "SKIP: fou: iproute2 too old"
@@ -517,9 +515,8 @@ kci_test_encap_fou()
# test various encap methods, use netns to avoid unwanted interference
kci_test_encap()
{
- testns="testns"
local ret=0
- run_cmd ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
end_test "SKIP encap tests: cannot add net namespace $testns"
return $ksft_skip
@@ -527,8 +524,8 @@ kci_test_encap()
run_cmd ip -netns "$testns" link set lo up
run_cmd ip -netns "$testns" link add name "$devdummy" type dummy
run_cmd ip -netns "$testns" link set "$devdummy" up
- run_cmd kci_test_encap_vxlan "$testns"
- run_cmd kci_test_encap_fou "$testns"
+ run_cmd kci_test_encap_vxlan
+ run_cmd kci_test_encap_fou
ip netns del "$testns"
return $ret
@@ -574,6 +571,10 @@ kci_test_macsec_offload()
return $ksft_skip
fi
+ if ! mount | grep -q debugfs; then
+ mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+ fi
+
# setup netdevsim since dummydev doesn't have offload support
if [ ! -w /sys/bus/netdevsim/new_device ] ; then
run_cmd modprobe -q netdevsim
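Review bot: The debugfs check `mount | grep -q debugfs` matches any mount line that merely contains the string, and the following `mount -t debugfs none /sys/kernel/debug/ &> /dev/null` discards both the error text and the exit status. A failed mount would surface later as an unrelated netdevsim failure. Testing the mount point itself and skipping on failure is more direct: `mountpoint -q /sys/kernel/debug || mount -t debugfs none /sys/kernel/debug || return $ksft_skip`, with an `end_test "SKIP: ..."` message before the return. The identical four lines are added again in `kci_test_ipsec_offload`, so a small shared helper would avoid the duplication. The test also leaves debugfs mounted on the host afterwards, which deserves a comment if intended.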
@@ -738,6 +739,10 @@ kci_test_ipsec_offload()
sysfsnet=/sys/bus/netdevsim/devices/netdevsim0/net/
probed=false
+ if ! mount | grep -q debugfs; then
+ mount -t debugfs none /sys/kernel/debug/ &> /dev/null
+ fi
+
# setup netdevsim since dummydev doesn't have offload support
if [ ! -w /sys/bus/netdevsim/new_device ] ; then
run_cmd modprobe -q netdevsim
@@ -796,6 +801,8 @@ kci_test_ipsec_offload()
end_test "FAIL: ipsec_offload SA offload missing from list output"
fi
+ # we didn't create a peer, make sure we can Tx
+ ip neigh add $dstip dev $dev lladdr 00:11:22:33:44:55
# use ping to exercise the Tx path
ping -I $dev -c 3 -W 1 -i 0 $dstip >/dev/null
@@ -836,11 +843,10 @@ EOF
kci_test_gretap()
{
- testns="testns"
DEV_NS=gretap00
local ret=0
- run_cmd ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
end_test "SKIP gretap tests: cannot add net namespace $testns"
return $ksft_skip
@@ -859,7 +865,7 @@ kci_test_gretap()
run_cmd ip -netns "$testns" addr add dev "$DEV_NS" 10.1.1.100/24
- run_cmd ip -netns "$testns" link set dev $DEV_NS ups
+ run_cmd ip -netns "$testns" link set dev $DEV_NS up
run_cmd ip -netns "$testns" link del "$DEV_NS"
# test external mode
@@ -878,11 +884,10 @@ kci_test_gretap()
kci_test_ip6gretap()
{
- testns="testns"
DEV_NS=ip6gretap00
local ret=0
- run_cmd ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
end_test "SKIP ip6gretap tests: cannot add net namespace $testns"
return $ksft_skip
@@ -920,7 +925,6 @@ kci_test_ip6gretap()
kci_test_erspan()
{
- testns="testns"
DEV_NS=erspan00
local ret=0
run_cmd_grep "^Usage:" ip link help erspan
@@ -928,7 +932,7 @@ kci_test_erspan()
end_test "SKIP: erspan: iproute2 too old"
return $ksft_skip
fi
- run_cmd ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
end_test "SKIP erspan tests: cannot add net namespace $testns"
return $ksft_skip
@@ -970,7 +974,6 @@ kci_test_erspan()
kci_test_ip6erspan()
{
- testns="testns"
DEV_NS=ip6erspan00
local ret=0
run_cmd_grep "^Usage:" ip link help ip6erspan
@@ -978,7 +981,7 @@ kci_test_ip6erspan()
end_test "SKIP: ip6erspan: iproute2 too old"
return $ksft_skip
fi
- run_cmd ip netns add "$testns"
+ setup_ns testns
if [ $? -ne 0 ]; then
end_test "SKIP ip6erspan tests: cannot add net namespace $testns"
return $ksft_skip
@@ -1022,8 +1025,6 @@ kci_test_ip6erspan()
kci_test_fdb_get()
{
- IP="ip -netns testns"
- BRIDGE="bridge -netns testns"
brdev="test-br0"
vxlandev="vxlan10"
test_mac=de:ad:be:ef:13:37
@@ -1037,11 +1038,13 @@ kci_test_fdb_get()
return $ksft_skip
fi
- run_cmd ip netns add testns
+ setup_ns testns
if [ $? -ne 0 ]; then
end_test "SKIP fdb get tests: cannot add net namespace $testns"
return $ksft_skip
fi
+ IP="ip -netns $testns"
+ BRIDGE="bridge -netns $testns"
run_cmd $IP link add "$vxlandev" type vxlan id 10 local $localip \
dstport 4789
run_cmd $IP link add name "$brdev" type bridge
@@ -1052,7 +1055,7 @@ kci_test_fdb_get()
run_cmd_grep "dev $vxlandev master $brdev" $BRIDGE fdb get $test_mac br "$brdev"
run_cmd_grep "dev $vxlandev dst $dstip" $BRIDGE fdb get $test_mac dev "$vxlandev" self
- ip netns del testns &>/dev/null
+ ip netns del $testns &>/dev/null
if [ $ret -ne 0 ]; then
end_test "FAIL: bridge fdb get"
@@ -1239,6 +1242,31 @@ kci_test_address_proto()
return $ret
}
+kci_test_enslave_bonding()
+{
+ local bond="bond123"
+ local ret=0
+
+ setup_ns testns
+ if [ $? -ne 0 ]; then
+ end_test "SKIP bonding tests: cannot add net namespace $testns"
+ return $ksft_skip
+ fi
+
+ run_cmd ip -netns $testns link add dev $bond type bond mode balance-rr
+ run_cmd ip -netns $testns link add dev $devdummy type dummy
+ run_cmd ip -netns $testns link set dev $devdummy up
+ run_cmd ip -netns $testns link set dev $devdummy master $bond down
+ if [ $ret -ne 0 ]; then
+ end_test "FAIL: initially up interface added to a bond and set down"
+ ip netns del "$testns"
+ return 1
+ fi
+
+ end_test "PASS: enslave interface in a bond"
+ ip netns del "$testns"
+}
+
kci_test_rtnl()
{
local current_test
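Review bot: In `kci_test_enslave_bonding`, `$ret` is only inspected after all four `run_cmd` calls, so any earlier failure is reported as "FAIL: initially up interface added to a bond and set down". That includes failure to create the bond device, for instance on a kernel without the bonding driver (an assumption about the environment). Checking right after `link add dev $bond type bond` and ending with a SKIP there would keep FAIL for the enslave step the test is about. The failure path also returns a literal `1` and repeats `ip netns del "$testns"` in both exits; a single cleanup followed by `return $ret` would be shorter. `run_cmd`, `end_test` and `setup_ns` are defined outside this hunk.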
diff --git a/tools/testing/selftests/net/sample_map_ret0.bpf.c b/tools/testing/selftests/net/sample_map_ret0.bpf.c
new file mode 100644
index 000000000000..43ca92594926
--- /dev/null
+++ b/tools/testing/selftests/net/sample_map_ret0.bpf.c
@@ -0,0 +1,34 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct {
+ __uint(type, BPF_MAP_TYPE_HASH);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} htab SEC(".maps");
+
+struct {
+ __uint(type, BPF_MAP_TYPE_ARRAY);
+ __type(key, __u32);
+ __type(value, long);
+ __uint(max_entries, 2);
+} array SEC(".maps");
+
+/* Sample program which should always load for testing control paths. */
+SEC("xdp") int func()
+{
+ __u64 key64 = 0;
+ __u32 key = 0;
+ long *value;
+
+ value = bpf_map_lookup_elem(&htab, &key);
+ if (!value)
+ return 1;
+ value = bpf_map_lookup_elem(&array, &key64);
+ if (!value)
+ return 1;
+
+ return 0;
+}
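Review bot: The lookup `bpf_map_lookup_elem(&array, &key64)` passes a pointer to a `__u64` while the `array` map is declared with `__type(key, __u32)`, and nothing in the file says whether the mismatch is deliberate. If it is part of what the sample is meant to exercise, a comment above `key64` should say so, e.g. `/* wider than the map key on purpose; only the first 4 bytes are read */`. Otherwise `&key` would match the declaration. The bare `return 1` / `return 0` values are XDP action codes and would read better as the named constants, or with a one-line comment that the return value is irrelevant to the load test.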
diff --git a/tools/testing/selftests/net/sample_ret0.bpf.c b/tools/testing/selftests/net/sample_ret0.bpf.c
new file mode 100644
index 000000000000..1df5ca98bb65
--- /dev/null
+++ b/tools/testing/selftests/net/sample_ret0.bpf.c
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) */
+
+#define SEC(name) __attribute__((section(name), used))
+
+/* Sample program which should always load for testing control paths. */
+SEC("xdp")
+int func()
+{
+ return 0;
+}
diff --git a/tools/testing/selftests/net/sctp_vrf.sh b/tools/testing/selftests/net/sctp_vrf.sh
index c721e952e5f3..c854034b6aa1 100755
--- a/tools/testing/selftests/net/sctp_vrf.sh
+++ b/tools/testing/selftests/net/sctp_vrf.sh
@@ -6,13 +6,11 @@
# SERVER_NS
# CLIENT_NS2 (veth1) <---> (veth2) -> vrf_s2
-CLIENT_NS1="client-ns1"
-CLIENT_NS2="client-ns2"
+source lib.sh
CLIENT_IP4="10.0.0.1"
CLIENT_IP6="2000::1"
CLIENT_PORT=1234
-SERVER_NS="server-ns"
SERVER_IP4="10.0.0.2"
SERVER_IP6="2000::2"
SERVER_PORT=1234
@@ -20,9 +18,7 @@ SERVER_PORT=1234
setup() {
modprobe sctp
modprobe sctp_diag
- ip netns add $CLIENT_NS1
- ip netns add $CLIENT_NS2
- ip netns add $SERVER_NS
+ setup_ns CLIENT_NS1 CLIENT_NS2 SERVER_NS
ip net exec $CLIENT_NS1 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
ip net exec $CLIENT_NS2 sysctl -w net.ipv6.conf.default.accept_dad=0 2>&1 >/dev/null
@@ -67,9 +63,7 @@ setup() {
cleanup() {
ip netns exec $SERVER_NS pkill sctp_hello 2>&1 >/dev/null
- ip netns del "$CLIENT_NS1"
- ip netns del "$CLIENT_NS2"
- ip netns del "$SERVER_NS"
+ cleanup_ns $CLIENT_NS1 $CLIENT_NS2 $SERVER_NS
}
wait_server() {
diff --git a/tools/testing/selftests/net/settings b/tools/testing/selftests/net/settings
index dfc27cdc6c05..ed8418e8217a 100644
--- a/tools/testing/selftests/net/settings
+++ b/tools/testing/selftests/net/settings
@@ -1 +1 @@
-timeout=1500
+timeout=3600
diff --git a/tools/testing/selftests/net/setup_loopback.sh b/tools/testing/selftests/net/setup_loopback.sh
index e57bbfbc5208..2070b57849de 100755..100644
--- a/tools/testing/selftests/net/setup_loopback.sh
+++ b/tools/testing/selftests/net/setup_loopback.sh
@@ -5,6 +5,8 @@ readonly FLUSH_PATH="/sys/class/net/${dev}/gro_flush_timeout"
readonly IRQ_PATH="/sys/class/net/${dev}/napi_defer_hard_irqs"
readonly FLUSH_TIMEOUT="$(< ${FLUSH_PATH})"
readonly HARD_IRQS="$(< ${IRQ_PATH})"
+readonly server_ns=$(mktemp -u server-XXXXXXXX)
+readonly client_ns=$(mktemp -u client-XXXXXXXX)
netdev_check_for_carrier() {
local -r dev="$1"
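Review bot: `readonly server_ns=$(mktemp -u server-XXXXXXXX)` hides a failing `mktemp` because `readonly` itself succeeds, which would leave an empty namespace name. `mktemp -u` also only checks the generated name against files in the current directory, not against existing namespaces. In setup_veth.sh the context line `[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"` would then silently reuse a namespace that already exists. Splitting the statement lets the failure show: `server_ns=$(mktemp -u server-XXXXXXXX) || exit 1` followed by `readonly server_ns`, and the same for `client_ns`. setup_veth.sh adds the same two `readonly` lines; if any caller sources both files into one shell (not visible here), the second assignment to a readonly name will fail.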
@@ -97,12 +99,12 @@ setup_interrupt() {
setup_ns() {
# Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${dev}" server_ns server "${SERVER_MAC}"
- setup_macvlan_ns "${dev}" client_ns client "${CLIENT_MAC}"
+ setup_macvlan_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
+ setup_macvlan_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
}
cleanup_ns() {
- cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_macvlan_ns ${server_ns} server ${client_ns} client
}
setup() {
diff --git a/tools/testing/selftests/net/setup_veth.sh b/tools/testing/selftests/net/setup_veth.sh
index 1003ddf7b3b2..1f78a87f6f37 100644
--- a/tools/testing/selftests/net/setup_veth.sh
+++ b/tools/testing/selftests/net/setup_veth.sh
@@ -1,6 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
+readonly server_ns=$(mktemp -u server-XXXXXXXX)
+readonly client_ns=$(mktemp -u client-XXXXXXXX)
+
setup_veth_ns() {
local -r link_dev="$1"
local -r ns_name="$2"
@@ -8,7 +11,7 @@ setup_veth_ns() {
local -r ns_mac="$4"
[[ -e /var/run/netns/"${ns_name}" ]] || ip netns add "${ns_name}"
- echo 100000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
+ echo 1000000 > "/sys/class/net/${ns_dev}/gro_flush_timeout"
ip link set dev "${ns_dev}" netns "${ns_name}" mtu 65535
ip -netns "${ns_name}" link set dev "${ns_dev}" up
@@ -19,14 +22,14 @@ setup_ns() {
# Set up server_ns namespace and client_ns namespace
ip link add name server type veth peer name client
- setup_veth_ns "${dev}" server_ns server "${SERVER_MAC}"
- setup_veth_ns "${dev}" client_ns client "${CLIENT_MAC}"
+ setup_veth_ns "${dev}" ${server_ns} server "${SERVER_MAC}"
+ setup_veth_ns "${dev}" ${client_ns} client "${CLIENT_MAC}"
}
cleanup_ns() {
local ns_name
- for ns_name in client_ns server_ns; do
+ for ns_name in ${client_ns} ${server_ns}; do
[[ -e /var/run/netns/"${ns_name}" ]] && ip netns del "${ns_name}"
done
}
diff --git a/tools/testing/selftests/net/so_incoming_cpu.c b/tools/testing/selftests/net/so_incoming_cpu.c
index a14818164102..e9fa14e10732 100644
--- a/tools/testing/selftests/net/so_incoming_cpu.c
+++ b/tools/testing/selftests/net/so_incoming_cpu.c
@@ -3,19 +3,16 @@
#define _GNU_SOURCE
#include <sched.h>
+#include <fcntl.h>
+
#include <netinet/in.h>
#include <sys/socket.h>
#include <sys/sysinfo.h>
#include "../kselftest_harness.h"
-#define CLIENT_PER_SERVER 32 /* More sockets, more reliable */
-#define NR_SERVER self->nproc
-#define NR_CLIENT (CLIENT_PER_SERVER * NR_SERVER)
-
FIXTURE(so_incoming_cpu)
{
- int nproc;
int *servers;
union {
struct sockaddr addr;
@@ -56,12 +53,47 @@ FIXTURE_VARIANT_ADD(so_incoming_cpu, after_all_listen)
.when_to_set = AFTER_ALL_LISTEN,
};
+static void write_sysctl(struct __test_metadata *_metadata,
+ char *filename, char *string)
+{
+ int fd, len, ret;
+
+ fd = open(filename, O_WRONLY);
+ ASSERT_NE(fd, -1);
+
+ len = strlen(string);
+ ret = write(fd, string, len);
+ ASSERT_EQ(ret, len);
+}
+
+static void setup_netns(struct __test_metadata *_metadata)
+{
+ ASSERT_EQ(unshare(CLONE_NEWNET), 0);
+ ASSERT_EQ(system("ip link set lo up"), 0);
+
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/ip_local_port_range", "10000 60001");
+ write_sysctl(_metadata, "/proc/sys/net/ipv4/tcp_tw_reuse", "0");
+}
+
+#define NR_PORT (60001 - 10000 - 1)
+#define NR_CLIENT_PER_SERVER_DEFAULT 32
+static int nr_client_per_server, nr_server, nr_client;
+
FIXTURE_SETUP(so_incoming_cpu)
{
- self->nproc = get_nprocs();
- ASSERT_LE(2, self->nproc);
+ setup_netns(_metadata);
+
+ nr_server = get_nprocs();
+ ASSERT_LE(2, nr_server);
+
+ if (NR_CLIENT_PER_SERVER_DEFAULT * nr_server < NR_PORT)
+ nr_client_per_server = NR_CLIENT_PER_SERVER_DEFAULT;
+ else
+ nr_client_per_server = NR_PORT / nr_server;
+
+ nr_client = nr_client_per_server * nr_server;
- self->servers = malloc(sizeof(int) * NR_SERVER);
+ self->servers = malloc(sizeof(int) * nr_server);
ASSERT_NE(self->servers, NULL);
self->in_addr.sin_family = AF_INET;
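Review bot: `write_sysctl` opens `filename` but never closes `fd`, so each call from `setup_netns` leaks a descriptor for the lifetime of the test process. Adding `close(fd);` after `ASSERT_EQ(ret, len);` fixes it. The function also calls `strlen` while the include hunk above adds only `<fcntl.h>`; `<string.h>` may arrive through `kselftest_harness.h`, but an explicit include would not depend on that. `unshare(CLONE_NEWNET)` is asserted, so an unprivileged run fails rather than skips, which is worth a comment if intended. `FIXTURE_SETUP` continues past the end of this hunk.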
@@ -74,7 +106,7 @@ FIXTURE_TEARDOWN(so_incoming_cpu)
{
int i;
- for (i = 0; i < NR_SERVER; i++)
+ for (i = 0; i < nr_server; i++)
close(self->servers[i]);
free(self->servers);
@@ -110,10 +142,10 @@ int create_server(struct __test_metadata *_metadata,
if (variant->when_to_set == BEFORE_LISTEN)
set_so_incoming_cpu(_metadata, fd, cpu);
- /* We don't use CLIENT_PER_SERVER here not to block
+ /* We don't use nr_client_per_server here not to block
* this test at connect() if SO_INCOMING_CPU is broken.
*/
- ret = listen(fd, NR_CLIENT);
+ ret = listen(fd, nr_client);
ASSERT_EQ(ret, 0);
if (variant->when_to_set == AFTER_LISTEN)
@@ -128,7 +160,7 @@ void create_servers(struct __test_metadata *_metadata,
{
int i, ret;
- for (i = 0; i < NR_SERVER; i++) {
+ for (i = 0; i < nr_server; i++) {
self->servers[i] = create_server(_metadata, self, variant, i);
if (i == 0) {
@@ -138,7 +170,7 @@ void create_servers(struct __test_metadata *_metadata,
}
if (variant->when_to_set == AFTER_ALL_LISTEN) {
- for (i = 0; i < NR_SERVER; i++)
+ for (i = 0; i < nr_server; i++)
set_so_incoming_cpu(_metadata, self->servers[i], i);
}
}
@@ -149,7 +181,7 @@ void create_clients(struct __test_metadata *_metadata,
cpu_set_t cpu_set;
int i, j, fd, ret;
- for (i = 0; i < NR_SERVER; i++) {
+ for (i = 0; i < nr_server; i++) {
CPU_ZERO(&cpu_set);
CPU_SET(i, &cpu_set);
@@ -162,7 +194,7 @@ void create_clients(struct __test_metadata *_metadata,
ret = sched_setaffinity(0, sizeof(cpu_set), &cpu_set);
ASSERT_EQ(ret, 0);
- for (j = 0; j < CLIENT_PER_SERVER; j++) {
+ for (j = 0; j < nr_client_per_server; j++) {
fd = socket(AF_INET, SOCK_STREAM, 0);
ASSERT_NE(fd, -1);
@@ -180,8 +212,8 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
int i, j, fd, cpu, ret, total = 0;
socklen_t len = sizeof(int);
- for (i = 0; i < NR_SERVER; i++) {
- for (j = 0; j < CLIENT_PER_SERVER; j++) {
+ for (i = 0; i < nr_server; i++) {
+ for (j = 0; j < nr_client_per_server; j++) {
/* If we see -EAGAIN here, SO_INCOMING_CPU is broken */
fd = accept(self->servers[i], &self->addr, &self->addrlen);
ASSERT_NE(fd, -1);
@@ -195,7 +227,7 @@ void verify_incoming_cpu(struct __test_metadata *_metadata,
}
}
- ASSERT_EQ(total, NR_CLIENT);
+ ASSERT_EQ(total, nr_client);
TH_LOG("SO_INCOMING_CPU is very likely to be "
"working correctly with %d sockets.", total);
}
diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c
index 2672ac0b6d1f..8457b7ccbc09 100644
--- a/tools/testing/selftests/net/so_txtime.c
+++ b/tools/testing/selftests/net/so_txtime.c
@@ -134,8 +134,11 @@ static void do_recv_one(int fdr, struct timed_send *ts)
if (rbuf[0] != ts->data)
error(1, 0, "payload mismatch. expected %c", ts->data);
- if (llabs(tstop - texpect) > cfg_variance_us)
- error(1, 0, "exceeds variance (%d us)", cfg_variance_us);
+ if (llabs(tstop - texpect) > cfg_variance_us) {
+ fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us);
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ exit(1);
+ }
}
static void do_recv_verify_empty(int fdr)
diff --git a/tools/testing/selftests/net/so_txtime.sh b/tools/testing/selftests/net/so_txtime.sh
index 3f06f4d286a9..5e861ad32a42 100755
--- a/tools/testing/selftests/net/so_txtime.sh
+++ b/tools/testing/selftests/net/so_txtime.sh
@@ -5,6 +5,7 @@
set -e
+readonly ksft_skip=4
readonly DEV="veth0"
readonly BIN="./so_txtime"
@@ -46,7 +47,7 @@ ip -netns "${NS2}" addr add 192.168.1.2/24 dev "${DEV}"
ip -netns "${NS1}" addr add fd::1/64 dev "${DEV}" nodad
ip -netns "${NS2}" addr add fd::2/64 dev "${DEV}" nodad
-do_test() {
+run_test() {
local readonly IP="$1"
local readonly CLOCK="$2"
local readonly TXARGS="$3"
@@ -64,12 +65,25 @@ do_test() {
fi
local readonly START="$(date +%s%N --date="+ 0.1 seconds")"
+
ip netns exec "${NS2}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${RXARGS}" -r &
ip netns exec "${NS1}" "${BIN}" -"${IP}" -c "${CLOCK}" -t "${START}" -S "${SADDR}" -D "${DADDR}" "${TXARGS}"
wait "$!"
}
+do_test() {
+ run_test $@
+ [ $? -ne 0 ] && ret=1
+}
+
+do_fail_test() {
+ run_test $@
+ [ $? -eq 0 ] && ret=1
+}
+
ip netns exec "${NS1}" tc qdisc add dev "${DEV}" root fq
+set +e
+ret=0
do_test 4 mono a,-1 a,-1
do_test 6 mono a,0 a,0
do_test 6 mono a,10 a,10
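Review bot: `do_test` and `do_fail_test` forward their arguments with an unquoted `$@`, which re-splits and glob-expands each argument before `run_test` sees it; `run_test "$@"` passes them through unchanged. Both wrappers also end on `[ $? ... ] && ret=1`, so they return non-zero whenever the expectation held. That is harmless only because `set +e` is switched on just before the first call. An explicit `if ! run_test "$@"; then ret=1; fi` (and the inverse in `do_fail_test`) keeps the wrappers safe if `set -e` is ever restored.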
@@ -77,13 +91,20 @@ do_test 4 mono a,10,b,20 a,10,b,20
do_test 6 mono a,20,b,10 b,20,a,20
if ip netns exec "${NS1}" tc qdisc replace dev "${DEV}" root etf clockid CLOCK_TAI delta 400000; then
- ! do_test 4 tai a,-1 a,-1
- ! do_test 6 tai a,0 a,0
+ do_fail_test 4 tai a,-1 a,-1
+ do_fail_test 6 tai a,0 a,0
do_test 6 tai a,10 a,10
do_test 4 tai a,10,b,20 a,10,b,20
do_test 6 tai a,20,b,10 b,10,a,20
else
echo "tc ($(tc -V)) does not support qdisc etf. skipping"
+ [ $ret -eq 0 ] && ret=$ksft_skip
fi
-echo OK. All tests passed
+if [ $ret -eq 0 ]; then
+ echo OK. All tests passed
+elif [[ $ret -ne $ksft_skip && -n "$KSFT_MACHINE_SLOW" ]]; then
+ echo "Ignoring errors due to slow environment" 1>&2
+ ret=0
+fi
+exit $ret
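Review bot: With `KSFT_MACHINE_SLOW` set, the final `elif` resets any non-skip `ret` to 0, which also discards failures recorded by `do_fail_test`, i.e. cases that were expected to fail but succeeded, and any non-timing error from `run_test`. The so_txtime.c hunk in this diff already tolerates the variance check inside the binary when that variable is set. The shell-level override is therefore broader than the timing problem it names. Keeping a separate counter for timing-independent failures, or dropping the override here and relying on the binary's handling, would keep real regressions visible on slow machines. In the `else` branch, `[ $ret -eq 0 ] && ret=$ksft_skip` reports SKIP even though the mono tests ran and passed; a comment should say whether that is intended.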
diff --git a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
index 441eededa031..02d617040793 100755
--- a/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt46_l3vpn_test.sh
@@ -193,8 +193,7 @@
# +---------------------------------------------------+
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
@@ -250,26 +249,22 @@ cleanup()
ip link del veth-rt-1 2>/dev/null || true
ip link del veth-rt-2 2>/dev/null || true
- # destroy routers rt-* and hosts hs-*
- for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
- ip netns del ${ns} || true
- done
+ cleanup_all_ns
}
# Setup the basic networking for the routers
setup_rt_networking()
{
- local rt=$1
- local nsname=rt-${rt}
+ local id=$1
+ eval local nsname=\${rt_${id}}
- ip netns add ${nsname}
- ip link set veth-rt-${rt} netns ${nsname}
- ip -netns ${nsname} link set veth-rt-${rt} name veth0
+ ip link set veth-rt-${id} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${id} name veth0
ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
ip -netns ${nsname} link set veth0 up
ip -netns ${nsname} link set lo up
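Review bot: `eval local nsname=\${rt_${id}}` in `setup_rt_networking` uses `eval` only to read a variable whose name is computed, and it re-parses the function argument as shell code. Bash indirect expansion does the same lookup without `eval`: `local var="rt_${id}"` then `local nsname="${!var}"`. This assumes the script runs under bash; the shebang is outside this page, but the file already uses `source` and `local`. The same `eval local ...` form is added in `setup_hs`, `setup_vpn_config`, `check_rt_connectivity` and the `check_hs_*_connectivity` helpers of this file, and in the srv6_end_dt4_l3vpn_test.sh hunks. Separately, the new `setup_ns rt_1 rt_2` calls in `setup()` are unchecked, so an empty `${nsname}` would reach the `ip link set ... netns` commands.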
@@ -279,16 +274,14 @@ setup_rt_networking()
setup_hs()
{
- local hs=$1
- local rt=$2
+ local hid=$1
+ local rid=$2
local tid=$3
- local hsname=hs-t${tid}-${hs}
- local rtname=rt-${rt}
+ eval local hsname=\${hs_t${tid}_${hid}}
+ eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
# set the networking for the host
- ip netns add ${hsname}
-
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
@@ -299,8 +292,8 @@ setup_hs()
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
- ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
- ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
ip -netns ${hsname} link set veth0 up
ip -netns ${hsname} link set lo up
@@ -332,10 +325,8 @@ setup_vpn_config()
local rtdst=$4
local tid=$5
- local hssrc_name=hs-t${tid}-${hssrc}
- local hsdst_name=hs-t${tid}-${hsdst}
- local rtsrc_name=rt-${rtsrc}
- local rtdst_name=rt-${rtdst}
+ eval local rtsrc_name=\${rt_${rtsrc}}
+ eval local rtdst_name=\${rt_${rtdst}}
local rtveth=veth-t${tid}
local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6046
@@ -379,18 +370,21 @@ setup()
{
ip link add veth-rt-1 type veth peer name veth-rt-2
# setup the networking for router rt-1 and router rt-2
+ setup_ns rt_1 rt_2
setup_rt_networking 1
setup_rt_networking 2
# setup two hosts for the tenant 100.
# - host hs-1 is directly connected to the router rt-1;
# - host hs-2 is directly connected to the router rt-2.
+ setup_ns hs_t100_1 hs_t100_2
setup_hs 1 1 100 #args: host router tenant
setup_hs 2 2 100
# setup two hosts for the tenant 200
# - host hs-3 is directly connected to the router rt-1;
# - host hs-4 is directly connected to the router rt-2.
+ setup_ns hs_t200_3 hs_t200_4
setup_hs 3 1 200
setup_hs 4 2 200
@@ -409,8 +403,9 @@ check_rt_connectivity()
{
local rtsrc=$1
local rtdst=$2
+ eval local nsname=\${rt_${rtsrc}}
- ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
>/dev/null 2>&1
}
@@ -428,8 +423,9 @@ check_hs_ipv6_connectivity()
local hssrc=$1
local hsdst=$2
local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
- ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
}
@@ -438,8 +434,9 @@ check_hs_ipv4_connectivity()
local hssrc=$1
local hsdst=$2
local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
- ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
}
diff --git a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
index f96282362811..79fb81e63c59 100755
--- a/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt4_l3vpn_test.sh
@@ -163,8 +163,7 @@
# +---------------------------------------------------+
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
@@ -219,27 +218,22 @@ cleanup()
ip link del veth-rt-1 2>/dev/null || true
ip link del veth-rt-2 2>/dev/null || true
- # destroy routers rt-* and hosts hs-*
- for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
- ip netns del ${ns} || true
- done
+ cleanup_all_ns
}
# Setup the basic networking for the routers
setup_rt_networking()
{
- local rt=$1
- local nsname=rt-${rt}
-
- ip netns add ${nsname}
+ local id=$1
+ eval local nsname=\${rt_${id}}
ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip link set veth-rt-${rt} netns ${nsname}
- ip -netns ${nsname} link set veth-rt-${rt} name veth0
+ ip link set veth-rt-${id} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${id} name veth0
- ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
ip -netns ${nsname} link set veth0 up
ip -netns ${nsname} link set lo up
@@ -249,16 +243,13 @@ setup_rt_networking()
setup_hs()
{
- local hs=$1
- local rt=$2
+ local hid=$1
+ local rid=$2
local tid=$3
- local hsname=hs-t${tid}-${hs}
- local rtname=rt-${rt}
+ eval local hsname=\${hs_t${tid}_${hid}}
+ eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
- # set the networking for the host
- ip netns add ${hsname}
-
# disable the rp_filter otherwise the kernel gets confused about how
# to route decap ipv4 packets.
ip netns exec ${rtname} sysctl -wq net.ipv4.conf.all.rp_filter=0
@@ -266,7 +257,7 @@ setup_hs()
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
- ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hs}/24 dev veth0
+ ip -netns ${hsname} addr add ${IPv4_HS_NETWORK}.${hid}/24 dev veth0
ip -netns ${hsname} link set veth0 up
ip -netns ${hsname} link set lo up
@@ -293,10 +284,8 @@ setup_vpn_config()
local rtdst=$4
local tid=$5
- local hssrc_name=hs-t${tid}-${hssrc}
- local hsdst_name=hs-t${tid}-${hsdst}
- local rtsrc_name=rt-${rtsrc}
- local rtdst_name=rt-${rtdst}
+ eval local rtsrc_name=\${rt_${rtsrc}}
+ eval local rtdst_name=\${rt_${rtdst}}
local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6004
# set the encap route for encapsulating packets which arrive from the
@@ -328,18 +317,21 @@ setup()
{
ip link add veth-rt-1 type veth peer name veth-rt-2
# setup the networking for router rt-1 and router rt-2
+ setup_ns rt_1 rt_2
setup_rt_networking 1
setup_rt_networking 2
# setup two hosts for the tenant 100.
# - host hs-1 is directly connected to the router rt-1;
# - host hs-2 is directly connected to the router rt-2.
+ setup_ns hs_t100_1 hs_t100_2
setup_hs 1 1 100 #args: host router tenant
setup_hs 2 2 100
# setup two hosts for the tenant 200
# - host hs-3 is directly connected to the router rt-1;
# - host hs-4 is directly connected to the router rt-2.
+ setup_ns hs_t200_3 hs_t200_4
setup_hs 3 1 200
setup_hs 4 2 200
@@ -358,8 +350,9 @@ check_rt_connectivity()
{
local rtsrc=$1
local rtdst=$2
+ eval local nsname=\${rt_${rtsrc}}
- ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
>/dev/null 2>&1
}
@@ -377,8 +370,9 @@ check_hs_connectivity()
local hssrc=$1
local hsdst=$2
local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
- ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
${IPv4_HS_NETWORK}.${hsdst} >/dev/null 2>&1
}
diff --git a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
index b9b06ef80d88..e408406d8489 100755
--- a/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
+++ b/tools/testing/selftests/net/srv6_end_dt6_l3vpn_test.sh
@@ -164,8 +164,7 @@
# +---------------------------------------------------+
#
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
readonly LOCALSID_TABLE_ID=90
readonly IPv6_RT_NETWORK=fd00
@@ -220,26 +219,22 @@ cleanup()
ip link del veth-rt-1 2>/dev/null || true
ip link del veth-rt-2 2>/dev/null || true
- # destroy routers rt-* and hosts hs-*
- for ns in $(ip netns show | grep -E 'rt-*|hs-*'); do
- ip netns del ${ns} || true
- done
+ cleanup_all_ns
}
# Setup the basic networking for the routers
setup_rt_networking()
{
- local rt=$1
- local nsname=rt-${rt}
+ local id=$1
+ eval local nsname=\${rt_${id}}
- ip netns add ${nsname}
- ip link set veth-rt-${rt} netns ${nsname}
- ip -netns ${nsname} link set veth-rt-${rt} name veth0
+ ip link set veth-rt-${id} netns ${nsname}
+ ip -netns ${nsname} link set veth-rt-${id} name veth0
ip netns exec ${nsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${nsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
- ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${rt}/64 dev veth0 nodad
+ ip -netns ${nsname} addr add ${IPv6_RT_NETWORK}::${id}/64 dev veth0 nodad
ip -netns ${nsname} link set veth0 up
ip -netns ${nsname} link set lo up
@@ -248,22 +243,20 @@ setup_rt_networking()
setup_hs()
{
- local hs=$1
- local rt=$2
+ local hid=$1
+ local rid=$2
local tid=$3
- local hsname=hs-t${tid}-${hs}
- local rtname=rt-${rt}
+ eval local hsname=\${hs_t${tid}_${hid}}
+ eval local rtname=\${rt_${rid}}
local rtveth=veth-t${tid}
# set the networking for the host
- ip netns add ${hsname}
-
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.all.accept_dad=0
ip netns exec ${hsname} sysctl -wq net.ipv6.conf.default.accept_dad=0
ip -netns ${hsname} link add veth0 type veth peer name ${rtveth}
ip -netns ${hsname} link set ${rtveth} netns ${rtname}
- ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hs}/64 dev veth0 nodad
+ ip -netns ${hsname} addr add ${IPv6_HS_NETWORK}::${hid}/64 dev veth0 nodad
ip -netns ${hsname} link set veth0 up
ip -netns ${hsname} link set lo up
@@ -293,10 +286,8 @@ setup_vpn_config()
local rtdst=$4
local tid=$5
- local hssrc_name=hs-t${tid}-${hssrc}
- local hsdst_name=hs-t${tid}-${hsdst}
- local rtsrc_name=rt-${rtsrc}
- local rtdst_name=rt-${rtdst}
+ eval local rtsrc_name=\${rt_${rtsrc}}
+ eval local rtdst_name=\${rt_${rtdst}}
local rtveth=veth-t${tid}
local vpn_sid=${VPN_LOCATOR_SERVICE}:${hssrc}${hsdst}:${tid}::6006
@@ -331,18 +322,21 @@ setup()
{
ip link add veth-rt-1 type veth peer name veth-rt-2
# setup the networking for router rt-1 and router rt-2
+ setup_ns rt_1 rt_2
setup_rt_networking 1
setup_rt_networking 2
# setup two hosts for the tenant 100.
# - host hs-1 is directly connected to the router rt-1;
# - host hs-2 is directly connected to the router rt-2.
+ setup_ns hs_t100_1 hs_t100_2
setup_hs 1 1 100 #args: host router tenant
setup_hs 2 2 100
# setup two hosts for the tenant 200
# - host hs-3 is directly connected to the router rt-1;
# - host hs-4 is directly connected to the router rt-2.
+ setup_ns hs_t200_3 hs_t200_4
setup_hs 3 1 200
setup_hs 4 2 200
@@ -361,8 +355,9 @@ check_rt_connectivity()
{
local rtsrc=$1
local rtdst=$2
+ eval local nsname=\${rt_${rtsrc}}
- ip netns exec rt-${rtsrc} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
+ ip netns exec ${nsname} ping -c 1 -W 1 ${IPv6_RT_NETWORK}::${rtdst} \
>/dev/null 2>&1
}
@@ -380,8 +375,9 @@ check_hs_connectivity()
local hssrc=$1
local hsdst=$2
local tid=$3
+ eval local nsname=\${hs_t${tid}_${hssrc}}
- ip netns exec hs-t${tid}-${hssrc} ping -c 1 -W ${PING_TIMEOUT_SEC} \
+ ip netns exec ${nsname} ping -c 1 -W ${PING_TIMEOUT_SEC} \
${IPv6_HS_NETWORK}::${hsdst} >/dev/null 2>&1
}
diff --git a/tools/testing/selftests/net/stress_reuseport_listen.sh b/tools/testing/selftests/net/stress_reuseport_listen.sh
index 4de11da4092b..94d5d1a1c90f 100755
--- a/tools/testing/selftests/net/stress_reuseport_listen.sh
+++ b/tools/testing/selftests/net/stress_reuseport_listen.sh
@@ -2,18 +2,18 @@
# SPDX-License-Identifier: GPL-2.0
# Copyright (c) 2022 Meta Platforms, Inc. and affiliates.
-NS='stress_reuseport_listen_ns'
+source lib.sh
NR_FILES=24100
SAVED_NR_FILES=$(ulimit -n)
setup() {
- ip netns add $NS
+ setup_ns NS
ip netns exec $NS sysctl -q -w net.ipv6.ip_nonlocal_bind=1
ulimit -n $NR_FILES
}
cleanup() {
- ip netns del $NS
+ cleanup_ns $NS
ulimit -n $SAVED_NR_FILES
}
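Review bot: `setup_ns NS` in setup() is called without checking its result, so if the helper fails to create the namespace the next line runs `ip netns exec $NS sysctl …` with `$NS` empty and unquoted. The helper lives in lib.sh and is not visible here; if it reports failure through its exit status, `setup_ns NS || exit $ksft_skip` (or an explicit error exit) would stop the test before it touches sysctls or the file-descriptor limit. Quoting the expansions, for example `cleanup_ns "$NS"`, also keeps cleanup well-formed when the variable was never set.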
diff --git a/tools/testing/selftests/net/tcp_ao/.gitignore b/tools/testing/selftests/net/tcp_ao/.gitignore
new file mode 100644
index 000000000000..e8bb81b715b7
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/.gitignore
@@ -0,0 +1,2 @@
+*_ipv4
+*_ipv6
diff --git a/tools/testing/selftests/net/tcp_ao/Makefile b/tools/testing/selftests/net/tcp_ao/Makefile
new file mode 100644
index 000000000000..522d991e310e
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/Makefile
@@ -0,0 +1,56 @@
+# SPDX-License-Identifier: GPL-2.0
+TEST_BOTH_AF := bench-lookups
+TEST_BOTH_AF += connect
+TEST_BOTH_AF += connect-deny
+TEST_BOTH_AF += icmps-accept icmps-discard
+TEST_BOTH_AF += key-management
+TEST_BOTH_AF += restore
+TEST_BOTH_AF += rst
+TEST_BOTH_AF += self-connect
+TEST_BOTH_AF += seq-ext
+TEST_BOTH_AF += setsockopt-closed
+TEST_BOTH_AF += unsigned-md5
+
+TEST_IPV4_PROGS := $(TEST_BOTH_AF:%=%_ipv4)
+TEST_IPV6_PROGS := $(TEST_BOTH_AF:%=%_ipv6)
+
+TEST_GEN_PROGS := $(TEST_IPV4_PROGS) $(TEST_IPV6_PROGS)
+
+top_srcdir := ../../../../..
+include ../../lib.mk
+
+HOSTAR ?= ar
+
+LIBDIR := $(OUTPUT)/lib
+LIB := $(LIBDIR)/libaotst.a
+LDLIBS += $(LIB) -pthread
+LIBDEPS := lib/aolib.h Makefile
+
+CFLAGS := -Wall -O2 -g -D_GNU_SOURCE -fno-strict-aliasing
+CFLAGS += $(KHDR_INCLUDES)
+CFLAGS += -iquote ./lib/ -I ../../../../include/
+
+# Library
+LIBSRC := kconfig.c netlink.c proc.c repair.c setup.c sock.c utils.c
+LIBOBJ := $(LIBSRC:%.c=$(LIBDIR)/%.o)
+EXTRA_CLEAN += $(LIBOBJ) $(LIB)
+
+$(LIB): $(LIBOBJ)
+ $(HOSTAR) rcs $@ $^
+
+$(LIBDIR)/%.o: ./lib/%.c $(LIBDEPS)
+ mkdir -p $(LIBDIR)
+ $(CC) $< $(CFLAGS) $(CPPFLAGS) -o $@ -c
+
+$(TEST_GEN_PROGS): $(LIB)
+
+$(OUTPUT)/%_ipv4: %.c
+ $(LINK.c) $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/%_ipv6: %.c
+ $(LINK.c) -DIPV6_TEST $^ $(LDLIBS) -o $@
+
+$(OUTPUT)/icmps-accept_ipv4: CFLAGS+= -DTEST_ICMPS_ACCEPT
+$(OUTPUT)/icmps-accept_ipv6: CFLAGS+= -DTEST_ICMPS_ACCEPT
+$(OUTPUT)/bench-lookups_ipv4: LDLIBS+= -lm
+$(OUTPUT)/bench-lookups_ipv6: LDLIBS+= -lm
diff --git a/tools/testing/selftests/net/tcp_ao/bench-lookups.c b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
new file mode 100644
index 000000000000..a1e6e007c291
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/bench-lookups.c
@@ -0,0 +1,360 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <arpa/inet.h>
+#include <inttypes.h>
+#include <math.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <time.h>
+
+#include "../../../../include/linux/bits.h"
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+#define BENCH_NR_ITERS 100 /* number of times to run gathering statistics */
+
+static void gen_test_ips(union tcp_addr *ips, size_t ips_nr, bool use_rand)
+{
+ union tcp_addr net = {};
+ size_t i, j;
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ if (!use_rand) {
+ for (i = 0; i < ips_nr; i++)
+ ips[i] = gen_tcp_addr(net, 2 * i + 1);
+ return;
+ }
+ for (i = 0; i < ips_nr; i++) {
+ size_t r = (size_t)random() | 0x1;
+
+ ips[i] = gen_tcp_addr(net, r);
+
+ for (j = i - 1; j > 0 && i > 0; j--) {
+ if (!memcmp(&ips[i], &ips[j], sizeof(union tcp_addr))) {
+ i--; /* collision */
+ break;
+ }
+ }
+ }
+}
+
+static void test_add_routes(union tcp_addr *ips, size_t ips_nr)
+{
+ size_t i;
+
+ for (i = 0; i < ips_nr; i++) {
+ union tcp_addr *p = (union tcp_addr *)&ips[i];
+ int err;
+
+ err = ip_route_add(veth_name, TEST_FAMILY, this_ip_addr, *p);
+ if (err && err != -EEXIST)
+ test_error("Failed to add route");
+ }
+}
+
+static void server_apply_keys(int lsk, union tcp_addr *ips, size_t ips_nr)
+{
+ size_t i;
+
+ for (i = 0; i < ips_nr; i++) {
+ union tcp_addr *p = (union tcp_addr *)&ips[i];
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100))
+ test_error("setsockopt(TCP_AO)");
+ }
+}
+
+static const size_t nr_keys[] = { 512, 1024, 2048, 4096, 8192 };
+static union tcp_addr *test_ips;
+
+struct bench_stats {
+ uint64_t min;
+ uint64_t max;
+ uint64_t nr;
+ double mean;
+ double s2;
+};
+
+static struct bench_tests {
+ struct bench_stats delete_last_key;
+ struct bench_stats add_key;
+ struct bench_stats delete_rand_key;
+ struct bench_stats connect_last_key;
+ struct bench_stats connect_rand_key;
+ struct bench_stats delete_async;
+} bench_results[ARRAY_SIZE(nr_keys)];
+
+#define NSEC_PER_SEC 1000000000ULL
+
+static void measure_call(struct bench_stats *st,
+ void (*f)(int, void *), int sk, void *arg)
+{
+ struct timespec start = {}, end = {};
+ double delta;
+ uint64_t nsec;
+
+ if (clock_gettime(CLOCK_MONOTONIC, &start))
+ test_error("clock_gettime()");
+
+ f(sk, arg);
+
+ if (clock_gettime(CLOCK_MONOTONIC, &end))
+ test_error("clock_gettime()");
+
+ nsec = (end.tv_sec - start.tv_sec) * NSEC_PER_SEC;
+ if (end.tv_nsec >= start.tv_nsec)
+ nsec += end.tv_nsec - start.tv_nsec;
+ else
+ nsec -= start.tv_nsec - end.tv_nsec;
+
+ if (st->nr == 0) {
+ st->min = st->max = nsec;
+ } else {
+ if (st->min > nsec)
+ st->min = nsec;
+ if (st->max < nsec)
+ st->max = nsec;
+ }
+
+ /* Welford-Knuth algorithm */
+ st->nr++;
+ delta = (double)nsec - st->mean;
+ st->mean += delta / st->nr;
+ st->s2 += delta * ((double)nsec - st->mean);
+}
+
+static void delete_mkt(int sk, void *arg)
+{
+ struct tcp_ao_del *ao = arg;
+
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, ao, sizeof(*ao)))
+ test_error("setsockopt(TCP_AO_DEL_KEY)");
+}
+
+static void add_back_mkt(int sk, void *arg)
+{
+ union tcp_addr *p = arg;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, *p, -1, 100, 100))
+ test_error("setsockopt(TCP_AO)");
+}
+
+static void bench_delete(int lsk, struct bench_stats *add,
+ struct bench_stats *del,
+ union tcp_addr *ips, size_t ips_nr,
+ bool rand_order, bool async)
+{
+ struct tcp_ao_del ao_del = {};
+ union tcp_addr *p;
+ size_t i;
+
+ ao_del.sndid = 100;
+ ao_del.rcvid = 100;
+ ao_del.del_async = !!async;
+ ao_del.prefix = DEFAULT_TEST_PREFIX;
+
+ /* Remove the first added */
+ p = (union tcp_addr *)&ips[0];
+ tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0);
+
+ for (i = 0; i < BENCH_NR_ITERS; i++) {
+ measure_call(del, delete_mkt, lsk, (void *)&ao_del);
+
+ /* Restore it back */
+ measure_call(add, add_back_mkt, lsk, (void *)p);
+
+ /*
+ * Slowest for FILO-linked-list:
+ * on (i) iteration removing ips[i] element. When it gets
+ * added to the list back - it becomes first to fetch, so
+ * on (i + 1) iteration go to ips[i + 1] element.
+ */
+ if (rand_order)
+ p = (union tcp_addr *)&ips[rand() % ips_nr];
+ else
+ p = (union tcp_addr *)&ips[i % ips_nr];
+ tcp_addr_to_sockaddr_in(&ao_del.addr, p, 0);
+ }
+}
+
+static void bench_connect_srv(int lsk, union tcp_addr *ips, size_t ips_nr)
+{
+ size_t i;
+
+ for (i = 0; i < BENCH_NR_ITERS; i++) {
+ int sk;
+
+ synchronize_threads();
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ close(sk);
+ }
+}
+
+static void test_print_stats(const char *desc, size_t nr, struct bench_stats *bs)
+{
+ test_ok("%-20s\t%zu keys: min=%" PRIu64 "ms max=%" PRIu64 "ms mean=%gms stddev=%g",
+ desc, nr, bs->min / 1000000, bs->max / 1000000,
+ bs->mean / 1000000, sqrt((bs->mean / 1000000) / bs->nr));
+}
+
+static void *server_fn(void *arg)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(nr_keys); i++) {
+ struct bench_tests *bt = &bench_results[i];
+ int lsk;
+
+ test_ips = malloc(nr_keys[i] * sizeof(union tcp_addr));
+ if (!test_ips)
+ test_error("malloc()");
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port + i, 1);
+
+ gen_test_ips(test_ips, nr_keys[i], false);
+ test_add_routes(test_ips, nr_keys[i]);
+ test_set_optmem(KERNEL_TCP_AO_KEY_SZ_ROUND_UP * nr_keys[i]);
+ server_apply_keys(lsk, test_ips, nr_keys[i]);
+
+ synchronize_threads();
+ bench_connect_srv(lsk, test_ips, nr_keys[i]);
+ bench_connect_srv(lsk, test_ips, nr_keys[i]);
+
+ /* The worst case for FILO-list */
+ bench_delete(lsk, &bt->add_key, &bt->delete_last_key,
+ test_ips, nr_keys[i], false, false);
+ test_print_stats("Add a new key",
+ nr_keys[i], &bt->add_key);
+ test_print_stats("Delete: worst case",
+ nr_keys[i], &bt->delete_last_key);
+
+ bench_delete(lsk, &bt->add_key, &bt->delete_rand_key,
+ test_ips, nr_keys[i], true, false);
+ test_print_stats("Delete: random-search",
+ nr_keys[i], &bt->delete_rand_key);
+
+ bench_delete(lsk, &bt->add_key, &bt->delete_async,
+ test_ips, nr_keys[i], false, true);
+ test_print_stats("Delete: async", nr_keys[i], &bt->delete_async);
+
+ free(test_ips);
+ close(lsk);
+ }
+
+ return NULL;
+}
+
+static void connect_client(int sk, void *arg)
+{
+ size_t *p = arg;
+
+ if (test_connect_socket(sk, this_ip_dest, test_server_port + *p) <= 0)
+ test_error("failed to connect()");
+}
+
+static void client_addr_setup(int sk, union tcp_addr taddr)
+{
+#ifdef IPV6_TEST
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = 0,
+ .sin6_addr = taddr.a6,
+ };
+#else
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = 0,
+ .sin_addr = taddr.a4,
+ };
+#endif
+ int ret;
+
+ ret = ip_addr_add(veth_name, TEST_FAMILY, taddr, TEST_PREFIX);
+ if (ret && ret != -EEXIST)
+ test_error("Failed to add ip address");
+ ret = ip_route_add(veth_name, TEST_FAMILY, taddr, this_ip_dest);
+ if (ret && ret != -EEXIST)
+ test_error("Failed to add route");
+
+ if (bind(sk, &addr, sizeof(addr)))
+ test_error("bind()");
+}
+
+static void bench_connect_client(size_t port_off, struct bench_tests *bt,
+ union tcp_addr *ips, size_t ips_nr, bool rand_order)
+{
+ struct bench_stats *con;
+ union tcp_addr *p;
+ size_t i;
+
+ if (rand_order)
+ con = &bt->connect_rand_key;
+ else
+ con = &bt->connect_last_key;
+
+ p = (union tcp_addr *)&ips[0];
+
+ for (i = 0; i < BENCH_NR_ITERS; i++) {
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ client_addr_setup(sk, *p);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads();
+
+ measure_call(con, connect_client, sk, (void *)&port_off);
+
+ close(sk);
+
+ /*
+ * Slowest for FILO-linked-list:
+ * on (i) iteration removing ips[i] element. When it gets
+ * added to the list back - it becomes first to fetch, so
+ * on (i + 1) iteration go to ips[i + 1] element.
+ */
+ if (rand_order)
+ p = (union tcp_addr *)&ips[rand() % ips_nr];
+ else
+ p = (union tcp_addr *)&ips[i % ips_nr];
+ }
+}
+
+static void *client_fn(void *arg)
+{
+ size_t i;
+
+ for (i = 0; i < ARRAY_SIZE(nr_keys); i++) {
+ struct bench_tests *bt = &bench_results[i];
+
+ synchronize_threads();
+ bench_connect_client(i, bt, test_ips, nr_keys[i], false);
+ test_print_stats("Connect: worst case",
+ nr_keys[i], &bt->connect_last_key);
+
+ bench_connect_client(i, bt, test_ips, nr_keys[i], false);
+ test_print_stats("Connect: random-search",
+ nr_keys[i], &bt->connect_last_key);
+ }
+ synchronize_threads();
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(30, server_fn, client_fn);
+ return 0;
+}
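Review bot: In client_fn the second measurement is labelled "Connect: random-search" but calls `bench_connect_client(i, bt, test_ips, nr_keys[i], false)` and prints `&bt->connect_last_key`, so the random-order path and `connect_rand_key` are never exercised; it should pass `true` and report `&bt->connect_rand_key`. test_print_stats derives its stddev from `bs->mean`, which leaves the `s2` value accumulated by measure_call unused; something like `sqrt(bs->s2 / bs->nr) / 1000000` would report the spread that was actually measured. In gen_test_ips the duplicate scan stops at `j > 0`, so a collision with `ips[0]` is never detected. client_fn also reads the global `test_ips` that server_fn allocates and frees per iteration; whether synchronize_threads() orders those accesses safely cannot be confirmed from this hunk and deserves a comment at the `free(test_ips)` call.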
diff --git a/tools/testing/selftests/net/tcp_ao/config b/tools/testing/selftests/net/tcp_ao/config
new file mode 100644
index 000000000000..d3277a9de987
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/config
@@ -0,0 +1,10 @@
+CONFIG_CRYPTO_HMAC=y
+CONFIG_CRYPTO_RMD160=y
+CONFIG_CRYPTO_SHA1=y
+CONFIG_IPV6_MULTIPLE_TABLES=y
+CONFIG_IPV6=y
+CONFIG_NET_L3_MASTER_DEV=y
+CONFIG_NET_VRF=y
+CONFIG_TCP_AO=y
+CONFIG_TCP_MD5SIG=y
+CONFIG_VETH=m
diff --git a/tools/testing/selftests/net/tcp_ao/connect-deny.c b/tools/testing/selftests/net/tcp_ao/connect-deny.c
new file mode 100644
index 000000000000..185a2f6e5ff3
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/connect-deny.c
@@ -0,0 +1,264 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+#define fault(type) (inj == FAULT_ ## type)
+
+static inline int test_add_key_maclen(int sk, const char *key, uint8_t maclen,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ if (prefix > DEFAULT_TEST_PREFIX)
+ prefix = DEFAULT_TEST_PREFIX;
+
+ err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr, false, false,
+ prefix, 0, sndid, rcvid, maclen,
+ 0, strlen(key), key);
+ if (err)
+ return err;
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static void try_accept(const char *tst_name, unsigned int port, const char *pwd,
+ union tcp_addr addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+ const char *cnt_name, test_cnt cnt_expected,
+ fault_t inj)
+{
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ int lsk, err, sk = 0;
+ time_t timeout;
+
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (pwd && test_add_key_maclen(lsk, pwd, maclen, addr, prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ if (cnt_name)
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ err = test_wait_fd(lsk, timeout, 0);
+ if (err == -ETIMEDOUT) {
+ if (!fault(TIMEOUT))
+ test_fail("timed out for accept()");
+ } else if (err < 0) {
+ test_error("test_wait_fd()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("ready to accept");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0) {
+ test_error("accept()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("%s: accepted", tst_name);
+ }
+ }
+
+ if (pwd && test_get_tcp_ao_counters(lsk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+
+ close(lsk);
+ if (pwd)
+ test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+
+ if (!cnt_name)
+ goto out;
+
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+
+out:
+ synchronize_threads(); /* close() */
+ if (sk > 0)
+ close(sk);
+}
+
+static void *server_fn(void *arg)
+{
+ union tcp_addr wrong_addr, network_addr;
+ unsigned int port = test_server_port;
+
+ if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_WRONG_IP);
+
+ try_accept("Non-AO server + AO client", port++, NULL,
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAOKeyNotFound", 0, FAULT_TIMEOUT);
+
+ try_accept("AO server + Non-AO client", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAORequired", TEST_CNT_AO_REQUIRED, FAULT_TIMEOUT);
+
+ try_accept("Wrong password", port++, "something that is not DEFAULT_TEST_PASSWORD",
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT);
+
+ try_accept("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 101, 0,
+ "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+
+ try_accept("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 101, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, FAULT_TIMEOUT);
+
+ try_accept("Different maclen", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 8,
+ "TCPAOBad", TEST_CNT_BAD, FAULT_TIMEOUT);
+
+ try_accept("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+ wrong_addr, -1, 100, 100, 0,
+ "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND, FAULT_TIMEOUT);
+
+ try_accept("Client: Wrong addr", port++, NULL,
+ this_ip_dest, -1, 100, 100, 0, NULL, 0, FAULT_TIMEOUT);
+
+ try_accept("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 200, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, 0);
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ try_accept("Server: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ network_addr, 16, 100, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, 0);
+
+ try_accept("Client: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0,
+ "TCPAOGood", TEST_CNT_GOOD, 0);
+
+ /* client exits */
+ synchronize_threads();
+ return NULL;
+}
+
+static void try_connect(const char *tst_name, unsigned int port,
+ const char *pwd, union tcp_addr addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid,
+ test_cnt cnt_expected, fault_t inj)
+{
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ time_t timeout;
+ int sk, ret;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (pwd && test_add_key(sk, pwd, addr, prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ if (pwd && test_get_tcp_ao_counters(sk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+ if (ret < 0) {
+ if (fault(KEYREJECT) && ret == -EKEYREJECTED) {
+ test_ok("%s: connect() was prevented", tst_name);
+ } else if (ret == -ETIMEDOUT && fault(TIMEOUT)) {
+ test_ok("%s", tst_name);
+ } else if (ret == -ECONNREFUSED &&
+ (fault(TIMEOUT) || fault(KEYREJECT))) {
+ test_ok("%s: refused to connect", tst_name);
+ } else {
+ test_error("%s: connect() returned %d", tst_name, ret);
+ }
+ goto out;
+ }
+
+ if (fault(TIMEOUT) || fault(KEYREJECT))
+ test_fail("%s: connected", tst_name);
+ else
+ test_ok("%s: connected", tst_name);
+ if (pwd && ret > 0) {
+ if (test_get_tcp_ao_counters(sk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+ test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+ }
+out:
+ synchronize_threads(); /* close() */
+
+ if (ret > 0)
+ close(sk);
+}
+
+static void *client_fn(void *arg)
+{
+ union tcp_addr wrong_addr, network_addr;
+ unsigned int port = test_server_port;
+
+ if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_WRONG_IP);
+
+ try_connect("Non-AO server + AO client", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("AO server + Non-AO client", port++, NULL,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Wrong password", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Wrong rcv id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Wrong snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Different maclen", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Server: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, 0, FAULT_TIMEOUT);
+
+ try_connect("Client: Wrong addr", port++, DEFAULT_TEST_PASSWORD,
+ wrong_addr, -1, 100, 100, 0, FAULT_KEYREJECT);
+
+ try_connect("rcv id != snd id", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 200, TEST_CNT_GOOD, 0);
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ try_connect("Server: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100, TEST_CNT_GOOD, 0);
+
+ try_connect("Client: prefix match", port++, DEFAULT_TEST_PASSWORD,
+ network_addr, 16, 100, 100, TEST_CNT_GOOD, 0);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(21, server_fn, client_fn);
+ return 0;
+}
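Review bot: try_accept prints `after_cnt` and `before_cnt`, both declared `uint64_t`, with `%zu` in the test_fail and test_ok calls near the end of the function, which is a varargs type mismatch on any target where size_t is not 64 bits wide. `<inttypes.h>` is already included, so the format can be `"%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64`, and likewise for the test_ok line. connect.c in this commit has the same mismatch in client_fn: `%zu` is used for `after_aogood`/`before_aogood`, and `PRIu64` for the `size_t` `nr_packets`. Separately, try_connect closes `sk` only when `ret > 0`, so each expected-failure case appears to leave its socket open unless _test_connect_socket closes it on error, which is not visible here.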
diff --git a/tools/testing/selftests/net/tcp_ao/connect.c b/tools/testing/selftests/net/tcp_ao/connect.c
new file mode 100644
index 000000000000..81653b47f303
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/connect.c
@@ -0,0 +1,90 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+static void *server_fn(void *arg)
+{
+ int sk, lsk;
+ ssize_t bytes;
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ synchronize_threads();
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads();
+
+ bytes = test_server_run(sk, 0, 0);
+
+ test_fail("server served: %zd", bytes);
+ return NULL;
+}
+
+static void *client_fn(void *arg)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ uint64_t before_aogood, after_aogood;
+ const size_t nr_packets = 20;
+ struct netstat *ns_before, *ns_after;
+ struct tcp_ao_counters ao1, ao2;
+
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads();
+ if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+ test_error("failed to connect()");
+ synchronize_threads();
+
+ ns_before = netstat_read();
+ before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("verify failed");
+ return NULL;
+ }
+
+ ns_after = netstat_read();
+ after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ netstat_print_diff(ns_before, ns_after);
+ netstat_free(ns_before);
+ netstat_free(ns_after);
+
+ if (nr_packets > (after_aogood - before_aogood)) {
+ test_fail("TCPAOGood counter mismatch: %zu > (%zu - %zu)",
+ nr_packets, after_aogood, before_aogood);
+ return NULL;
+ }
+ if (test_tcp_ao_counters_cmp("connect", &ao1, &ao2, TEST_CNT_GOOD))
+ return NULL;
+
+ test_ok("connect TCPAOGood %" PRIu64 "/%" PRIu64 "/%" PRIu64 " => %" PRIu64 "/%" PRIu64 "/%" PRIu64 ", sent %" PRIu64,
+ before_aogood, ao1.ao_info_pkt_good,
+ ao1.key_cnts[0].pkt_good,
+ after_aogood, ao2.ao_info_pkt_good,
+ ao2.key_cnts[0].pkt_good,
+ nr_packets);
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(1, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-accept.c b/tools/testing/selftests/net/tcp_ao/icmps-accept.c
new file mode 120000
index 000000000000..0a5bb85eb260
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/icmps-accept.c
@@ -0,0 +1 @@
+icmps-discard.c \ No newline at end of file
diff --git a/tools/testing/selftests/net/tcp_ao/icmps-discard.c b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
new file mode 100644
index 000000000000..d69bcba3c929
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/icmps-discard.c
@@ -0,0 +1,449 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Selftest that verifies that incomping ICMPs are ignored,
+ * the TCP connection stays alive, no hard or soft errors get reported
+ * to the usespace and the counter for ignored ICMPs is updated.
+ *
+ * RFC5925, 7.8:
+ * >> A TCP-AO implementation MUST default to ignore incoming ICMPv4
+ * messages of Type 3 (destination unreachable), Codes 2-4 (protocol
+ * unreachable, port unreachable, and fragmentation needed -- ’hard
+ * errors’), and ICMPv6 Type 1 (destination unreachable), Code 1
+ * (administratively prohibited) and Code 4 (port unreachable) intended
+ * for connections in synchronized states (ESTABLISHED, FIN-WAIT-1, FIN-
+ * WAIT-2, CLOSE-WAIT, CLOSING, LAST-ACK, TIME-WAIT) that match MKTs.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#include <inttypes.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/ipv6.h>
+#include <netinet/in.h>
+#include <netinet/ip.h>
+#include <sys/socket.h>
+#include "aolib.h"
+#include "../../../../include/linux/compiler.h"
+
+const size_t packets_nr = 20;
+const size_t packet_size = 100;
+const char *tcpao_icmps = "TCPAODroppedIcmps";
+
+#ifdef IPV6_TEST
+const char *dst_unreach = "Icmp6InDestUnreachs";
+const int sk_ip_level = SOL_IPV6;
+const int sk_recverr = IPV6_RECVERR;
+#else
+const char *dst_unreach = "InDestUnreachs";
+const int sk_ip_level = SOL_IP;
+const int sk_recverr = IP_RECVERR;
+#endif
+
+/* Server is expected to fail with hard error if ::accept_icmp is set */
+#ifdef TEST_ICMPS_ACCEPT
+# define test_icmps_fail test_ok
+# define test_icmps_ok test_fail
+#else
+# define test_icmps_fail test_fail
+# define test_icmps_ok test_ok
+#endif
+
+static void serve_interfered(int sk)
+{
+ ssize_t test_quota = packet_size * packets_nr * 10;
+ uint64_t dest_unreach_a, dest_unreach_b;
+ uint64_t icmp_ignored_a, icmp_ignored_b;
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ bool counter_not_found;
+ struct netstat *ns_after, *ns_before;
+ ssize_t bytes;
+
+ ns_before = netstat_read();
+ dest_unreach_a = netstat_get(ns_before, dst_unreach, NULL);
+ icmp_ignored_a = netstat_get(ns_before, tcpao_icmps, NULL);
+ if (test_get_tcp_ao_counters(sk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+ bytes = test_server_run(sk, test_quota, 0);
+ ns_after = netstat_read();
+ netstat_print_diff(ns_before, ns_after);
+ dest_unreach_b = netstat_get(ns_after, dst_unreach, NULL);
+ icmp_ignored_b = netstat_get(ns_after, tcpao_icmps,
+ &counter_not_found);
+ if (test_get_tcp_ao_counters(sk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+
+ netstat_free(ns_before);
+ netstat_free(ns_after);
+
+ if (dest_unreach_a >= dest_unreach_b) {
+ test_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
+ dst_unreach, dest_unreach_a, dest_unreach_b);
+ return;
+ }
+ test_ok("%s delivered %" PRIu64,
+ dst_unreach, dest_unreach_b - dest_unreach_a);
+ if (bytes < 0)
+ test_icmps_fail("Server failed with %zd: %s", bytes, strerrordesc_np(-bytes));
+ else
+ test_icmps_ok("Server survived %zd bytes of traffic", test_quota);
+ if (counter_not_found) {
+ test_fail("Not found %s counter", tcpao_icmps);
+ return;
+ }
+#ifdef TEST_ICMPS_ACCEPT
+ test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD);
+#else
+ test_tcp_ao_counters_cmp(NULL, &ao_cnt1, &ao_cnt2, TEST_CNT_GOOD | TEST_CNT_AO_DROPPED_ICMP);
+#endif
+ if (icmp_ignored_a >= icmp_ignored_b) {
+ test_icmps_fail("%s counter didn't change: %" PRIu64 " >= %" PRIu64,
+ tcpao_icmps, icmp_ignored_a, icmp_ignored_b);
+ return;
+ }
+ test_icmps_ok("ICMPs ignored %" PRIu64, icmp_ignored_b - icmp_ignored_a);
+}
+
+static void *server_fn(void *arg)
+{
+ int val, sk, lsk;
+ bool accept_icmps = false;
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+#ifdef TEST_ICMPS_ACCEPT
+ accept_icmps = true;
+#endif
+
+ if (test_set_ao_flags(lsk, false, accept_icmps))
+ test_error("setsockopt(TCP_AO_INFO)");
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ synchronize_threads();
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ /* Fail on hard ip errors, such as dest unreachable (RFC1122) */
+ val = 1;
+ if (setsockopt(sk, sk_ip_level, sk_recverr, &val, sizeof(val)))
+ test_error("setsockopt()");
+
+ synchronize_threads();
+
+ serve_interfered(sk);
+ return NULL;
+}
+
+static size_t packets_sent;
+static size_t icmps_sent;
+
+static uint32_t checksum4_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ size_t i;
+
+ for (i = 0; i < len / sizeof(uint16_t); i++)
+ sum += words[i];
+ if (len & 1)
+ sum += ((char *)data)[len - 1];
+ return sum;
+}
+
+static uint16_t checksum4_fold(void *data, size_t len, uint32_t sum)
+{
+ sum = checksum4_nofold(data, len, sum);
+ while (sum > 0xFFFF)
+ sum = (sum & 0xFFFF) + (sum >> 16);
+ return ~sum;
+}
+
+static void set_ip4hdr(struct iphdr *iph, size_t packet_len, int proto,
+ struct sockaddr_in *src, struct sockaddr_in *dst)
+{
+ iph->version = 4;
+ iph->ihl = 5;
+ iph->tos = 0;
+ iph->tot_len = htons(packet_len);
+ iph->ttl = 2;
+ iph->protocol = proto;
+ iph->saddr = src->sin_addr.s_addr;
+ iph->daddr = dst->sin_addr.s_addr;
+ iph->check = checksum4_fold((void *)iph, iph->ihl << 1, 0);
+}
+
+static void icmp_interfere4(uint8_t type, uint8_t code, uint32_t rcv_nxt,
+ struct sockaddr_in *src, struct sockaddr_in *dst)
+{
+ int sk = socket(AF_INET, SOCK_RAW, IPPROTO_RAW);
+ struct {
+ struct iphdr iph;
+ struct icmphdr icmph;
+ struct iphdr iphe;
+ struct {
+ uint16_t sport;
+ uint16_t dport;
+ uint32_t seq;
+ } tcph;
+ } packet = {};
+ size_t packet_len;
+ ssize_t bytes;
+
+ if (sk < 0)
+ test_error("socket(AF_INET, SOCK_RAW, IPPROTO_RAW)");
+
+ packet_len = sizeof(packet);
+ set_ip4hdr(&packet.iph, packet_len, IPPROTO_ICMP, src, dst);
+
+ packet.icmph.type = type;
+ packet.icmph.code = code;
+ if (code == ICMP_FRAG_NEEDED) {
+ randomize_buffer(&packet.icmph.un.frag.mtu,
+ sizeof(packet.icmph.un.frag.mtu));
+ }
+
+ packet_len = sizeof(packet.iphe) + sizeof(packet.tcph);
+ set_ip4hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src);
+
+ packet.tcph.sport = dst->sin_port;
+ packet.tcph.dport = src->sin_port;
+ packet.tcph.seq = htonl(rcv_nxt);
+
+ packet_len = sizeof(packet) - sizeof(packet.iph);
+ packet.icmph.checksum = checksum4_fold((void *)&packet.icmph,
+ packet_len, 0);
+
+ bytes = sendto(sk, &packet, sizeof(packet), 0,
+ (struct sockaddr *)dst, sizeof(*dst));
+ if (bytes != sizeof(packet))
+ test_error("send(): %zd", bytes);
+ icmps_sent++;
+
+ close(sk);
+}
+
+static void set_ip6hdr(struct ipv6hdr *iph, size_t packet_len, int proto,
+ struct sockaddr_in6 *src, struct sockaddr_in6 *dst)
+{
+ iph->version = 6;
+ iph->payload_len = htons(packet_len);
+ iph->nexthdr = proto;
+ iph->hop_limit = 2;
+ iph->saddr = src->sin6_addr;
+ iph->daddr = dst->sin6_addr;
+}
+
+static inline uint16_t csum_fold(uint32_t csum)
+{
+ uint32_t sum = csum;
+
+ sum = (sum & 0xffff) + (sum >> 16);
+ sum = (sum & 0xffff) + (sum >> 16);
+ return (uint16_t)~sum;
+}
+
+static inline uint32_t csum_add(uint32_t csum, uint32_t addend)
+{
+ uint32_t res = csum;
+
+ res += addend;
+ return res + (res < addend);
+}
+
+noinline uint32_t checksum6_nofold(void *data, size_t len, uint32_t sum)
+{
+ uint16_t *words = data;
+ size_t i;
+
+ for (i = 0; i < len / sizeof(uint16_t); i++)
+ sum = csum_add(sum, words[i]);
+ if (len & 1)
+ sum = csum_add(sum, ((char *)data)[len - 1]);
+ return sum;
+}
+
+noinline uint16_t icmp6_checksum(struct sockaddr_in6 *src,
+ struct sockaddr_in6 *dst,
+ void *ptr, size_t len, uint8_t proto)
+{
+ struct {
+ struct in6_addr saddr;
+ struct in6_addr daddr;
+ uint32_t payload_len;
+ uint8_t zero[3];
+ uint8_t nexthdr;
+ } pseudo_header = {};
+ uint32_t sum;
+
+ pseudo_header.saddr = src->sin6_addr;
+ pseudo_header.daddr = dst->sin6_addr;
+ pseudo_header.payload_len = htonl(len);
+ pseudo_header.nexthdr = proto;
+
+ sum = checksum6_nofold(&pseudo_header, sizeof(pseudo_header), 0);
+ sum = checksum6_nofold(ptr, len, sum);
+
+ return csum_fold(sum);
+}
+
+static void icmp6_interfere(int type, int code, uint32_t rcv_nxt,
+ struct sockaddr_in6 *src, struct sockaddr_in6 *dst)
+{
+ int sk = socket(AF_INET6, SOCK_RAW, IPPROTO_RAW);
+ struct sockaddr_in6 dst_raw = *dst;
+ struct {
+ struct ipv6hdr iph;
+ struct icmp6hdr icmph;
+ struct ipv6hdr iphe;
+ struct {
+ uint16_t sport;
+ uint16_t dport;
+ uint32_t seq;
+ } tcph;
+ } packet = {};
+ size_t packet_len;
+ ssize_t bytes;
+
+
+ if (sk < 0)
+ test_error("socket(AF_INET6, SOCK_RAW, IPPROTO_RAW)");
+
+ packet_len = sizeof(packet) - sizeof(packet.iph);
+ set_ip6hdr(&packet.iph, packet_len, IPPROTO_ICMPV6, src, dst);
+
+ packet.icmph.icmp6_type = type;
+ packet.icmph.icmp6_code = code;
+
+ packet_len = sizeof(packet.iphe) + sizeof(packet.tcph);
+ set_ip6hdr(&packet.iphe, packet_len, IPPROTO_TCP, dst, src);
+
+ packet.tcph.sport = dst->sin6_port;
+ packet.tcph.dport = src->sin6_port;
+ packet.tcph.seq = htonl(rcv_nxt);
+
+ packet_len = sizeof(packet) - sizeof(packet.iph);
+
+ packet.icmph.icmp6_cksum = icmp6_checksum(src, dst,
+ (void *)&packet.icmph, packet_len, IPPROTO_ICMPV6);
+
+ dst_raw.sin6_port = htons(IPPROTO_RAW);
+ bytes = sendto(sk, &packet, sizeof(packet), 0,
+ (struct sockaddr *)&dst_raw, sizeof(dst_raw));
+ if (bytes != sizeof(packet))
+ test_error("send(): %zd", bytes);
+ icmps_sent++;
+
+ close(sk);
+}
+
+static uint32_t get_rcv_nxt(int sk)
+{
+ int val = TCP_REPAIR_ON;
+ uint32_t ret;
+ socklen_t sz = sizeof(ret);
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+ val = TCP_RECV_QUEUE;
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+ if (getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &ret, &sz))
+ test_error("getsockopt(TCP_QUEUE_SEQ)");
+ val = TCP_REPAIR_OFF_NO_WP;
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+ return ret;
+}
+
+static void icmp_interfere(const size_t nr, uint32_t rcv_nxt, void *src, void *dst)
+{
+ struct sockaddr_in *saddr4 = src;
+ struct sockaddr_in *daddr4 = dst;
+ struct sockaddr_in6 *saddr6 = src;
+ struct sockaddr_in6 *daddr6 = dst;
+ size_t i;
+
+ if (saddr4->sin_family != daddr4->sin_family)
+ test_error("Different address families");
+
+ for (i = 0; i < nr; i++) {
+ if (saddr4->sin_family == AF_INET) {
+ icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PROT_UNREACH,
+ rcv_nxt, saddr4, daddr4);
+ icmp_interfere4(ICMP_DEST_UNREACH, ICMP_PORT_UNREACH,
+ rcv_nxt, saddr4, daddr4);
+ icmp_interfere4(ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
+ rcv_nxt, saddr4, daddr4);
+ icmps_sent += 3;
+ } else if (saddr4->sin_family == AF_INET6) {
+ icmp6_interfere(ICMPV6_DEST_UNREACH,
+ ICMPV6_ADM_PROHIBITED,
+ rcv_nxt, saddr6, daddr6);
+ icmp6_interfere(ICMPV6_DEST_UNREACH,
+ ICMPV6_PORT_UNREACH,
+ rcv_nxt, saddr6, daddr6);
+ icmps_sent += 2;
+ } else {
+ test_error("Not ip address family");
+ }
+ }
+}
+
+static void send_interfered(int sk)
+{
+ const unsigned int timeout = TEST_TIMEOUT_SEC;
+ struct sockaddr_in6 src, dst;
+ socklen_t addr_sz;
+
+ addr_sz = sizeof(src);
+ if (getsockname(sk, &src, &addr_sz))
+ test_error("getsockname()");
+ addr_sz = sizeof(dst);
+ if (getpeername(sk, &dst, &addr_sz))
+ test_error("getpeername()");
+
+ while (1) {
+ uint32_t rcv_nxt;
+
+ if (test_client_verify(sk, packet_size, packets_nr, timeout)) {
+ test_fail("client: connection is broken");
+ return;
+ }
+ packets_sent += packets_nr;
+ rcv_nxt = get_rcv_nxt(sk);
+ icmp_interfere(packets_nr, rcv_nxt, (void *)&src, (void *)&dst);
+ }
+}
+
+static void *client_fn(void *arg)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads();
+ if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+ test_error("failed to connect()");
+ synchronize_threads();
+
+ send_interfered(sk);
+
+ /* Not expecting client to quit */
+ test_fail("client disconnected");
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(3, server_fn, client_fn);
+ return 0;
+}
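Review bot: In set_ip4hdr() the header checksum is computed over `iph->ihl << 1`, which is 10, but checksum4_fold() takes a byte count (checksum4_nofold() loops over `len / sizeof(uint16_t)` words), so only the first half of the 20-byte header is summed; the line should read `iph->check = checksum4_fold((void *)iph, iph->ihl << 2, 0);`. The test presumably still passes because the kernel is expected to fill in the outer checksum on an IPPROTO_RAW socket and the embedded header's checksum does not appear to be validated when the ICMP error is processed, but the quoted header is then not a faithful copy of a real segment, which weakens what the RFC 5925 7.8 check actually exercises. Separately, icmp_interfere() adds `icmps_sent += 3` and `icmps_sent += 2` after calls to icmp_interfere4() and icmp6_interfere() that already do `icmps_sent++`, so the counter is doubled; dropping the two `+=` lines would fix it.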
diff --git a/tools/testing/selftests/net/tcp_ao/key-management.c b/tools/testing/selftests/net/tcp_ao/key-management.c
new file mode 100644
index 000000000000..24e62120b792
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/key-management.c
@@ -0,0 +1,1186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+const size_t nr_packets = 20;
+const size_t msg_len = 100;
+const size_t quota = nr_packets * msg_len;
+union tcp_addr wrong_addr;
+#define SECOND_PASSWORD "at all times sincere friends of freedom have been rare"
+#define fault(type) (inj == FAULT_ ## type)
+
+static const int test_vrf_ifindex = 200;
+static const uint8_t test_vrf_tabid = 42;
+static void setup_vrfs(void)
+{
+ int err;
+
+ if (!kernel_config_has(KCONFIG_NET_VRF))
+ return;
+
+ err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1);
+ if (err)
+ test_error("Failed to add a VRF: %d", err);
+
+ err = link_set_up("ksft-vrf");
+ if (err)
+ test_error("Failed to bring up a VRF");
+
+ err = ip_route_add_vrf(veth_name, TEST_FAMILY,
+ this_ip_addr, this_ip_dest, test_vrf_tabid);
+ if (err)
+ test_error("Failed to add a route to VRF");
+}
+
+
+static int prepare_sk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("test_add_key()");
+
+ if (addr && test_add_key(sk, SECOND_PASSWORD, *addr,
+ DEFAULT_TEST_PREFIX, sndid, rcvid))
+ test_error("test_add_key()");
+
+ return sk;
+}
+
+static int prepare_lsk(union tcp_addr *addr, uint8_t sndid, uint8_t rcvid)
+{
+ int sk = prepare_sk(addr, sndid, rcvid);
+
+ if (listen(sk, 10))
+ test_error("listen()");
+
+ return sk;
+}
+
+static int test_del_key(int sk, uint8_t sndid, uint8_t rcvid, bool async,
+ int current_key, int rnext_key)
+{
+ struct tcp_ao_info_opt ao_info = {};
+ struct tcp_ao_getsockopt key = {};
+ struct tcp_ao_del del = {};
+ sockaddr_af sockaddr;
+ int err;
+
+ tcp_addr_to_sockaddr_in(&del.addr, &this_ip_dest, 0);
+ del.prefix = DEFAULT_TEST_PREFIX;
+ del.sndid = sndid;
+ del.rcvid = rcvid;
+
+ if (current_key >= 0) {
+ del.set_current = 1;
+ del.current_key = (uint8_t)current_key;
+ }
+ if (rnext_key >= 0) {
+ del.set_rnext = 1;
+ del.rnext = (uint8_t)rnext_key;
+ }
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_DEL_KEY, &del, sizeof(del));
+ if (err < 0)
+ return -errno;
+
+ if (async)
+ return 0;
+
+ tcp_addr_to_sockaddr_in(&sockaddr, &this_ip_dest, 0);
+ err = test_get_one_ao(sk, &key, &sockaddr, sizeof(sockaddr),
+ DEFAULT_TEST_PREFIX, sndid, rcvid);
+ if (!err)
+ return -EEXIST;
+ if (err != -E2BIG)
+ test_error("getsockopt()");
+ if (current_key < 0 && rnext_key < 0)
+ return 0;
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+ if (current_key >= 0 && ao_info.current_key != (uint8_t)current_key)
+ return -ENOTRECOVERABLE;
+ if (rnext_key >= 0 && ao_info.rnext != (uint8_t)rnext_key)
+ return -ENOTRECOVERABLE;
+ return 0;
+}
+
+static void try_delete_key(char *tst_name, int sk, uint8_t sndid, uint8_t rcvid,
+ bool async, int current_key, int rnext_key,
+ fault_t inj)
+{
+ int err;
+
+ err = test_del_key(sk, sndid, rcvid, async, current_key, rnext_key);
+ if ((err == -EBUSY && fault(BUSY)) || (err == -EINVAL && fault(CURRNEXT))) {
+ test_ok("%s: key deletion was prevented", tst_name);
+ return;
+ }
+ if (err && fault(FIXME)) {
+ test_xfail("%s: failed to delete the key %u:%u %d",
+ tst_name, sndid, rcvid, err);
+ return;
+ }
+ if (!err) {
+ if (fault(BUSY) || fault(CURRNEXT)) {
+ test_fail("%s: the key was deleted %u:%u %d", tst_name,
+ sndid, rcvid, err);
+ } else {
+ test_ok("%s: the key was deleted", tst_name);
+ }
+ return;
+ }
+ test_fail("%s: can't delete the key %u:%u %d", tst_name, sndid, rcvid, err);
+}
+
+static int test_set_key(int sk, int current_keyid, int rnext_keyid)
+{
+ struct tcp_ao_info_opt ao_info = {};
+ int err;
+
+ if (current_keyid >= 0) {
+ ao_info.set_current = 1;
+ ao_info.current_key = (uint8_t)current_keyid;
+ }
+ if (rnext_keyid >= 0) {
+ ao_info.set_rnext = 1;
+ ao_info.rnext = (uint8_t)rnext_keyid;
+ }
+
+ err = test_set_ao_info(sk, &ao_info);
+ if (err)
+ return err;
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+ if (current_keyid >= 0 && ao_info.current_key != (uint8_t)current_keyid)
+ return -ENOTRECOVERABLE;
+ if (rnext_keyid >= 0 && ao_info.rnext != (uint8_t)rnext_keyid)
+ return -ENOTRECOVERABLE;
+ return 0;
+}
+
+static int test_add_current_rnext_key(int sk, const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ bool set_current, bool set_rnext,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ err = test_prepare_key(&tmp, DEFAULT_TEST_ALGO, in_addr,
+ set_current, set_rnext,
+ prefix, 0, sndid, rcvid, 0, keyflags,
+ strlen(key), key);
+ if (err)
+ return err;
+
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static int __try_add_current_rnext_key(int sk, const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ bool set_current, bool set_rnext,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_info_opt ao_info = {};
+ int err;
+
+ err = test_add_current_rnext_key(sk, key, keyflags, in_addr, prefix,
+ set_current, set_rnext, sndid, rcvid);
+ if (err)
+ return err;
+
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+ if (set_current && ao_info.current_key != sndid)
+ return -ENOTRECOVERABLE;
+ if (set_rnext && ao_info.rnext != rcvid)
+ return -ENOTRECOVERABLE;
+ return 0;
+}
+
+static void try_add_current_rnext_key(char *tst_name, int sk, const char *key,
+ uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ bool set_current, bool set_rnext,
+ uint8_t sndid, uint8_t rcvid, fault_t inj)
+{
+ int err;
+
+ err = __try_add_current_rnext_key(sk, key, keyflags, in_addr, prefix,
+ set_current, set_rnext, sndid, rcvid);
+ if (!err && !fault(CURRNEXT)) {
+ test_ok("%s", tst_name);
+ return;
+ }
+ if (err == -EINVAL && fault(CURRNEXT)) {
+ test_ok("%s", tst_name);
+ return;
+ }
+ test_fail("%s", tst_name);
+}
+
+static void check_closed_socket(void)
+{
+ int sk;
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ try_delete_key("closed socket, delete a key", sk, 200, 200, 0, -1, -1, 0);
+ try_delete_key("closed socket, delete all keys", sk, 100, 100, 0, -1, -1, 0);
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_set_key(sk, 100, 200))
+ test_error("failed to set current/rnext keys");
+ try_delete_key("closed socket, delete current key", sk, 100, 100, 0, -1, -1, FAULT_BUSY);
+ try_delete_key("closed socket, delete rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY);
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_add_key(sk, "Glory to heros!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 10, 11))
+ test_error("test_add_key()");
+ if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 12, 13))
+ test_error("test_add_key()");
+ try_delete_key("closed socket, delete a key + set current/rnext", sk, 100, 100, 0, 10, 13, 0);
+ try_delete_key("closed socket, force-delete current key", sk, 10, 11, 0, 200, -1, 0);
+ try_delete_key("closed socket, force-delete rnext key", sk, 12, 13, 0, -1, 200, 0);
+ try_delete_key("closed socket, delete current+rnext key", sk, 200, 200, 0, -1, -1, FAULT_BUSY);
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_set_key(sk, 100, 200))
+ test_error("failed to set current/rnext keys");
+ try_add_current_rnext_key("closed socket, add + change current key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ true, false, 10, 20, 0);
+ try_add_current_rnext_key("closed socket, add + change rnext key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ false, true, 20, 10, 0);
+ close(sk);
+}
+
+static void assert_no_current_rnext(const char *tst_msg, int sk)
+{
+ struct tcp_ao_info_opt ao_info = {};
+
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+
+ errno = 0;
+ if (ao_info.set_current || ao_info.set_rnext) {
+ test_xfail("%s: the socket has current/rnext keys: %d:%d",
+ tst_msg,
+ (ao_info.set_current) ? ao_info.current_key : -1,
+ (ao_info.set_rnext) ? ao_info.rnext : -1);
+ } else {
+ test_ok("%s: the socket has no current/rnext keys", tst_msg);
+ }
+}
+
+static void assert_no_tcp_repair(void)
+{
+ struct tcp_ao_repair ao_img = {};
+ socklen_t len = sizeof(ao_img);
+ int sk, err;
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ test_enable_repair(sk);
+ if (listen(sk, 10))
+ test_error("listen()");
+ errno = 0;
+ err = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, &len);
+ if (err && errno == EPERM)
+ test_ok("listen socket, getsockopt(TCP_AO_REPAIR) is restricted");
+ else
+ test_fail("listen socket, getsockopt(TCP_AO_REPAIR) works");
+ errno = 0;
+ err = setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, &ao_img, sizeof(ao_img));
+ if (err && errno == EPERM)
+ test_ok("listen socket, setsockopt(TCP_AO_REPAIR) is restricted");
+ else
+ test_fail("listen socket, setsockopt(TCP_AO_REPAIR) works");
+ close(sk);
+}
+
+static void check_listen_socket(void)
+{
+ int sk, err;
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ try_delete_key("listen socket, delete a key", sk, 200, 200, 0, -1, -1, 0);
+ try_delete_key("listen socket, delete all keys", sk, 100, 100, 0, -1, -1, 0);
+ close(sk);
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ err = test_set_key(sk, 100, -1);
+ if (err == -EINVAL)
+ test_ok("listen socket, setting current key not allowed");
+ else
+ test_fail("listen socket, set current key");
+ err = test_set_key(sk, -1, 200);
+ if (err == -EINVAL)
+ test_ok("listen socket, setting rnext key not allowed");
+ else
+ test_fail("listen socket, set rnext key");
+ close(sk);
+
+ sk = prepare_sk(&this_ip_dest, 200, 200);
+ if (test_set_key(sk, 100, 200))
+ test_error("failed to set current/rnext keys");
+ if (listen(sk, 10))
+ test_error("listen()");
+ assert_no_current_rnext("listen() after current/rnext keys set", sk);
+ try_delete_key("listen socket, delete current key from before listen()", sk, 100, 100, 0, -1, -1, FAULT_FIXME);
+ try_delete_key("listen socket, delete rnext key from before listen()", sk, 200, 200, 0, -1, -1, FAULT_FIXME);
+ close(sk);
+
+ assert_no_tcp_repair();
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ if (test_add_key(sk, "Glory to heros!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 10, 11))
+ test_error("test_add_key()");
+ if (test_add_key(sk, "Glory to Ukraine!", this_ip_dest,
+ DEFAULT_TEST_PREFIX, 12, 13))
+ test_error("test_add_key()");
+ try_delete_key("listen socket, delete a key + set current/rnext", sk,
+ 100, 100, 0, 10, 13, FAULT_CURRNEXT);
+ try_delete_key("listen socket, force-delete current key", sk,
+ 10, 11, 0, 200, -1, FAULT_CURRNEXT);
+ try_delete_key("listen socket, force-delete rnext key", sk,
+ 12, 13, 0, -1, 200, FAULT_CURRNEXT);
+ try_delete_key("listen socket, delete a key", sk,
+ 200, 200, 0, -1, -1, 0);
+ close(sk);
+
+ sk = prepare_lsk(&this_ip_dest, 200, 200);
+ try_add_current_rnext_key("listen socket, add + change current key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ true, false, 10, 20, FAULT_CURRNEXT);
+ try_add_current_rnext_key("listen socket, add + change rnext key",
+ sk, "Laaaa! Lalala-la-la-lalala...", 0,
+ this_ip_dest, DEFAULT_TEST_PREFIX,
+ false, true, 20, 10, FAULT_CURRNEXT);
+ close(sk);
+}
+
+static const char *fips_fpath = "/proc/sys/crypto/fips_enabled";
+static bool is_fips_enabled(void)
+{
+ static int fips_checked = -1;
+ FILE *fenabled;
+ int enabled;
+
+ if (fips_checked >= 0)
+ return !!fips_checked;
+ if (access(fips_fpath, R_OK)) {
+ if (errno != ENOENT)
+ test_error("Can't open %s", fips_fpath);
+ fips_checked = 0;
+ return false;
+ }
+ fenabled = fopen(fips_fpath, "r");
+ if (!fenabled)
+ test_error("Can't open %s", fips_fpath);
+ if (fscanf(fenabled, "%d", &enabled) != 1)
+ test_error("Can't read from %s", fips_fpath);
+ fclose(fenabled);
+ fips_checked = !!enabled;
+ return !!fips_checked;
+}
+
+struct test_key {
+ char password[TCP_AO_MAXKEYLEN];
+ const char *alg;
+ unsigned int len;
+ uint8_t client_keyid;
+ uint8_t server_keyid;
+ uint8_t maclen;
+ uint8_t matches_client : 1,
+ matches_server : 1,
+ matches_vrf : 1,
+ is_current : 1,
+ is_rnext : 1,
+ used_on_server_tx : 1,
+ used_on_client_tx : 1,
+ skip_counters_checks : 1;
+};
+
+struct key_collection {
+ unsigned int nr_keys;
+ struct test_key *keys;
+};
+
+static struct key_collection collection;
+
+#define TEST_MAX_MACLEN 16
+const char *test_algos[] = {
+ "cmac(aes128)",
+ "hmac(sha1)", "hmac(sha512)", "hmac(sha384)", "hmac(sha256)",
+ "hmac(sha224)", "hmac(sha3-512)",
+ /* only if !CONFIG_FIPS */
+#define TEST_NON_FIPS_ALGOS 2
+ "hmac(rmd160)", "hmac(md5)"
+};
+const unsigned int test_maclens[] = { 1, 4, 12, 16 };
+#define MACLEN_SHIFT 2
+#define ALGOS_SHIFT 4
+
+static unsigned int make_mask(unsigned int shift, unsigned int prev_shift)
+{
+ unsigned int ret = BIT(shift) - 1;
+
+ return ret << prev_shift;
+}
+
+static void init_key_in_collection(unsigned int index, bool randomized)
+{
+ struct test_key *key = &collection.keys[index];
+ unsigned int algos_nr, algos_index;
+
+ /* Same for randomized and non-randomized test flows */
+ key->client_keyid = index;
+ key->server_keyid = 127 + index;
+ key->matches_client = 1;
+ key->matches_server = 1;
+ key->matches_vrf = 1;
+ /* not really even random, but good enough for a test */
+ key->len = rand() % (TCP_AO_MAXKEYLEN - TEST_TCP_AO_MINKEYLEN);
+ key->len += TEST_TCP_AO_MINKEYLEN;
+ randomize_buffer(key->password, key->len);
+
+ if (randomized) {
+ key->maclen = (rand() % TEST_MAX_MACLEN) + 1;
+ algos_index = rand();
+ } else {
+ unsigned int shift = MACLEN_SHIFT;
+
+ key->maclen = test_maclens[index & make_mask(shift, 0)];
+ algos_index = index & make_mask(ALGOS_SHIFT, shift);
+ }
+ algos_nr = ARRAY_SIZE(test_algos);
+ if (is_fips_enabled())
+ algos_nr -= TEST_NON_FIPS_ALGOS;
+ key->alg = test_algos[algos_index % algos_nr];
+}
+
+static int init_default_key_collection(unsigned int nr_keys, bool randomized)
+{
+ size_t key_sz = sizeof(collection.keys[0]);
+
+ if (!nr_keys) {
+ free(collection.keys);
+ collection.keys = NULL;
+ return 0;
+ }
+
+ /*
+ * All keys have uniq sndid/rcvid and sndid != rcvid in order to
+ * check for any bugs/issues for different keyids, visible to both
+ * peers. Keyid == 254 is unused.
+ */
+ if (nr_keys > 127)
+ test_error("Test requires too many keys, correct the source");
+
+ collection.keys = reallocarray(collection.keys, nr_keys, key_sz);
+ if (!collection.keys)
+ return -ENOMEM;
+
+ memset(collection.keys, 0, nr_keys * key_sz);
+ collection.nr_keys = nr_keys;
+ while (nr_keys--)
+ init_key_in_collection(nr_keys, randomized);
+
+ return 0;
+}
+
+static void test_key_error(const char *msg, struct test_key *key)
+{
+ test_error("%s: key: { %s, %u:%u, %u, %u:%u:%u:%u:%u (%u)}",
+ msg, key->alg, key->client_keyid, key->server_keyid,
+ key->maclen, key->matches_client, key->matches_server,
+ key->matches_vrf, key->is_current, key->is_rnext, key->len);
+}
+
+static int test_add_key_cr(int sk, const char *pwd, unsigned int pwd_len,
+ union tcp_addr addr, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid,
+ uint8_t maclen, const char *alg,
+ bool set_current, bool set_rnext)
+{
+ struct tcp_ao_add tmp = {};
+ uint8_t keyflags = 0;
+ int err;
+
+ if (!alg)
+ alg = DEFAULT_TEST_ALGO;
+
+ if (vrf)
+ keyflags |= TCP_AO_KEYF_IFINDEX;
+ err = test_prepare_key(&tmp, alg, addr, set_current, set_rnext,
+ DEFAULT_TEST_PREFIX, vrf, sndid, rcvid, maclen,
+ keyflags, pwd_len, pwd);
+ if (err)
+ return err;
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static void verify_current_rnext(const char *tst, int sk,
+ int current_keyid, int rnext_keyid)
+{
+ struct tcp_ao_info_opt ao_info = {};
+
+ if (test_get_ao_info(sk, &ao_info))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+
+ errno = 0;
+ if (current_keyid >= 0) {
+ if (!ao_info.set_current)
+ test_fail("%s: the socket doesn't have current key", tst);
+ else if (ao_info.current_key != current_keyid)
+ test_fail("%s: current key is not the expected one %d != %u",
+ tst, current_keyid, ao_info.current_key);
+ else
+ test_ok("%s: current key %u as expected",
+ tst, ao_info.current_key);
+ }
+ if (rnext_keyid >= 0) {
+ if (!ao_info.set_rnext)
+ test_fail("%s: the socket doesn't have rnext key", tst);
+ else if (ao_info.rnext != rnext_keyid)
+ test_fail("%s: rnext key is not the expected one %d != %u",
+ tst, rnext_keyid, ao_info.rnext);
+ else
+ test_ok("%s: rnext key %u as expected", tst, ao_info.rnext);
+ }
+}
+
+
+static int key_collection_socket(bool server, unsigned int port)
+{
+ unsigned int i;
+ int sk;
+
+ if (server)
+ sk = test_listen_socket(this_ip_addr, port, 1);
+ else
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ struct test_key *key = &collection.keys[i];
+ union tcp_addr *addr = &wrong_addr;
+ uint8_t sndid, rcvid, vrf;
+ bool set_current = false, set_rnext = false;
+
+ if (key->matches_vrf)
+ vrf = 0;
+ else
+ vrf = test_vrf_ifindex;
+ if (server) {
+ if (key->matches_client)
+ addr = &this_ip_dest;
+ sndid = key->server_keyid;
+ rcvid = key->client_keyid;
+ } else {
+ if (key->matches_server)
+ addr = &this_ip_dest;
+ sndid = key->client_keyid;
+ rcvid = key->server_keyid;
+ key->used_on_client_tx = set_current = key->is_current;
+ key->used_on_server_tx = set_rnext = key->is_rnext;
+ }
+
+ if (test_add_key_cr(sk, key->password, key->len,
+ *addr, vrf, sndid, rcvid, key->maclen,
+ key->alg, set_current, set_rnext))
+ test_key_error("setsockopt(TCP_AO_ADD_KEY)", key);
+#ifdef DEBUG
+ test_print("%s [%u/%u] key: { %s, %u:%u, %u, %u:%u:%u:%u (%u)}",
+ server ? "server" : "client", i, collection.nr_keys,
+ key->alg, rcvid, sndid, key->maclen,
+ key->matches_client, key->matches_server,
+ key->is_current, key->is_rnext, key->len);
+#endif
+ }
+ return sk;
+}
+
+static void verify_counters(const char *tst_name, bool is_listen_sk, bool server,
+ struct tcp_ao_counters *a, struct tcp_ao_counters *b)
+{
+ unsigned int i;
+
+ __test_tcp_ao_counters_cmp(tst_name, a, b, TEST_CNT_GOOD);
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ struct test_key *key = &collection.keys[i];
+ uint8_t sndid, rcvid;
+ bool rx_cnt_expected;
+
+ if (key->skip_counters_checks)
+ continue;
+ if (server) {
+ sndid = key->server_keyid;
+ rcvid = key->client_keyid;
+ rx_cnt_expected = key->used_on_client_tx;
+ } else {
+ sndid = key->client_keyid;
+ rcvid = key->server_keyid;
+ rx_cnt_expected = key->used_on_server_tx;
+ }
+
+ test_tcp_ao_key_counters_cmp(tst_name, a, b,
+ rx_cnt_expected ? TEST_CNT_KEY_GOOD : 0,
+ sndid, rcvid);
+ }
+ test_tcp_ao_counters_free(a);
+ test_tcp_ao_counters_free(b);
+ test_ok("%s: passed counters checks", tst_name);
+}
+
+static struct tcp_ao_getsockopt *lookup_key(struct tcp_ao_getsockopt *buf,
+ size_t len, int sndid, int rcvid)
+{
+ size_t i;
+
+ for (i = 0; i < len; i++) {
+ if (sndid >= 0 && buf[i].sndid != sndid)
+ continue;
+ if (rcvid >= 0 && buf[i].rcvid != rcvid)
+ continue;
+ return &buf[i];
+ }
+ return NULL;
+}
+
+static void verify_keys(const char *tst_name, int sk,
+ bool is_listen_sk, bool server)
+{
+ socklen_t len = sizeof(struct tcp_ao_getsockopt);
+ struct tcp_ao_getsockopt *keys;
+ bool passed_test = true;
+ unsigned int i;
+
+ keys = calloc(collection.nr_keys, len);
+ if (!keys)
+ test_error("calloc()");
+
+ keys->nkeys = collection.nr_keys;
+ keys->get_all = 1;
+
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, keys, &len)) {
+ free(keys);
+ test_error("getsockopt(TCP_AO_GET_KEYS)");
+ }
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ struct test_key *key = &collection.keys[i];
+ struct tcp_ao_getsockopt *dump_key;
+ bool is_kdf_aes_128_cmac = false;
+ bool is_cmac_aes = false;
+ uint8_t sndid, rcvid;
+ bool matches = false;
+
+ if (server) {
+ if (key->matches_client)
+ matches = true;
+ sndid = key->server_keyid;
+ rcvid = key->client_keyid;
+ } else {
+ if (key->matches_server)
+ matches = true;
+ sndid = key->client_keyid;
+ rcvid = key->server_keyid;
+ }
+ if (!key->matches_vrf)
+ matches = false;
+ /* no keys get removed on the original listener socket */
+ if (is_listen_sk)
+ matches = true;
+
+ dump_key = lookup_key(keys, keys->nkeys, sndid, rcvid);
+ if (matches != !!dump_key) {
+ test_fail("%s: key %u:%u %s%s on the socket",
+ tst_name, sndid, rcvid,
+ key->matches_vrf ? "" : "[vrf] ",
+ matches ? "disappeared" : "yet present");
+ passed_test = false;
+ goto out;
+ }
+ if (!dump_key)
+ continue;
+
+ if (!strcmp("cmac(aes128)", key->alg)) {
+ is_kdf_aes_128_cmac = (key->len != 16);
+ is_cmac_aes = true;
+ }
+
+ if (is_cmac_aes) {
+ if (strcmp(dump_key->alg_name, "cmac(aes)")) {
+ test_fail("%s: key %u:%u cmac(aes) has unexpected alg %s",
+ tst_name, sndid, rcvid,
+ dump_key->alg_name);
+ passed_test = false;
+ continue;
+ }
+ } else if (strcmp(dump_key->alg_name, key->alg)) {
+ test_fail("%s: key %u:%u has unexpected alg %s != %s",
+ tst_name, sndid, rcvid,
+ dump_key->alg_name, key->alg);
+ passed_test = false;
+ continue;
+ }
+ if (is_kdf_aes_128_cmac) {
+ if (dump_key->keylen != 16) {
+ test_fail("%s: key %u:%u cmac(aes128) has unexpected len %u",
+ tst_name, sndid, rcvid,
+ dump_key->keylen);
+ continue;
+ }
+ } else if (dump_key->keylen != key->len) {
+ test_fail("%s: key %u:%u changed password len %u != %u",
+ tst_name, sndid, rcvid,
+ dump_key->keylen, key->len);
+ passed_test = false;
+ continue;
+ }
+ if (!is_kdf_aes_128_cmac &&
+ memcmp(dump_key->key, key->password, key->len)) {
+ test_fail("%s: key %u:%u has different password",
+ tst_name, sndid, rcvid);
+ passed_test = false;
+ continue;
+ }
+ if (dump_key->maclen != key->maclen) {
+ test_fail("%s: key %u:%u changed maclen %u != %u",
+ tst_name, sndid, rcvid,
+ dump_key->maclen, key->maclen);
+ passed_test = false;
+ continue;
+ }
+ }
+
+ if (passed_test)
+ test_ok("%s: The socket keys are consistent with the expectations",
+ tst_name);
+out:
+ free(keys);
+}
+
+static int start_server(const char *tst_name, unsigned int port, size_t quota,
+ struct tcp_ao_counters *begin,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters lsk_c1, lsk_c2;
+ ssize_t bytes;
+ int sk, lsk;
+
+ synchronize_threads(); /* 1: key collection initialized */
+ lsk = key_collection_socket(true, port);
+ if (test_get_tcp_ao_counters(lsk, &lsk_c1))
+ test_error("test_get_tcp_ao_counters()");
+ synchronize_threads(); /* 2: MKTs added => connect() */
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+ if (test_get_tcp_ao_counters(sk, begin))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 3: accepted => send data */
+ if (test_get_tcp_ao_counters(lsk, &lsk_c2))
+ test_error("test_get_tcp_ao_counters()");
+ verify_keys(tst_name, lsk, true, true);
+ close(lsk);
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota)
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ else
+ test_ok("%s: server alive", tst_name);
+
+ verify_counters(tst_name, true, true, &lsk_c1, &lsk_c2);
+
+ return sk;
+}
+
+static void end_server(const char *tst_name, int sk,
+ struct tcp_ao_counters *begin)
+{
+ struct tcp_ao_counters end;
+
+ if (test_get_tcp_ao_counters(sk, &end))
+ test_error("test_get_tcp_ao_counters()");
+ verify_keys(tst_name, sk, false, true);
+
+ synchronize_threads(); /* 4: verified => closed */
+ close(sk);
+
+ verify_counters(tst_name, false, true, begin, &end);
+ synchronize_threads(); /* 5: counters */
+}
+
+static void try_server_run(const char *tst_name, unsigned int port, size_t quota,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ int sk;
+
+ sk = start_server(tst_name, port, quota, &tmp,
+ current_index, rnext_index);
+ end_server(tst_name, sk, &tmp);
+}
+
+static void server_rotations(const char *tst_name, unsigned int port,
+ size_t quota, unsigned int rotations,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ unsigned int i;
+ int sk;
+
+ sk = start_server(tst_name, port, quota, &tmp,
+ current_index, rnext_index);
+
+ for (i = current_index + 1; rotations > 0; i++, rotations--) {
+ ssize_t bytes;
+
+ if (i >= collection.nr_keys)
+ i = 0;
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ return;
+ }
+ verify_current_rnext(tst_name, sk,
+ collection.keys[i].server_keyid, -1);
+ synchronize_threads(); /* verify current/rnext */
+ }
+ end_server(tst_name, sk, &tmp);
+}
+
+static int run_client(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, int current_index, int rnext_index,
+ struct tcp_ao_counters *before,
+ const size_t msg_sz, const size_t msg_nr)
+{
+ int sk;
+
+ synchronize_threads(); /* 1: key collection initialized */
+ sk = key_collection_socket(false, port);
+
+ if (current_index >= 0 || rnext_index >= 0) {
+ int sndid = -1, rcvid = -1;
+
+ if (current_index >= 0)
+ sndid = collection.keys[current_index].client_keyid;
+ if (rnext_index >= 0)
+ rcvid = collection.keys[rnext_index].server_keyid;
+ if (test_set_key(sk, sndid, rcvid))
+ test_error("failed to set current/rnext keys");
+ }
+ if (before && test_get_tcp_ao_counters(sk, before))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 2: MKTs added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, port++) <= 0)
+ test_error("failed to connect()");
+ if (current_index < 0)
+ current_index = nr_keys - 1;
+ if (rnext_index < 0)
+ rnext_index = nr_keys - 1;
+ collection.keys[current_index].used_on_client_tx = 1;
+ collection.keys[rnext_index].used_on_server_tx = 1;
+
+ synchronize_threads(); /* 3: accepted => send data */
+ if (test_client_verify(sk, msg_sz, msg_nr, TEST_TIMEOUT_SEC)) {
+ test_fail("verify failed");
+ close(sk);
+ if (before)
+ test_tcp_ao_counters_free(before);
+ return -1;
+ }
+
+ return sk;
+}
+
+static int start_client(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, int current_index, int rnext_index,
+ struct tcp_ao_counters *before,
+ const size_t msg_sz, const size_t msg_nr)
+{
+ if (init_default_key_collection(nr_keys, true))
+ test_error("Failed to init the key collection");
+
+ return run_client(tst_name, port, nr_keys, current_index,
+ rnext_index, before, msg_sz, msg_nr);
+}
+
+static void end_client(const char *tst_name, int sk, unsigned int nr_keys,
+ int current_index, int rnext_index,
+ struct tcp_ao_counters *start)
+{
+ struct tcp_ao_counters end;
+
+ /* Some application may become dependent on this kernel choice */
+ if (current_index < 0)
+ current_index = nr_keys - 1;
+ if (rnext_index < 0)
+ rnext_index = nr_keys - 1;
+ verify_current_rnext(tst_name, sk,
+ collection.keys[current_index].client_keyid,
+ collection.keys[rnext_index].server_keyid);
+ if (start && test_get_tcp_ao_counters(sk, &end))
+ test_error("test_get_tcp_ao_counters()");
+ verify_keys(tst_name, sk, false, false);
+ synchronize_threads(); /* 4: verify => closed */
+ close(sk);
+ if (start)
+ verify_counters(tst_name, false, false, start, &end);
+ synchronize_threads(); /* 5: counters */
+}
+
+static void try_unmatched_keys(int sk, int *rnext_index)
+{
+ struct test_key *key;
+ unsigned int i = 0;
+ int err;
+
+ do {
+ key = &collection.keys[i];
+ if (!key->matches_server)
+ break;
+ } while (++i < collection.nr_keys);
+ if (key->matches_server)
+ test_error("all keys on client match the server");
+
+ err = test_add_key_cr(sk, key->password, key->len, wrong_addr,
+ 0, key->client_keyid, key->server_keyid,
+ key->maclen, key->alg, 0, 0);
+ if (!err) {
+ test_fail("Added a key with non-matching ip-address for established sk");
+ return;
+ }
+ if (err == -EINVAL)
+ test_ok("Can't add a key with non-matching ip-address for established sk");
+ else
+ test_error("Failed to add a key");
+
+ err = test_add_key_cr(sk, key->password, key->len, this_ip_dest,
+ test_vrf_ifindex,
+ key->client_keyid, key->server_keyid,
+ key->maclen, key->alg, 0, 0);
+ if (!err) {
+ test_fail("Added a key with non-matching VRF for established sk");
+ return;
+ }
+ if (err == -EINVAL)
+ test_ok("Can't add a key with non-matching VRF for established sk");
+ else
+ test_error("Failed to add a key");
+
+ for (i = 0; i < collection.nr_keys; i++) {
+ key = &collection.keys[i];
+ if (!key->matches_client)
+ break;
+ }
+ if (key->matches_client)
+ test_error("all keys on server match the client");
+ if (test_set_key(sk, -1, key->server_keyid))
+ test_error("Can't change the current key");
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("verify failed");
+ *rnext_index = i;
+}
+
+static int client_non_matching(const char *tst_name, unsigned int port,
+ unsigned int nr_keys,
+ int current_index, int rnext_index,
+ const size_t msg_sz, const size_t msg_nr)
+{
+ unsigned int i;
+
+ if (init_default_key_collection(nr_keys, true))
+ test_error("Failed to init the key collection");
+
+ for (i = 0; i < nr_keys; i++) {
+ /* key (0, 0) matches */
+ collection.keys[i].matches_client = !!((i + 3) % 4);
+ collection.keys[i].matches_server = !!((i + 2) % 4);
+ if (kernel_config_has(KCONFIG_NET_VRF))
+ collection.keys[i].matches_vrf = !!((i + 1) % 4);
+ }
+
+ return run_client(tst_name, port, nr_keys, current_index,
+ rnext_index, NULL, msg_sz, msg_nr);
+}
+
+static void check_current_back(const char *tst_name, unsigned int port,
+ unsigned int nr_keys,
+ unsigned int current_index, unsigned int rnext_index,
+ unsigned int rotate_to_index)
+{
+ struct tcp_ao_counters tmp;
+ int sk;
+
+ sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+ &tmp, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ if (test_set_key(sk, collection.keys[rotate_to_index].client_keyid, -1))
+ test_error("Can't change the current key");
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("verify failed");
+ /* There is a race here: between setting the current_key with
+ * setsockopt(TCP_AO_INFO) and starting to send some data - there
+ * might have been a segment received with the desired
+ * RNext_key set. In turn that would mean that the first outgoing
+ * segment will have the desired current_key (flipped back).
+ * Which is what the user/test wants. As it's racy, skip checking
+ * the counters, yet check what are the resulting current/rnext
+ * keys on both sides.
+ */
+ collection.keys[rotate_to_index].skip_counters_checks = 1;
+
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void roll_over_keys(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, unsigned int rotations,
+ unsigned int current_index, unsigned int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ unsigned int i;
+ int sk;
+
+ sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+ &tmp, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ for (i = rnext_index + 1; rotations > 0; i++, rotations--) {
+ if (i >= collection.nr_keys)
+ i = 0;
+ if (test_set_key(sk, -1, collection.keys[i].server_keyid))
+ test_error("Can't change the Rnext key");
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("verify failed");
+ close(sk);
+ test_tcp_ao_counters_free(&tmp);
+ return;
+ }
+ verify_current_rnext(tst_name, sk, -1,
+ collection.keys[i].server_keyid);
+ collection.keys[i].used_on_server_tx = 1;
+ synchronize_threads(); /* verify current/rnext */
+ }
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void try_client_run(const char *tst_name, unsigned int port,
+ unsigned int nr_keys, int current_index, int rnext_index)
+{
+ struct tcp_ao_counters tmp;
+ int sk;
+
+ sk = start_client(tst_name, port, nr_keys, current_index, rnext_index,
+ &tmp, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, &tmp);
+}
+
+static void try_client_match(const char *tst_name, unsigned int port,
+ unsigned int nr_keys,
+ int current_index, int rnext_index)
+{
+ int sk;
+
+ sk = client_non_matching(tst_name, port, nr_keys, current_index,
+ rnext_index, msg_len, nr_packets);
+ if (sk < 0)
+ return;
+ try_unmatched_keys(sk, &rnext_index);
+ end_client(tst_name, sk, nr_keys, current_index, rnext_index, NULL);
+}
+
+static void *server_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+
+ setup_vrfs();
+ try_server_run("server: Check current/rnext keys unset before connect()",
+ port++, quota, 19, 19);
+ try_server_run("server: Check current/rnext keys set before connect()",
+ port++, quota, 10, 10);
+ try_server_run("server: Check current != rnext keys set before connect()",
+ port++, quota, 5, 10);
+ try_server_run("server: Check current flapping back on peer's RnextKey request",
+ port++, quota * 2, 5, 10);
+ server_rotations("server: Rotate over all different keys", port++,
+ quota, 20, 0, 0);
+ try_server_run("server: Check accept() => established key matching",
+ port++, quota * 2, 0, 0);
+
+ synchronize_threads(); /* don't race to exit: client exits */
+ return NULL;
+}
+
+static void check_established_socket(void)
+{
+ unsigned int port = test_server_port;
+
+ setup_vrfs();
+ try_client_run("client: Check current/rnext keys unset before connect()",
+ port++, 20, -1, -1);
+ try_client_run("client: Check current/rnext keys set before connect()",
+ port++, 20, 10, 10);
+ try_client_run("client: Check current != rnext keys set before connect()",
+ port++, 20, 10, 5);
+ check_current_back("client: Check current flapping back on peer's RnextKey request",
+ port++, 20, 10, 5, 2);
+ roll_over_keys("client: Rotate over all different keys", port++,
+ 20, 20, 0, 0);
+ try_client_match("client: Check connect() => established key matching",
+ port++, 20, 0, 0);
+}
+
+static void *client_fn(void *arg)
+{
+ if (inet_pton(TEST_FAMILY, TEST_WRONG_IP, &wrong_addr) != 1)
+ test_error("Can't convert ip address %s", TEST_WRONG_IP);
+ check_closed_socket();
+ check_listen_socket();
+ check_established_socket();
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(120, server_fn, client_fn);
+ return 0;
+}
diff --git a/tools/testing/selftests/net/tcp_ao/lib/aolib.h b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
new file mode 100644
index 000000000000..fbc7f6111815
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/aolib.h
@@ -0,0 +1,605 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * TCP-AO selftest library. Provides helpers to unshare network
+ * namespaces, create veth, assign ip addresses, set routes,
+ * manipulate socket options, read network counter and etc.
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#ifndef _AOLIB_H_
+#define _AOLIB_H_
+
+#include <arpa/inet.h>
+#include <errno.h>
+#include <linux/snmp.h>
+#include <linux/tcp.h>
+#include <netinet/in.h>
+#include <stdarg.h>
+#include <stdbool.h>
+#include <stdlib.h>
+#include <stdio.h>
+#include <string.h>
+#include <sys/syscall.h>
+#include <unistd.h>
+
+#include "../../../../../include/linux/stringify.h"
+#include "../../../../../include/linux/bits.h"
+
+#ifndef SOL_TCP
+/* can't include <netinet/tcp.h> as including <linux/tcp.h> */
+# define SOL_TCP 6 /* TCP level */
+#endif
+
+/* Working around ksft, see the comment in lib/setup.c */
+extern void __test_msg(const char *buf);
+extern void __test_ok(const char *buf);
+extern void __test_fail(const char *buf);
+extern void __test_xfail(const char *buf);
+extern void __test_error(const char *buf);
+extern void __test_skip(const char *buf);
+
+__attribute__((__format__(__printf__, 2, 3)))
+static inline void __test_print(void (*fn)(const char *), const char *fmt, ...)
+{
+#define TEST_MSG_BUFFER_SIZE 4096
+ char buf[TEST_MSG_BUFFER_SIZE];
+ va_list arg;
+
+ va_start(arg, fmt);
+ vsnprintf(buf, sizeof(buf), fmt, arg);
+ va_end(arg);
+ fn(buf);
+}
+
+#define test_print(fmt, ...) \
+ __test_print(__test_msg, "%ld[%s:%u] " fmt "\n", \
+ syscall(SYS_gettid), \
+ __FILE__, __LINE__, ##__VA_ARGS__)
+
+#define test_ok(fmt, ...) \
+ __test_print(__test_ok, fmt "\n", ##__VA_ARGS__)
+#define test_skip(fmt, ...) \
+ __test_print(__test_skip, fmt "\n", ##__VA_ARGS__)
+#define test_xfail(fmt, ...) \
+ __test_print(__test_xfail, fmt "\n", ##__VA_ARGS__)
+
+#define test_fail(fmt, ...) \
+do { \
+ if (errno) \
+ __test_print(__test_fail, fmt ": %m\n", ##__VA_ARGS__); \
+ else \
+ __test_print(__test_fail, fmt "\n", ##__VA_ARGS__); \
+ test_failed(); \
+} while (0)
+
+#define KSFT_FAIL 1
+#define test_error(fmt, ...) \
+do { \
+ if (errno) \
+ __test_print(__test_error, "%ld[%s:%u] " fmt ": %m\n", \
+ syscall(SYS_gettid), __FILE__, __LINE__, \
+ ##__VA_ARGS__); \
+ else \
+ __test_print(__test_error, "%ld[%s:%u] " fmt "\n", \
+ syscall(SYS_gettid), __FILE__, __LINE__, \
+ ##__VA_ARGS__); \
+ exit(KSFT_FAIL); \
+} while (0)
+
+enum test_fault {
+ FAULT_TIMEOUT = 1,
+ FAULT_KEYREJECT,
+ FAULT_PREINSTALL_AO,
+ FAULT_PREINSTALL_MD5,
+ FAULT_POSTINSTALL,
+ FAULT_BUSY,
+ FAULT_CURRNEXT,
+ FAULT_FIXME,
+};
+typedef enum test_fault fault_t;
+
+enum test_needs_kconfig {
+ KCONFIG_NET_NS = 0, /* required */
+ KCONFIG_VETH, /* required */
+ KCONFIG_TCP_AO, /* required */
+ KCONFIG_TCP_MD5, /* optional, for TCP-MD5 features */
+ KCONFIG_NET_VRF, /* optional, for L3/VRF testing */
+ __KCONFIG_LAST__
+};
+extern bool kernel_config_has(enum test_needs_kconfig k);
+extern const char *tests_skip_reason[__KCONFIG_LAST__];
+static inline bool should_skip_test(const char *tst_name,
+ enum test_needs_kconfig k)
+{
+ if (kernel_config_has(k))
+ return false;
+ test_skip("%s: %s", tst_name, tests_skip_reason[k]);
+ return true;
+}
+
+union tcp_addr {
+ struct in_addr a4;
+ struct in6_addr a6;
+};
+
+typedef void *(*thread_fn)(void *);
+extern void test_failed(void);
+extern void __test_init(unsigned int ntests, int family, unsigned int prefix,
+ union tcp_addr addr1, union tcp_addr addr2,
+ thread_fn peer1, thread_fn peer2);
+
+static inline void test_init2(unsigned int ntests,
+ thread_fn peer1, thread_fn peer2,
+ int family, unsigned int prefix,
+ const char *addr1, const char *addr2)
+{
+ union tcp_addr taddr1, taddr2;
+
+ if (inet_pton(family, addr1, &taddr1) != 1)
+ test_error("Can't convert ip address %s", addr1);
+ if (inet_pton(family, addr2, &taddr2) != 1)
+ test_error("Can't convert ip address %s", addr2);
+
+ __test_init(ntests, family, prefix, taddr1, taddr2, peer1, peer2);
+}
+extern void test_add_destructor(void (*d)(void));
+
+/* To adjust optmem socket limit, approximately estimate a number,
+ * that is bigger than sizeof(struct tcp_ao_key).
+ */
+#define KERNEL_TCP_AO_KEY_SZ_ROUND_UP 300
+
+extern void test_set_optmem(size_t value);
+extern size_t test_get_optmem(void);
+
+extern const struct sockaddr_in6 addr_any6;
+extern const struct sockaddr_in addr_any4;
+
+#ifdef IPV6_TEST
+# define __TEST_CLIENT_IP(n) ("2001:db8:" __stringify(n) "::1")
+# define TEST_CLIENT_IP __TEST_CLIENT_IP(1)
+# define TEST_WRONG_IP "2001:db8:253::1"
+# define TEST_SERVER_IP "2001:db8:254::1"
+# define TEST_NETWORK "2001::"
+# define TEST_PREFIX 128
+# define TEST_FAMILY AF_INET6
+# define SOCKADDR_ANY addr_any6
+# define sockaddr_af struct sockaddr_in6
+#else
+# define __TEST_CLIENT_IP(n) ("10.0." __stringify(n) ".1")
+# define TEST_CLIENT_IP __TEST_CLIENT_IP(1)
+# define TEST_WRONG_IP "10.0.253.1"
+# define TEST_SERVER_IP "10.0.254.1"
+# define TEST_NETWORK "10.0.0.0"
+# define TEST_PREFIX 32
+# define TEST_FAMILY AF_INET
+# define SOCKADDR_ANY addr_any4
+# define sockaddr_af struct sockaddr_in
+#endif
+
+static inline union tcp_addr gen_tcp_addr(union tcp_addr net, size_t n)
+{
+ union tcp_addr ret = net;
+
+#ifdef IPV6_TEST
+ ret.a6.s6_addr32[3] = htonl(n & (BIT(32) - 1));
+ ret.a6.s6_addr32[2] = htonl((n >> 32) & (BIT(32) - 1));
+#else
+ ret.a4.s_addr = htonl(ntohl(net.a4.s_addr) + n);
+#endif
+
+ return ret;
+}
+
+static inline void tcp_addr_to_sockaddr_in(void *dest,
+ const union tcp_addr *src,
+ unsigned int port)
+{
+ sockaddr_af *out = dest;
+
+ memset(out, 0, sizeof(*out));
+#ifdef IPV6_TEST
+ out->sin6_family = AF_INET6;
+ out->sin6_port = port;
+ out->sin6_addr = src->a6;
+#else
+ out->sin_family = AF_INET;
+ out->sin_port = port;
+ out->sin_addr = src->a4;
+#endif
+}
+
+static inline void test_init(unsigned int ntests,
+ thread_fn peer1, thread_fn peer2)
+{
+ test_init2(ntests, peer1, peer2, TEST_FAMILY, TEST_PREFIX,
+ TEST_SERVER_IP, TEST_CLIENT_IP);
+}
+extern void synchronize_threads(void);
+extern void switch_ns(int fd);
+
+extern __thread union tcp_addr this_ip_addr;
+extern __thread union tcp_addr this_ip_dest;
+extern int test_family;
+
+extern void randomize_buffer(void *buf, size_t buflen);
+extern int open_netns(void);
+extern int unshare_open_netns(void);
+extern const char veth_name[];
+extern int add_veth(const char *name, int nsfda, int nsfdb);
+extern int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd);
+extern int ip_addr_add(const char *intf, int family,
+ union tcp_addr addr, uint8_t prefix);
+extern int ip_route_add(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst);
+extern int ip_route_add_vrf(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst,
+ uint8_t vrf);
+extern int link_set_up(const char *intf);
+
+extern const unsigned int test_server_port;
+extern int test_wait_fd(int sk, time_t sec, bool write);
+extern int __test_connect_socket(int sk, const char *device,
+ void *addr, size_t addr_sz, time_t timeout);
+extern int __test_listen_socket(int backlog, void *addr, size_t addr_sz);
+
+static inline int test_listen_socket(const union tcp_addr taddr,
+ unsigned int port, int backlog)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return __test_listen_socket(backlog, (void *)&addr, sizeof(addr));
+}
+
+/*
+ * In order for selftests to work under CONFIG_CRYPTO_FIPS=y,
+ * the password should be loger than 14 bytes, see hmac_setkey()
+ */
+#define TEST_TCP_AO_MINKEYLEN 14
+#define DEFAULT_TEST_PASSWORD "In this hour, I do not believe that any darkness will endure."
+
+#ifndef DEFAULT_TEST_ALGO
+#define DEFAULT_TEST_ALGO "cmac(aes128)"
+#endif
+
+#ifdef IPV6_TEST
+#define DEFAULT_TEST_PREFIX 128
+#else
+#define DEFAULT_TEST_PREFIX 32
+#endif
+
+/*
+ * Timeout on syscalls where failure is not expected.
+ * You may want to rise it if the test machine is very busy.
+ */
+#ifndef TEST_TIMEOUT_SEC
+#define TEST_TIMEOUT_SEC 5
+#endif
+
+/*
+ * Timeout on connect() where a failure is expected.
+ * If set to 0 - kernel will try to retransmit SYN number of times, set in
+ * /proc/sys/net/ipv4/tcp_syn_retries
+ * By default set to 1 to make tests pass faster on non-busy machine.
+ */
+#ifndef TEST_RETRANSMIT_SEC
+#define TEST_RETRANSMIT_SEC 1
+#endif
+
+static inline int _test_connect_socket(int sk, const union tcp_addr taddr,
+ unsigned int port, time_t timeout)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, htons(port));
+ return __test_connect_socket(sk, veth_name,
+ (void *)&addr, sizeof(addr), timeout);
+}
+
+static inline int test_connect_socket(int sk, const union tcp_addr taddr,
+ unsigned int port)
+{
+ return _test_connect_socket(sk, taddr, port, TEST_TIMEOUT_SEC);
+}
+
+extern int __test_set_md5(int sk, void *addr, size_t addr_sz,
+ uint8_t prefix, int vrf, const char *password);
+static inline int test_set_md5(int sk, const union tcp_addr in_addr,
+ uint8_t prefix, int vrf, const char *password)
+{
+ sockaddr_af addr;
+
+ if (prefix > DEFAULT_TEST_PREFIX)
+ prefix = DEFAULT_TEST_PREFIX;
+
+ tcp_addr_to_sockaddr_in(&addr, &in_addr, 0);
+ return __test_set_md5(sk, (void *)&addr, sizeof(addr),
+ prefix, vrf, password);
+}
+
+extern int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg,
+ void *addr, size_t addr_sz, bool set_current, bool set_rnext,
+ uint8_t prefix, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+ uint8_t keyflags, uint8_t keylen, const char *key);
+
+static inline int test_prepare_key(struct tcp_ao_add *ao,
+ const char *alg, union tcp_addr taddr,
+ bool set_current, bool set_rnext,
+ uint8_t prefix, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid, uint8_t maclen,
+ uint8_t keyflags, uint8_t keylen, const char *key)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &taddr, 0);
+ return test_prepare_key_sockaddr(ao, alg, (void *)&addr, sizeof(addr),
+ set_current, set_rnext, prefix, vrf, sndid, rcvid,
+ maclen, keyflags, keylen, key);
+}
+
+static inline int test_prepare_def_key(struct tcp_ao_add *ao,
+ const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix, uint8_t vrf,
+ uint8_t sndid, uint8_t rcvid)
+{
+ if (prefix > DEFAULT_TEST_PREFIX)
+ prefix = DEFAULT_TEST_PREFIX;
+
+ return test_prepare_key(ao, DEFAULT_TEST_ALGO, in_addr, false, false,
+ prefix, vrf, sndid, rcvid, 0, keyflags,
+ strlen(key), key);
+}
+
+extern int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out,
+ void *addr, size_t addr_sz,
+ uint8_t prefix, uint8_t sndid, uint8_t rcvid);
+extern int test_get_ao_info(int sk, struct tcp_ao_info_opt *out);
+extern int test_set_ao_info(int sk, struct tcp_ao_info_opt *in);
+extern int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a,
+ const struct tcp_ao_getsockopt *b);
+extern int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a,
+ const struct tcp_ao_info_opt *b);
+
+static inline int test_verify_socket_key(int sk, struct tcp_ao_add *key)
+{
+ struct tcp_ao_getsockopt key2 = {};
+ int err;
+
+ err = test_get_one_ao(sk, &key2, &key->addr, sizeof(key->addr),
+ key->prefix, key->sndid, key->rcvid);
+ if (err)
+ return err;
+
+ return test_cmp_getsockopt_setsockopt(key, &key2);
+}
+
+static inline int test_add_key_vrf(int sk,
+ const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t vrf, uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix,
+ vrf, sndid, rcvid);
+ if (err)
+ return err;
+
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp));
+ if (err < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+static inline int test_add_key(int sk, const char *key,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ return test_add_key_vrf(sk, key, 0, in_addr, prefix, 0, sndid, rcvid);
+}
+
+static inline int test_verify_socket_ao(int sk, struct tcp_ao_info_opt *ao)
+{
+ struct tcp_ao_info_opt ao2 = {};
+ int err;
+
+ err = test_get_ao_info(sk, &ao2);
+ if (err)
+ return err;
+
+ return test_cmp_getsockopt_setsockopt_ao(ao, &ao2);
+}
+
+static inline int test_set_ao_flags(int sk, bool ao_required, bool accept_icmps)
+{
+ struct tcp_ao_info_opt ao = {};
+ int err;
+
+ err = test_get_ao_info(sk, &ao);
+ /* Maybe ao_info wasn't allocated yet */
+ if (err && err != -ENOENT)
+ return err;
+
+ ao.ao_required = !!ao_required;
+ ao.accept_icmps = !!accept_icmps;
+ err = test_set_ao_info(sk, &ao);
+ if (err)
+ return err;
+
+ return test_verify_socket_ao(sk, &ao);
+}
+
+extern ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec);
+extern ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
+ const size_t msg_len, time_t timeout_sec);
+extern int test_client_verify(int sk, const size_t msg_len, const size_t nr,
+ time_t timeout_sec);
+
+struct tcp_ao_key_counters {
+ uint8_t sndid;
+ uint8_t rcvid;
+ uint64_t pkt_good;
+ uint64_t pkt_bad;
+};
+
+struct tcp_ao_counters {
+ /* per-netns */
+ uint64_t netns_ao_good;
+ uint64_t netns_ao_bad;
+ uint64_t netns_ao_key_not_found;
+ uint64_t netns_ao_required;
+ uint64_t netns_ao_dropped_icmp;
+ /* per-socket */
+ uint64_t ao_info_pkt_good;
+ uint64_t ao_info_pkt_bad;
+ uint64_t ao_info_pkt_key_not_found;
+ uint64_t ao_info_pkt_ao_required;
+ uint64_t ao_info_pkt_dropped_icmp;
+ /* per-key */
+ size_t nr_keys;
+ struct tcp_ao_key_counters *key_cnts;
+};
+extern int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out);
+
+#define TEST_CNT_KEY_GOOD BIT(0)
+#define TEST_CNT_KEY_BAD BIT(1)
+#define TEST_CNT_SOCK_GOOD BIT(2)
+#define TEST_CNT_SOCK_BAD BIT(3)
+#define TEST_CNT_SOCK_KEY_NOT_FOUND BIT(4)
+#define TEST_CNT_SOCK_AO_REQUIRED BIT(5)
+#define TEST_CNT_SOCK_DROPPED_ICMP BIT(6)
+#define TEST_CNT_NS_GOOD BIT(7)
+#define TEST_CNT_NS_BAD BIT(8)
+#define TEST_CNT_NS_KEY_NOT_FOUND BIT(9)
+#define TEST_CNT_NS_AO_REQUIRED BIT(10)
+#define TEST_CNT_NS_DROPPED_ICMP BIT(11)
+typedef uint16_t test_cnt;
+
+#define TEST_CNT_AO_GOOD (TEST_CNT_SOCK_GOOD | TEST_CNT_NS_GOOD)
+#define TEST_CNT_AO_BAD (TEST_CNT_SOCK_BAD | TEST_CNT_NS_BAD)
+#define TEST_CNT_AO_KEY_NOT_FOUND (TEST_CNT_SOCK_KEY_NOT_FOUND | \
+ TEST_CNT_NS_KEY_NOT_FOUND)
+#define TEST_CNT_AO_REQUIRED (TEST_CNT_SOCK_AO_REQUIRED | \
+ TEST_CNT_NS_AO_REQUIRED)
+#define TEST_CNT_AO_DROPPED_ICMP (TEST_CNT_SOCK_DROPPED_ICMP | \
+ TEST_CNT_NS_DROPPED_ICMP)
+#define TEST_CNT_GOOD (TEST_CNT_KEY_GOOD | TEST_CNT_AO_GOOD)
+#define TEST_CNT_BAD (TEST_CNT_KEY_BAD | TEST_CNT_AO_BAD)
+
+extern int __test_tcp_ao_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+ test_cnt expected);
+extern int test_tcp_ao_key_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before, struct tcp_ao_counters *after,
+ test_cnt expected, int sndid, int rcvid);
+extern void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts);
+/*
+ * Frees buffers allocated in test_get_tcp_ao_counters().
+ * The function doesn't expect new keys or keys removed between calls
+ * to test_get_tcp_ao_counters(). Check key counters manually if they
+ * may change.
+ */
+static inline int test_tcp_ao_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected)
+{
+ int ret;
+
+ ret = __test_tcp_ao_counters_cmp(tst_name, before, after, expected);
+ if (ret)
+ goto out;
+ ret = test_tcp_ao_key_counters_cmp(tst_name, before, after,
+ expected, -1, -1);
+out:
+ test_tcp_ao_counters_free(before);
+ test_tcp_ao_counters_free(after);
+ return ret;
+}
+
+struct netstat;
+extern struct netstat *netstat_read(void);
+extern void netstat_free(struct netstat *ns);
+extern void netstat_print_diff(struct netstat *nsa, struct netstat *nsb);
+extern uint64_t netstat_get(struct netstat *ns,
+ const char *name, bool *not_found);
+
+static inline uint64_t netstat_get_one(const char *name, bool *not_found)
+{
+ struct netstat *ns = netstat_read();
+ uint64_t ret;
+
+ ret = netstat_get(ns, name, not_found);
+
+ netstat_free(ns);
+ return ret;
+}
+
+struct tcp_sock_queue {
+ uint32_t seq;
+ void *buf;
+};
+
+struct tcp_sock_state {
+ struct tcp_info info;
+ struct tcp_repair_window trw;
+ struct tcp_sock_queue out;
+ int outq_len; /* output queue size (not sent + not acked) */
+ int outq_nsd_len; /* output queue size (not sent only) */
+ struct tcp_sock_queue in;
+ int inq_len;
+ int mss;
+ int timestamp;
+};
+
+extern void __test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+ void *addr, size_t addr_size);
+static inline void test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+ sockaddr_af *saddr)
+{
+ __test_sock_checkpoint(sk, state, saddr, sizeof(*saddr));
+}
+extern void test_ao_checkpoint(int sk, struct tcp_ao_repair *state);
+extern void __test_sock_restore(int sk, const char *device,
+ struct tcp_sock_state *state,
+ void *saddr, void *daddr, size_t addr_size);
+static inline void test_sock_restore(int sk, struct tcp_sock_state *state,
+ sockaddr_af *saddr,
+ const union tcp_addr daddr,
+ unsigned int dport)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &daddr, htons(dport));
+ __test_sock_restore(sk, veth_name, state, saddr, &addr, sizeof(addr));
+}
+extern void test_ao_restore(int sk, struct tcp_ao_repair *state);
+extern void test_sock_state_free(struct tcp_sock_state *state);
+extern void test_enable_repair(int sk);
+extern void test_disable_repair(int sk);
+extern void test_kill_sk(int sk);
+static inline int test_add_repaired_key(int sk,
+ const char *key, uint8_t keyflags,
+ union tcp_addr in_addr, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_add tmp = {};
+ int err;
+
+ err = test_prepare_def_key(&tmp, key, keyflags, in_addr, prefix,
+ 0, sndid, rcvid);
+ if (err)
+ return err;
+
+ tmp.set_current = 1;
+ tmp.set_rnext = 1;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0)
+ return -errno;
+
+ return test_verify_socket_key(sk, &tmp);
+}
+
+#endif /* _AOLIB_H_ */
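Review bot: In `gen_tcp_addr()` the IPv6 branch evaluates `n >> 32` on a `size_t`, which is undefined behaviour on 32-bit builds where `size_t` is only 32 bits wide. `BIT(32)` likely has the same problem if it expands to an `unsigned long` shift, though its definition is not in this hunk. Converting before shifting avoids both: `ret.a6.s6_addr32[3] = htonl((uint32_t)n);` and `ret.a6.s6_addr32[2] = htonl((uint32_t)((uint64_t)n >> 32));`. Separately, `test_prepare_def_key()` passes `strlen(key)` into the `uint8_t keylen` parameter of `test_prepare_key()`, so a password of 256 bytes or more silently wraps to a short length and the installed key would differ from what the test believes it configured. A guard such as `if (strlen(key) > UINT8_MAX) return -EINVAL;` before the call would make that visible.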
diff --git a/tools/testing/selftests/net/tcp_ao/lib/kconfig.c b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
new file mode 100644
index 000000000000..f279ffc3843b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/kconfig.c
@@ -0,0 +1,148 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Check what features does the kernel support (where the selftest is running).
+ * Somewhat inspired by CRIU kerndat/kdat kernel features detector.
+ */
+#include <pthread.h>
+#include "aolib.h"
+
+struct kconfig_t {
+ int _errno; /* the returned error if not supported */
+ int (*check_kconfig)(int *error);
+};
+
+static int has_net_ns(int *err)
+{
+ if (access("/proc/self/ns/net", F_OK) < 0) {
+ *err = errno;
+ if (errno == ENOENT)
+ return 0;
+ test_print("Unable to access /proc/self/ns/net: %m");
+ return -errno;
+ }
+ return *err = errno = 0;
+}
+
+static int has_veth(int *err)
+{
+ int orig_netns, ns_a, ns_b;
+
+ orig_netns = open_netns();
+ ns_a = unshare_open_netns();
+ ns_b = unshare_open_netns();
+
+ *err = add_veth("check_veth", ns_a, ns_b);
+
+ switch_ns(orig_netns);
+ close(orig_netns);
+ close(ns_a);
+ close(ns_b);
+ return 0;
+}
+
+static int has_tcp_ao(int *err)
+{
+ struct sockaddr_in addr = {
+ .sin_family = test_family,
+ };
+ struct tcp_ao_add tmp = {};
+ const char *password = DEFAULT_TEST_PASSWORD;
+ int sk, ret = 0;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0) {
+ test_print("socket(): %m");
+ return -errno;
+ }
+
+ tmp.sndid = 100;
+ tmp.rcvid = 100;
+ tmp.keylen = strlen(password);
+ memcpy(tmp.key, password, strlen(password));
+ strcpy(tmp.alg_name, "hmac(sha1)");
+ memcpy(&tmp.addr, &addr, sizeof(addr));
+ *err = 0;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &tmp, sizeof(tmp)) < 0) {
+ *err = errno;
+ if (errno != ENOPROTOOPT)
+ ret = -errno;
+ }
+ close(sk);
+ return ret;
+}
+
+static int has_tcp_md5(int *err)
+{
+ union tcp_addr addr_any = {};
+ int sk, ret = 0;
+
+ sk = socket(AF_INET, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0) {
+ test_print("socket(): %m");
+ return -errno;
+ }
+
+ /*
+ * Under CONFIG_CRYPTO_FIPS=y it fails with ENOMEM, rather with
+ * anything more descriptive. Oh well.
+ */
+ *err = 0;
+ if (test_set_md5(sk, addr_any, 0, -1, DEFAULT_TEST_PASSWORD)) {
+ *err = errno;
+ if (errno != ENOPROTOOPT && errno == ENOMEM) {
+ test_print("setsockopt(TCP_MD5SIG_EXT): %m");
+ ret = -errno;
+ }
+ }
+ close(sk);
+ return ret;
+}
+
+static int has_vrfs(int *err)
+{
+ int orig_netns, ns_test, ret = 0;
+
+ orig_netns = open_netns();
+ ns_test = unshare_open_netns();
+
+ *err = add_vrf("ksft-check", 55, 101, ns_test);
+ if (*err && *err != -EOPNOTSUPP) {
+ test_print("Failed to add a VRF: %d", *err);
+ ret = *err;
+ }
+
+ switch_ns(orig_netns);
+ close(orig_netns);
+ close(ns_test);
+ return ret;
+}
+
+static pthread_mutex_t kconfig_lock = PTHREAD_MUTEX_INITIALIZER;
+static struct kconfig_t kconfig[__KCONFIG_LAST__] = {
+ { -1, has_net_ns },
+ { -1, has_veth },
+ { -1, has_tcp_ao },
+ { -1, has_tcp_md5 },
+ { -1, has_vrfs },
+};
+
+const char *tests_skip_reason[__KCONFIG_LAST__] = {
+ "Tests require network namespaces support (CONFIG_NET_NS)",
+ "Tests require veth support (CONFIG_VETH)",
+ "Tests require TCP-AO support (CONFIG_TCP_AO)",
+ "setsockopt(TCP_MD5SIG_EXT) is not supported (CONFIG_TCP_MD5)",
+ "VRFs are not supported (CONFIG_NET_VRF)",
+};
+
+bool kernel_config_has(enum test_needs_kconfig k)
+{
+ bool ret;
+
+ pthread_mutex_lock(&kconfig_lock);
+ if (kconfig[k]._errno == -1) {
+ if (kconfig[k].check_kconfig(&kconfig[k]._errno))
+ test_error("Failed to initialize kconfig %u", k);
+ }
+ ret = kconfig[k]._errno == 0;
+ pthread_mutex_unlock(&kconfig_lock);
+ return ret;
+}
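Review bot: In has_tcp_md5() the test `errno != ENOPROTOOPT && errno == ENOMEM` reduces to `errno == ENOMEM`, so the one errno that the comment above attributes to CONFIG_CRYPTO_FIPS=y is the only one reported as a hard failure, while every other unexpected errno is silently recorded as "not supported". If the intent was to tolerate ENOMEM, the condition would read `if (errno != ENOPROTOOPT && errno != ENOMEM) {`; whichever is meant, the comment should state which errno values count as "feature absent" and which abort the run. The check also relies on test_set_md5() leaving errno set on failure, which is not visible in this file.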
diff --git a/tools/testing/selftests/net/tcp_ao/lib/netlink.c b/tools/testing/selftests/net/tcp_ao/lib/netlink.c
new file mode 100644
index 000000000000..7f108493a29a
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/netlink.c
@@ -0,0 +1,413 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Original from tools/testing/selftests/net/ipsec.c */
+#include <linux/netlink.h>
+#include <linux/random.h>
+#include <linux/rtnetlink.h>
+#include <linux/veth.h>
+#include <net/if.h>
+#include <stdint.h>
+#include <string.h>
+#include <sys/socket.h>
+
+#include "aolib.h"
+
+#define MAX_PAYLOAD 2048
+
+static int netlink_sock(int *sock, uint32_t *seq_nr, int proto)
+{
+ if (*sock > 0) {
+ seq_nr++;
+ return 0;
+ }
+
+ *sock = socket(AF_NETLINK, SOCK_RAW | SOCK_CLOEXEC, proto);
+ if (*sock < 0) {
+ test_print("socket(AF_NETLINK)");
+ return -1;
+ }
+
+ randomize_buffer(seq_nr, sizeof(*seq_nr));
+
+ return 0;
+}
+
+static int netlink_check_answer(int sock, bool quite)
+{
+ struct nlmsgerror {
+ struct nlmsghdr hdr;
+ int error;
+ struct nlmsghdr orig_msg;
+ } answer;
+
+ if (recv(sock, &answer, sizeof(answer), 0) < 0) {
+ test_print("recv()");
+ return -1;
+ } else if (answer.hdr.nlmsg_type != NLMSG_ERROR) {
+ test_print("expected NLMSG_ERROR, got %d",
+ (int)answer.hdr.nlmsg_type);
+ return -1;
+ } else if (answer.error) {
+ if (!quite) {
+ test_print("NLMSG_ERROR: %d: %s",
+ answer.error, strerror(-answer.error));
+ }
+ return answer.error;
+ }
+
+ return 0;
+}
+
+static inline struct rtattr *rtattr_hdr(struct nlmsghdr *nh)
+{
+ return (struct rtattr *)((char *)(nh) + RTA_ALIGN((nh)->nlmsg_len));
+}
+
+static int rtattr_pack(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ /* NLMSG_ALIGNTO == RTA_ALIGNTO, nlmsg_len already aligned */
+ struct rtattr *attr = rtattr_hdr(nh);
+ size_t nl_size = RTA_ALIGN(nh->nlmsg_len) + RTA_LENGTH(size);
+
+ if (req_sz < nl_size) {
+ test_print("req buf is too small: %zu < %zu", req_sz, nl_size);
+ return -1;
+ }
+ nh->nlmsg_len = nl_size;
+
+ attr->rta_len = RTA_LENGTH(size);
+ attr->rta_type = rta_type;
+ memcpy(RTA_DATA(attr), payload, size);
+
+ return 0;
+}
+
+static struct rtattr *_rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type, const void *payload, size_t size)
+{
+ struct rtattr *ret = rtattr_hdr(nh);
+
+ if (rtattr_pack(nh, req_sz, rta_type, payload, size))
+ return 0;
+
+ return ret;
+}
+
+static inline struct rtattr *rtattr_begin(struct nlmsghdr *nh, size_t req_sz,
+ unsigned short rta_type)
+{
+ return _rtattr_begin(nh, req_sz, rta_type, 0, 0);
+}
+
+static inline void rtattr_end(struct nlmsghdr *nh, struct rtattr *attr)
+{
+ char *nlmsg_end = (char *)nh + nh->nlmsg_len;
+
+ attr->rta_len = nlmsg_end - (char *)attr;
+}
+
+static int veth_pack_peerb(struct nlmsghdr *nh, size_t req_sz,
+ const char *peer, int ns)
+{
+ struct ifinfomsg pi;
+ struct rtattr *peer_attr;
+
+ memset(&pi, 0, sizeof(pi));
+ pi.ifi_family = AF_UNSPEC;
+ pi.ifi_change = 0xFFFFFFFF;
+
+ peer_attr = _rtattr_begin(nh, req_sz, VETH_INFO_PEER, &pi, sizeof(pi));
+ if (!peer_attr)
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_IFNAME, peer, strlen(peer)))
+ return -1;
+
+ if (rtattr_pack(nh, req_sz, IFLA_NET_NS_FD, &ns, sizeof(ns)))
+ return -1;
+
+ rtattr_end(nh, peer_attr);
+
+ return 0;
+}
+
+static int __add_veth(int sock, uint32_t seq, const char *name,
+ int ns_a, int ns_b)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ static const char veth_type[] = "veth";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name)))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD, &ns_a, sizeof(ns_a)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, veth_type, sizeof(veth_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (veth_pack_peerb(&req.nh, sizeof(req), name, ns_b))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, false);
+}
+
+int add_veth(const char *name, int nsfda, int nsfdb)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __add_veth(route_sock, route_seq++, name, nsfda, nsfdb);
+ close(route_sock);
+ return ret;
+}
+
+static int __ip_addr_add(int sock, uint32_t seq, const char *intf,
+ int family, union tcp_addr addr, uint8_t prefix)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifaddrmsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+ sizeof(struct in6_addr);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWADDR;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifa_family = family;
+ req.info.ifa_prefixlen = prefix;
+ req.info.ifa_index = if_nametoindex(intf);
+ req.info.ifa_flags = IFA_F_NODAD;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFA_LOCAL, &addr, addr_len))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, true);
+}
+
+int ip_addr_add(const char *intf, int family,
+ union tcp_addr addr, uint8_t prefix)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __ip_addr_add(route_sock, route_seq++, intf,
+ family, addr, prefix);
+
+ close(route_sock);
+ return ret;
+}
+
+static int __ip_route_add(int sock, uint32_t seq, const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst, uint8_t vrf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct rtmsg rt;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ unsigned int index = if_nametoindex(intf);
+ size_t addr_len = (family == AF_INET) ? sizeof(struct in_addr) :
+ sizeof(struct in6_addr);
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.rt));
+ req.nh.nlmsg_type = RTM_NEWROUTE;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_CREATE;
+ req.nh.nlmsg_seq = seq;
+ req.rt.rtm_family = family;
+ req.rt.rtm_dst_len = (family == AF_INET) ? 32 : 128;
+ req.rt.rtm_table = vrf;
+ req.rt.rtm_protocol = RTPROT_BOOT;
+ req.rt.rtm_scope = RT_SCOPE_UNIVERSE;
+ req.rt.rtm_type = RTN_UNICAST;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_DST, &dst, addr_len))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_PREFSRC, &src, addr_len))
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), RTA_OIF, &index, sizeof(index)))
+ return -1;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+
+ return netlink_check_answer(sock, true);
+}
+
+int ip_route_add_vrf(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst, uint8_t vrf)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __ip_route_add(route_sock, route_seq++, intf,
+ family, src, dst, vrf);
+
+ close(route_sock);
+ return ret;
+}
+
+int ip_route_add(const char *intf, int family,
+ union tcp_addr src, union tcp_addr dst)
+{
+ return ip_route_add_vrf(intf, family, src, dst, RT_TABLE_MAIN);
+}
+
+static int __link_set_up(int sock, uint32_t seq, const char *intf)
+{
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = NLM_F_REQUEST | NLM_F_ACK;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = if_nametoindex(intf);
+ req.info.ifi_flags = IFF_UP;
+ req.info.ifi_change = IFF_UP;
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, false);
+}
+
+int link_set_up(const char *intf)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __link_set_up(route_sock, route_seq++, intf);
+
+ close(route_sock);
+ return ret;
+}
+
+static int __add_vrf(int sock, uint32_t seq, const char *name,
+ uint32_t tabid, int ifindex, int nsfd)
+{
+ uint16_t flags = NLM_F_REQUEST | NLM_F_ACK | NLM_F_EXCL | NLM_F_CREATE;
+ struct {
+ struct nlmsghdr nh;
+ struct ifinfomsg info;
+ char attrbuf[MAX_PAYLOAD];
+ } req;
+ static const char vrf_type[] = "vrf";
+ struct rtattr *link_info, *info_data;
+
+ memset(&req, 0, sizeof(req));
+ req.nh.nlmsg_len = NLMSG_LENGTH(sizeof(req.info));
+ req.nh.nlmsg_type = RTM_NEWLINK;
+ req.nh.nlmsg_flags = flags;
+ req.nh.nlmsg_seq = seq;
+ req.info.ifi_family = AF_UNSPEC;
+ req.info.ifi_change = 0xFFFFFFFF;
+ req.info.ifi_index = ifindex;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_IFNAME, name, strlen(name)))
+ return -1;
+
+ if (nsfd >= 0)
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_NET_NS_FD,
+ &nsfd, sizeof(nsfd)))
+ return -1;
+
+ link_info = rtattr_begin(&req.nh, sizeof(req), IFLA_LINKINFO);
+ if (!link_info)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_INFO_KIND, vrf_type, sizeof(vrf_type)))
+ return -1;
+
+ info_data = rtattr_begin(&req.nh, sizeof(req), IFLA_INFO_DATA);
+ if (!info_data)
+ return -1;
+
+ if (rtattr_pack(&req.nh, sizeof(req), IFLA_VRF_TABLE,
+ &tabid, sizeof(tabid)))
+ return -1;
+
+ rtattr_end(&req.nh, info_data);
+ rtattr_end(&req.nh, link_info);
+
+ if (send(sock, &req, req.nh.nlmsg_len, 0) < 0) {
+ test_print("send()");
+ return -1;
+ }
+ return netlink_check_answer(sock, true);
+}
+
+int add_vrf(const char *name, uint32_t tabid, int ifindex, int nsfd)
+{
+ int route_sock = -1, ret;
+ uint32_t route_seq;
+
+ if (netlink_sock(&route_sock, &route_seq, NETLINK_ROUTE))
+ test_error("Failed to open netlink route socket\n");
+
+ ret = __add_vrf(route_sock, route_seq++, name, tabid, ifindex, nsfd);
+ close(route_sock);
+ return ret;
+}
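Review bot: netlink_check_answer() only checks `recv()` for a negative return, so a reply shorter than the header plus error code leaves `answer.hdr.nlmsg_type` and `answer.error` as uninitialised stack contents that are then read and returned. Keeping the length and rejecting short replies would close that, e.g. `ssize_t n = recv(sock, &answer, sizeof(answer), 0);` followed by `if (n < (ssize_t)(sizeof(answer.hdr) + sizeof(answer.error))) return -1;`. The reply's `nlmsg_seq` is also never compared with the sequence number that was sent, so the randomised `seq` does not actually tie an ack to its request. In netlink_sock(), `seq_nr++` advances the local pointer rather than the counter; `(*seq_nr)++` looks intended, though that branch is unreachable from the callers in this file, which always pass a socket of -1.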
diff --git a/tools/testing/selftests/net/tcp_ao/lib/proc.c b/tools/testing/selftests/net/tcp_ao/lib/proc.c
new file mode 100644
index 000000000000..8b984fa04286
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/proc.c
@@ -0,0 +1,273 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <inttypes.h>
+#include <pthread.h>
+#include <stdio.h>
+#include "../../../../../include/linux/compiler.h"
+#include "../../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+struct netstat_counter {
+ uint64_t val;
+ char *name;
+};
+
+struct netstat {
+ char *header_name;
+ struct netstat *next;
+ size_t counters_nr;
+ struct netstat_counter *counters;
+};
+
+static struct netstat *lookup_type(struct netstat *ns,
+ const char *type, size_t len)
+{
+ while (ns != NULL) {
+ size_t cmp = max(len, strlen(ns->header_name));
+
+ if (!strncmp(ns->header_name, type, cmp))
+ return ns;
+ ns = ns->next;
+ }
+ return NULL;
+}
+
+static struct netstat *lookup_get(struct netstat *ns,
+ const char *type, const size_t len)
+{
+ struct netstat *ret;
+
+ ret = lookup_type(ns, type, len);
+ if (ret != NULL)
+ return ret;
+
+ ret = malloc(sizeof(struct netstat));
+ if (!ret)
+ test_error("malloc()");
+
+ ret->header_name = strndup(type, len);
+ if (ret->header_name == NULL)
+ test_error("strndup()");
+ ret->next = ns;
+ ret->counters_nr = 0;
+ ret->counters = NULL;
+
+ return ret;
+}
+
+static struct netstat *lookup_get_column(struct netstat *ns, const char *line)
+{
+ char *column;
+
+ column = strchr(line, ':');
+ if (!column)
+ test_error("can't parse netstat file");
+
+ return lookup_get(ns, line, column - line);
+}
+
+static void netstat_read_type(FILE *fnetstat, struct netstat **dest, char *line)
+{
+ struct netstat *type = lookup_get_column(*dest, line);
+ const char *pos = line;
+ size_t i, nr_elems = 0;
+ char tmp;
+
+ while ((pos = strchr(pos, ' '))) {
+ nr_elems++;
+ pos++;
+ }
+
+ *dest = type;
+ type->counters = reallocarray(type->counters,
+ type->counters_nr + nr_elems,
+ sizeof(struct netstat_counter));
+ if (!type->counters)
+ test_error("reallocarray()");
+
+ pos = strchr(line, ' ') + 1;
+
+ if (fscanf(fnetstat, "%[^ :]", type->header_name) == EOF)
+ test_error("fscanf(%s)", type->header_name);
+ if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != ':')
+ test_error("Unexpected netstat format (%c)", tmp);
+
+ for (i = type->counters_nr; i < type->counters_nr + nr_elems; i++) {
+ struct netstat_counter *nc = &type->counters[i];
+ const char *new_pos = strchr(pos, ' ');
+ const char *fmt = " %" PRIu64;
+
+ if (new_pos == NULL)
+ new_pos = strchr(pos, '\n');
+
+ nc->name = strndup(pos, new_pos - pos);
+ if (nc->name == NULL)
+ test_error("strndup()");
+
+ if (unlikely(!strcmp(nc->name, "MaxConn")))
+ fmt = " %" PRId64; /* MaxConn is signed, RFC 2012 */
+ if (fscanf(fnetstat, fmt, &nc->val) != 1)
+ test_error("fscanf(%s)", nc->name);
+ pos = new_pos + 1;
+ }
+ type->counters_nr += nr_elems;
+
+ if (fread(&tmp, 1, 1, fnetstat) != 1 || tmp != '\n')
+ test_error("Unexpected netstat format");
+}
+
+static const char *snmp6_name = "Snmp6";
+static void snmp6_read(FILE *fnetstat, struct netstat **dest)
+{
+ struct netstat *type = lookup_get(*dest, snmp6_name, strlen(snmp6_name));
+ char *counter_name;
+ size_t i;
+
+ for (i = type->counters_nr;; i++) {
+ struct netstat_counter *nc;
+ uint64_t counter;
+
+ if (fscanf(fnetstat, "%ms", &counter_name) == EOF)
+ break;
+ if (fscanf(fnetstat, "%" PRIu64, &counter) == EOF)
+ test_error("Unexpected snmp6 format");
+ type->counters = reallocarray(type->counters, i + 1,
+ sizeof(struct netstat_counter));
+ if (!type->counters)
+ test_error("reallocarray()");
+ nc = &type->counters[i];
+ nc->name = counter_name;
+ nc->val = counter;
+ }
+ type->counters_nr = i;
+ *dest = type;
+}
+
+struct netstat *netstat_read(void)
+{
+ struct netstat *ret = 0;
+ size_t line_sz = 0;
+ char *line = NULL;
+ FILE *fnetstat;
+
+ /*
+ * Opening thread-self instead of /proc/net/... as the latter
+ * points to /proc/self/net/ which instantiates thread-leader's
+ * net-ns, see:
+ * commit 155134fef2b6 ("Revert "proc: Point /proc/{mounts,net} at..")
+ */
+ errno = 0;
+ fnetstat = fopen("/proc/thread-self/net/netstat", "r");
+ if (fnetstat == NULL)
+ test_error("failed to open /proc/net/netstat");
+
+ while (getline(&line, &line_sz, fnetstat) != -1)
+ netstat_read_type(fnetstat, &ret, line);
+ fclose(fnetstat);
+
+ errno = 0;
+ fnetstat = fopen("/proc/thread-self/net/snmp", "r");
+ if (fnetstat == NULL)
+ test_error("failed to open /proc/net/snmp");
+
+ while (getline(&line, &line_sz, fnetstat) != -1)
+ netstat_read_type(fnetstat, &ret, line);
+ fclose(fnetstat);
+
+ errno = 0;
+ fnetstat = fopen("/proc/thread-self/net/snmp6", "r");
+ if (fnetstat == NULL)
+ test_error("failed to open /proc/net/snmp6");
+
+ snmp6_read(fnetstat, &ret);
+ fclose(fnetstat);
+
+ free(line);
+ return ret;
+}
+
+void netstat_free(struct netstat *ns)
+{
+ while (ns != NULL) {
+ struct netstat *prev = ns;
+ size_t i;
+
+ free(ns->header_name);
+ for (i = 0; i < ns->counters_nr; i++)
+ free(ns->counters[i].name);
+ free(ns->counters);
+ ns = ns->next;
+ free(prev);
+ }
+}
+
+static inline void
+__netstat_print_diff(uint64_t a, struct netstat *nsb, size_t i)
+{
+ if (unlikely(!strcmp(nsb->header_name, "MaxConn"))) {
+ test_print("%8s %25s: %" PRId64 " => %" PRId64,
+ nsb->header_name, nsb->counters[i].name,
+ a, nsb->counters[i].val);
+ return;
+ }
+
+ test_print("%8s %25s: %" PRIu64 " => %" PRIu64, nsb->header_name,
+ nsb->counters[i].name, a, nsb->counters[i].val);
+}
+
+void netstat_print_diff(struct netstat *nsa, struct netstat *nsb)
+{
+ size_t i, j;
+
+ while (nsb != NULL) {
+ if (unlikely(strcmp(nsb->header_name, nsa->header_name))) {
+ for (i = 0; i < nsb->counters_nr; i++)
+ __netstat_print_diff(0, nsb, i);
+ nsb = nsb->next;
+ continue;
+ }
+
+ if (nsb->counters_nr < nsa->counters_nr)
+ test_error("Unexpected: some counters disappeared!");
+
+ for (j = 0, i = 0; i < nsb->counters_nr; i++) {
+ if (strcmp(nsb->counters[i].name, nsa->counters[j].name)) {
+ __netstat_print_diff(0, nsb, i);
+ continue;
+ }
+
+ if (nsa->counters[j].val == nsb->counters[i].val) {
+ j++;
+ continue;
+ }
+
+ __netstat_print_diff(nsa->counters[j].val, nsb, i);
+ j++;
+ }
+ if (j != nsa->counters_nr)
+ test_error("Unexpected: some counters disappeared!");
+
+ nsb = nsb->next;
+ nsa = nsa->next;
+ }
+}
+
+uint64_t netstat_get(struct netstat *ns, const char *name, bool *not_found)
+{
+ if (not_found)
+ *not_found = false;
+
+ while (ns != NULL) {
+ size_t i;
+
+ for (i = 0; i < ns->counters_nr; i++) {
+ if (!strcmp(name, ns->counters[i].name))
+ return ns->counters[i].val;
+ }
+
+ ns = ns->next;
+ }
+
+ if (not_found)
+ *not_found = true;
+ return 0;
+}
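Review bot: netstat_read_type() reads the value line's prefix with `fscanf(fnetstat, "%[^ :]", type->header_name)`, an unbounded conversion into a buffer that lookup_get() sized with `strndup()` to exactly the header line's prefix, so a longer prefix on the second line writes past the allocation. The name is already known at that point, so the conversion can discard the text instead: `if (fscanf(fnetstat, "%*[^ :]") == EOF)`. The same function uses `strchr(line, ' ') + 1` and `strchr(pos, '\n')` without a NULL check, and netstat_print_diff() reads `nsa->counters[j]` without first checking `j < nsa->counters_nr` (and dereferences `nsa` without a NULL check), which goes out of bounds when the second snapshot has extra trailing counters. The input is procfs rather than attacker-supplied data, but a short comment stating that assumption would help anyone reusing this parser.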
diff --git a/tools/testing/selftests/net/tcp_ao/lib/repair.c b/tools/testing/selftests/net/tcp_ao/lib/repair.c
new file mode 100644
index 000000000000..9893b3ba69f5
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/repair.c
@@ -0,0 +1,254 @@
+// SPDX-License-Identifier: GPL-2.0
+/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets
+ * It tests that TCP-AO enabled connection can be restored.
+ * For the proper socket repair see:
+ * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h
+ */
+#include <fcntl.h>
+#include <linux/sockios.h>
+#include <sys/ioctl.h>
+#include "aolib.h"
+
+#ifndef TCPOPT_MAXSEG
+# define TCPOPT_MAXSEG 2
+#endif
+#ifndef TCPOPT_WINDOW
+# define TCPOPT_WINDOW 3
+#endif
+#ifndef TCPOPT_SACK_PERMITTED
+# define TCPOPT_SACK_PERMITTED 4
+#endif
+#ifndef TCPOPT_TIMESTAMP
+# define TCPOPT_TIMESTAMP 8
+#endif
+
+enum {
+ TCP_ESTABLISHED = 1,
+ TCP_SYN_SENT,
+ TCP_SYN_RECV,
+ TCP_FIN_WAIT1,
+ TCP_FIN_WAIT2,
+ TCP_TIME_WAIT,
+ TCP_CLOSE,
+ TCP_CLOSE_WAIT,
+ TCP_LAST_ACK,
+ TCP_LISTEN,
+ TCP_CLOSING, /* Now a valid state */
+ TCP_NEW_SYN_RECV,
+
+ TCP_MAX_STATES /* Leave at the end! */
+};
+
+static void test_sock_checkpoint_queue(int sk, int queue, int qlen,
+ struct tcp_sock_queue *q)
+{
+ socklen_t len;
+ int ret;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+ len = sizeof(q->seq);
+ ret = getsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &q->seq, &len);
+ if (ret || len != sizeof(q->seq))
+ test_error("getsockopt(TCP_QUEUE_SEQ): %d", (int)len);
+
+ if (!qlen) {
+ q->buf = NULL;
+ return;
+ }
+
+ q->buf = malloc(qlen);
+ if (q->buf == NULL)
+ test_error("malloc()");
+ ret = recv(sk, q->buf, qlen, MSG_PEEK | MSG_DONTWAIT);
+ if (ret != qlen)
+ test_error("recv(%d): %d", qlen, ret);
+}
+
+void __test_sock_checkpoint(int sk, struct tcp_sock_state *state,
+ void *addr, size_t addr_size)
+{
+ socklen_t len = sizeof(state->info);
+ int ret;
+
+ memset(state, 0, sizeof(*state));
+
+ ret = getsockopt(sk, SOL_TCP, TCP_INFO, &state->info, &len);
+ if (ret || len != sizeof(state->info))
+ test_error("getsockopt(TCP_INFO): %d", (int)len);
+
+ len = addr_size;
+ if (getsockname(sk, addr, &len) || len != addr_size)
+ test_error("getsockname(): %d", (int)len);
+
+ len = sizeof(state->trw);
+ ret = getsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, &len);
+ if (ret || len != sizeof(state->trw))
+ test_error("getsockopt(TCP_REPAIR_WINDOW): %d", (int)len);
+
+ if (ioctl(sk, SIOCOUTQ, &state->outq_len))
+ test_error("ioctl(SIOCOUTQ)");
+
+ if (ioctl(sk, SIOCOUTQNSD, &state->outq_nsd_len))
+ test_error("ioctl(SIOCOUTQNSD)");
+ test_sock_checkpoint_queue(sk, TCP_SEND_QUEUE, state->outq_len, &state->out);
+
+ if (ioctl(sk, SIOCINQ, &state->inq_len))
+ test_error("ioctl(SIOCINQ)");
+ test_sock_checkpoint_queue(sk, TCP_RECV_QUEUE, state->inq_len, &state->in);
+
+ if (state->info.tcpi_state == TCP_CLOSE)
+ state->outq_len = state->outq_nsd_len = 0;
+
+ len = sizeof(state->mss);
+ ret = getsockopt(sk, SOL_TCP, TCP_MAXSEG, &state->mss, &len);
+ if (ret || len != sizeof(state->mss))
+ test_error("getsockopt(TCP_MAXSEG): %d", (int)len);
+
+ len = sizeof(state->timestamp);
+ ret = getsockopt(sk, SOL_TCP, TCP_TIMESTAMP, &state->timestamp, &len);
+ if (ret || len != sizeof(state->timestamp))
+ test_error("getsockopt(TCP_TIMESTAMP): %d", (int)len);
+}
+
+void test_ao_checkpoint(int sk, struct tcp_ao_repair *state)
+{
+ socklen_t len = sizeof(*state);
+ int ret;
+
+ memset(state, 0, sizeof(*state));
+
+ ret = getsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, &len);
+ if (ret || len != sizeof(*state))
+ test_error("getsockopt(TCP_AO_REPAIR): %d", (int)len);
+}
+
+static void test_sock_restore_seq(int sk, int queue, uint32_t seq)
+{
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+ if (setsockopt(sk, SOL_TCP, TCP_QUEUE_SEQ, &seq, sizeof(seq)))
+ test_error("setsockopt(TCP_QUEUE_SEQ)");
+}
+
+static void test_sock_restore_queue(int sk, int queue, void *buf, int len)
+{
+ int chunk = len;
+ size_t off = 0;
+
+ if (len == 0)
+ return;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_QUEUE, &queue, sizeof(queue)))
+ test_error("setsockopt(TCP_REPAIR_QUEUE)");
+
+ do {
+ int ret;
+
+ ret = send(sk, buf + off, chunk, 0);
+ if (ret <= 0) {
+ if (chunk > 1024) {
+ chunk >>= 1;
+ continue;
+ }
+ test_error("send()");
+ }
+ off += ret;
+ len -= ret;
+ } while (len > 0);
+}
+
+void __test_sock_restore(int sk, const char *device,
+ struct tcp_sock_state *state,
+ void *saddr, void *daddr, size_t addr_size)
+{
+ struct tcp_repair_opt opts[4];
+ unsigned int opt_nr = 0;
+ long flags;
+
+ if (bind(sk, saddr, addr_size))
+ test_error("bind()");
+
+ flags = fcntl(sk, F_GETFL);
+ if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+ test_error("fcntl()");
+
+ test_sock_restore_seq(sk, TCP_RECV_QUEUE, state->in.seq - state->inq_len);
+ test_sock_restore_seq(sk, TCP_SEND_QUEUE, state->out.seq - state->outq_len);
+
+ if (device != NULL && setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE,
+ device, strlen(device) + 1))
+ test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
+
+ if (connect(sk, daddr, addr_size))
+ test_error("connect()");
+
+ if (state->info.tcpi_options & TCPI_OPT_SACK) {
+ opts[opt_nr].opt_code = TCPOPT_SACK_PERMITTED;
+ opts[opt_nr].opt_val = 0;
+ opt_nr++;
+ }
+ if (state->info.tcpi_options & TCPI_OPT_WSCALE) {
+ opts[opt_nr].opt_code = TCPOPT_WINDOW;
+ opts[opt_nr].opt_val = state->info.tcpi_snd_wscale +
+ (state->info.tcpi_rcv_wscale << 16);
+ opt_nr++;
+ }
+ if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
+ opts[opt_nr].opt_code = TCPOPT_TIMESTAMP;
+ opts[opt_nr].opt_val = 0;
+ opt_nr++;
+ }
+ opts[opt_nr].opt_code = TCPOPT_MAXSEG;
+ opts[opt_nr].opt_val = state->mss;
+ opt_nr++;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_OPTIONS, opts, opt_nr * sizeof(opts[0])))
+ test_error("setsockopt(TCP_REPAIR_OPTIONS)");
+
+ if (state->info.tcpi_options & TCPI_OPT_TIMESTAMPS) {
+ if (setsockopt(sk, SOL_TCP, TCP_TIMESTAMP,
+ &state->timestamp, opt_nr * sizeof(opts[0])))
+ test_error("setsockopt(TCP_TIMESTAMP)");
+ }
+ test_sock_restore_queue(sk, TCP_RECV_QUEUE, state->in.buf, state->inq_len);
+ test_sock_restore_queue(sk, TCP_SEND_QUEUE, state->out.buf, state->outq_len);
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR_WINDOW, &state->trw, sizeof(state->trw)))
+ test_error("setsockopt(TCP_REPAIR_WINDOW)");
+}
+
+void test_ao_restore(int sk, struct tcp_ao_repair *state)
+{
+ if (setsockopt(sk, SOL_TCP, TCP_AO_REPAIR, state, sizeof(*state)))
+ test_error("setsockopt(TCP_AO_REPAIR)");
+}
+
+void test_sock_state_free(struct tcp_sock_state *state)
+{
+ free(state->out.buf);
+ free(state->in.buf);
+}
+
+void test_enable_repair(int sk)
+{
+ int val = TCP_REPAIR_ON;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+}
+
+void test_disable_repair(int sk)
+{
+ int val = TCP_REPAIR_OFF_NO_WP;
+
+ if (setsockopt(sk, SOL_TCP, TCP_REPAIR, &val, sizeof(val)))
+ test_error("setsockopt(TCP_REPAIR)");
+}
+
+void test_kill_sk(int sk)
+{
+ test_enable_repair(sk);
+ close(sk);
+}
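Review bot: The `setsockopt(TCP_TIMESTAMP)` call in __test_sock_restore() passes `opt_nr * sizeof(opts[0])` as the length of `state->timestamp`, which is a single `int`; the length looks copied from the TCP_REPAIR_OPTIONS call above and should be `sizeof(state->timestamp)`. In test_sock_restore_queue(), `chunk` is never reduced after a partial `send()`, so the next iteration can ask for `chunk` bytes starting at `buf + off` when fewer than that remain, reading past the end of the checkpointed buffer; adding `if (chunk > len) chunk = len;` at the top of the loop body keeps the request inside it. That loop also retries on any failed `send()` while `chunk > 1024` without looking at errno, which hides the real error until the chunk size bottoms out.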
diff --git a/tools/testing/selftests/net/tcp_ao/lib/setup.c b/tools/testing/selftests/net/tcp_ao/lib/setup.c
new file mode 100644
index 000000000000..e408b9243b2c
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/setup.c
@@ -0,0 +1,361 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <fcntl.h>
+#include <pthread.h>
+#include <sched.h>
+#include <signal.h>
+#include "aolib.h"
+
+/*
+ * Can't be included in the header: it defines static variables which
+ * will be unique to every object. Let's include it only once here.
+ */
+#include "../../../kselftest.h"
+
+/* Prevent overriding of one thread's output by another */
+static pthread_mutex_t ksft_print_lock = PTHREAD_MUTEX_INITIALIZER;
+
+void __test_msg(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_print_msg("%s", buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_ok(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_pass("%s", buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_fail(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_fail("%s", buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_xfail(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_xfail("%s", buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_error(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_error("%s", buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+void __test_skip(const char *buf)
+{
+ pthread_mutex_lock(&ksft_print_lock);
+ ksft_test_result_skip("%s", buf);
+ pthread_mutex_unlock(&ksft_print_lock);
+}
+
+static volatile int failed;
+static volatile int skipped;
+
+void test_failed(void)
+{
+ failed = 1;
+}
+
+static void test_exit(void)
+{
+ if (failed) {
+ ksft_exit_fail();
+ } else if (skipped) {
+ /* ksft_exit_skip() is different from ksft_exit_*() */
+ ksft_print_cnts();
+ exit(KSFT_SKIP);
+ } else {
+ ksft_exit_pass();
+ }
+}
+
+struct dlist_t {
+ void (*destruct)(void);
+ struct dlist_t *next;
+};
+static struct dlist_t *destructors_list;
+
+void test_add_destructor(void (*d)(void))
+{
+ struct dlist_t *p;
+
+ p = malloc(sizeof(struct dlist_t));
+ if (p == NULL)
+ test_error("malloc() failed");
+
+ p->next = destructors_list;
+ p->destruct = d;
+ destructors_list = p;
+}
+
+static void test_destructor(void) __attribute__((destructor));
+static void test_destructor(void)
+{
+ while (destructors_list) {
+ struct dlist_t *p = destructors_list->next;
+
+ destructors_list->destruct();
+ free(destructors_list);
+ destructors_list = p;
+ }
+ test_exit();
+}
+
+static void sig_int(int signo)
+{
+ test_error("Caught SIGINT - exiting");
+}
+
+int open_netns(void)
+{
+ const char *netns_path = "/proc/self/ns/net";
+ int fd;
+
+ fd = open(netns_path, O_RDONLY);
+ if (fd < 0)
+ test_error("open(%s)", netns_path);
+ return fd;
+}
+
+int unshare_open_netns(void)
+{
+ if (unshare(CLONE_NEWNET) != 0)
+ test_error("unshare()");
+
+ return open_netns();
+}
+
+void switch_ns(int fd)
+{
+ if (setns(fd, CLONE_NEWNET))
+ test_error("setns()");
+}
+
+int switch_save_ns(int new_ns)
+{
+ int ret = open_netns();
+
+ switch_ns(new_ns);
+ return ret;
+}
+
+static int nsfd_outside = -1;
+static int nsfd_parent = -1;
+static int nsfd_child = -1;
+const char veth_name[] = "ktst-veth";
+
+static void init_namespaces(void)
+{
+ nsfd_outside = open_netns();
+ nsfd_parent = unshare_open_netns();
+ nsfd_child = unshare_open_netns();
+}
+
+static void link_init(const char *veth, int family, uint8_t prefix,
+ union tcp_addr addr, union tcp_addr dest)
+{
+ if (link_set_up(veth))
+ test_error("Failed to set link up");
+ if (ip_addr_add(veth, family, addr, prefix))
+ test_error("Failed to add ip address");
+ if (ip_route_add(veth, family, addr, dest))
+ test_error("Failed to add route");
+}
+
+static unsigned int nr_threads = 1;
+
+static pthread_mutex_t sync_lock = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t sync_cond = PTHREAD_COND_INITIALIZER;
+static volatile unsigned int stage_threads[2];
+static volatile unsigned int stage_nr;
+
+/* synchronize all threads in the same stage */
+void synchronize_threads(void)
+{
+ unsigned int q = stage_nr;
+
+ pthread_mutex_lock(&sync_lock);
+ stage_threads[q]++;
+ if (stage_threads[q] == nr_threads) {
+ stage_nr ^= 1;
+ stage_threads[stage_nr] = 0;
+ pthread_cond_signal(&sync_cond);
+ }
+ while (stage_threads[q] < nr_threads)
+ pthread_cond_wait(&sync_cond, &sync_lock);
+ pthread_mutex_unlock(&sync_lock);
+}
+
+__thread union tcp_addr this_ip_addr;
+__thread union tcp_addr this_ip_dest;
+int test_family;
+
+struct new_pthread_arg {
+ thread_fn func;
+ union tcp_addr my_ip;
+ union tcp_addr dest_ip;
+};
+static void *new_pthread_entry(void *arg)
+{
+ struct new_pthread_arg *p = arg;
+
+ this_ip_addr = p->my_ip;
+ this_ip_dest = p->dest_ip;
+ p->func(NULL); /* shouldn't return */
+ exit(KSFT_FAIL);
+}
+
+static void __test_skip_all(const char *msg)
+{
+ ksft_set_plan(1);
+ ksft_print_header();
+ skipped = 1;
+ test_skip("%s", msg);
+ exit(KSFT_SKIP);
+}
+
+void __test_init(unsigned int ntests, int family, unsigned int prefix,
+ union tcp_addr addr1, union tcp_addr addr2,
+ thread_fn peer1, thread_fn peer2)
+{
+ struct sigaction sa = {
+ .sa_handler = sig_int,
+ .sa_flags = SA_RESTART,
+ };
+ time_t seed = time(NULL);
+
+ sigemptyset(&sa.sa_mask);
+ if (sigaction(SIGINT, &sa, NULL))
+ test_error("Can't set SIGINT handler");
+
+ test_family = family;
+ if (!kernel_config_has(KCONFIG_NET_NS))
+ __test_skip_all(tests_skip_reason[KCONFIG_NET_NS]);
+ if (!kernel_config_has(KCONFIG_VETH))
+ __test_skip_all(tests_skip_reason[KCONFIG_VETH]);
+ if (!kernel_config_has(KCONFIG_TCP_AO))
+ __test_skip_all(tests_skip_reason[KCONFIG_TCP_AO]);
+
+ ksft_set_plan(ntests);
+ test_print("rand seed %u", (unsigned int)seed);
+ srand(seed);
+
+
+ ksft_print_header();
+ init_namespaces();
+
+ if (add_veth(veth_name, nsfd_parent, nsfd_child))
+ test_error("Failed to add veth");
+
+ switch_ns(nsfd_child);
+ link_init(veth_name, family, prefix, addr2, addr1);
+ if (peer2) {
+ struct new_pthread_arg targ;
+ pthread_t t;
+
+ targ.my_ip = addr2;
+ targ.dest_ip = addr1;
+ targ.func = peer2;
+ nr_threads++;
+ if (pthread_create(&t, NULL, new_pthread_entry, &targ))
+ test_error("Failed to create pthread");
+ }
+ switch_ns(nsfd_parent);
+ link_init(veth_name, family, prefix, addr1, addr2);
+
+ this_ip_addr = addr1;
+ this_ip_dest = addr2;
+ peer1(NULL);
+ if (failed)
+ exit(KSFT_FAIL);
+ else
+ exit(KSFT_PASS);
+}
+
+/* /proc/sys/net/core/optmem_max artifically limits the amount of memory
+ * that can be allocated with sock_kmalloc() on each socket in the system.
+ * It is not virtualized in v6.7, so it has to written outside test
+ * namespaces. To be nice a test will revert optmem back to the old value.
+ * Keeping it simple without any file lock, which means the tests that
+ * need to set/increase optmem value shouldn't run in parallel.
+ * Also, not re-entrant.
+ * Since commit f5769faeec36 ("net: Namespace-ify sysctl_optmem_max")
+ * it is per-namespace, keeping logic for non-virtualized optmem_max
+ * for v6.7, which supports TCP-AO.
+ */
+static const char *optmem_file = "/proc/sys/net/core/optmem_max";
+static size_t saved_optmem;
+static int optmem_ns = -1;
+
+static bool is_optmem_namespaced(void)
+{
+ if (optmem_ns == -1) {
+ int old_ns = switch_save_ns(nsfd_child);
+
+ optmem_ns = !access(optmem_file, F_OK);
+ switch_ns(old_ns);
+ }
+ return !!optmem_ns;
+}
+
+size_t test_get_optmem(void)
+{
+ int old_ns = 0;
+ FILE *foptmem;
+ size_t ret;
+
+ if (!is_optmem_namespaced())
+ old_ns = switch_save_ns(nsfd_outside);
+ foptmem = fopen(optmem_file, "r");
+ if (!foptmem)
+ test_error("failed to open %s", optmem_file);
+
+ if (fscanf(foptmem, "%zu", &ret) != 1)
+ test_error("can't read from %s", optmem_file);
+ fclose(foptmem);
+ if (!is_optmem_namespaced())
+ switch_ns(old_ns);
+ return ret;
+}
+
+static void __test_set_optmem(size_t new, size_t *old)
+{
+ int old_ns = 0;
+ FILE *foptmem;
+
+ if (old != NULL)
+ *old = test_get_optmem();
+
+ if (!is_optmem_namespaced())
+ old_ns = switch_save_ns(nsfd_outside);
+ foptmem = fopen(optmem_file, "w");
+ if (!foptmem)
+ test_error("failed to open %s", optmem_file);
+
+ if (fprintf(foptmem, "%zu", new) <= 0)
+ test_error("can't write %zu to %s", new, optmem_file);
+ fclose(foptmem);
+ if (!is_optmem_namespaced())
+ switch_ns(old_ns);
+}
+
+static void test_revert_optmem(void)
+{
+ if (saved_optmem == 0)
+ return;
+
+ __test_set_optmem(saved_optmem, NULL);
+}
+
+void test_set_optmem(size_t value)
+{
+ if (saved_optmem == 0) {
+ __test_set_optmem(value, &saved_optmem);
+ test_add_destructor(test_revert_optmem);
+ } else {
+ __test_set_optmem(value, NULL);
+ }
+}
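Review bot: In `__test_init`, `targ` is a block-scoped local of the `if (peer2)` branch, yet its address is handed to `pthread_create()` and `new_pthread_entry` reads `p->my_ip`, `p->dest_ip` and `p->func` with nothing ordering those reads before the block ends, so the new thread can formally read an object whose lifetime is over. It probably works today because `__test_init` never returns and the slot is not reused, but that depends on the compiler's stack layout. Giving the argument static storage, `static struct new_pthread_arg targ;`, removes that dependency. Separately, `sig_int` goes through `test_error()`; if that reaches `__test_error` (the macro is not visible here), it takes `ksft_print_lock` from signal context and can self-deadlock when SIGINT interrupts a thread that is already printing.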
diff --git a/tools/testing/selftests/net/tcp_ao/lib/sock.c b/tools/testing/selftests/net/tcp_ao/lib/sock.c
new file mode 100644
index 000000000000..15aeb0963058
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/sock.c
@@ -0,0 +1,596 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <alloca.h>
+#include <fcntl.h>
+#include <inttypes.h>
+#include <string.h>
+#include "../../../../../include/linux/kernel.h"
+#include "../../../../../include/linux/stringify.h"
+#include "aolib.h"
+
+const unsigned int test_server_port = 7010;
+int __test_listen_socket(int backlog, void *addr, size_t addr_sz)
+{
+ int err, sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ long flags;
+
+ if (sk < 0)
+ test_error("socket()");
+
+ err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, veth_name,
+ strlen(veth_name) + 1);
+ if (err < 0)
+ test_error("setsockopt(SO_BINDTODEVICE)");
+
+ if (bind(sk, (struct sockaddr *)addr, addr_sz) < 0)
+ test_error("bind()");
+
+ flags = fcntl(sk, F_GETFL);
+ if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+ test_error("fcntl()");
+
+ if (listen(sk, backlog))
+ test_error("listen()");
+
+ return sk;
+}
+
+int test_wait_fd(int sk, time_t sec, bool write)
+{
+ struct timeval tv = { .tv_sec = sec };
+ struct timeval *ptv = NULL;
+ fd_set fds, efds;
+ int ret;
+ socklen_t slen = sizeof(ret);
+
+ FD_ZERO(&fds);
+ FD_SET(sk, &fds);
+ FD_ZERO(&efds);
+ FD_SET(sk, &efds);
+
+ if (sec)
+ ptv = &tv;
+
+ errno = 0;
+ if (write)
+ ret = select(sk + 1, NULL, &fds, &efds, ptv);
+ else
+ ret = select(sk + 1, &fds, NULL, &efds, ptv);
+ if (ret < 0)
+ return -errno;
+ if (ret == 0) {
+ errno = ETIMEDOUT;
+ return -ETIMEDOUT;
+ }
+
+ if (getsockopt(sk, SOL_SOCKET, SO_ERROR, &ret, &slen))
+ return -errno;
+ if (ret)
+ return -ret;
+ return 0;
+}
+
+int __test_connect_socket(int sk, const char *device,
+ void *addr, size_t addr_sz, time_t timeout)
+{
+ long flags;
+ int err;
+
+ if (device != NULL) {
+ err = setsockopt(sk, SOL_SOCKET, SO_BINDTODEVICE, device,
+ strlen(device) + 1);
+ if (err < 0)
+ test_error("setsockopt(SO_BINDTODEVICE, %s)", device);
+ }
+
+ if (!timeout) {
+ err = connect(sk, addr, addr_sz);
+ if (err) {
+ err = -errno;
+ goto out;
+ }
+ return 0;
+ }
+
+ flags = fcntl(sk, F_GETFL);
+ if ((flags < 0) || (fcntl(sk, F_SETFL, flags | O_NONBLOCK) < 0))
+ test_error("fcntl()");
+
+ if (connect(sk, addr, addr_sz) < 0) {
+ if (errno != EINPROGRESS) {
+ err = -errno;
+ goto out;
+ }
+ if (timeout < 0)
+ return sk;
+ err = test_wait_fd(sk, timeout, 1);
+ if (err)
+ goto out;
+ }
+ return sk;
+
+out:
+ close(sk);
+ return err;
+}
+
+int __test_set_md5(int sk, void *addr, size_t addr_sz, uint8_t prefix,
+ int vrf, const char *password)
+{
+ size_t pwd_len = strlen(password);
+ struct tcp_md5sig md5sig = {};
+
+ md5sig.tcpm_keylen = pwd_len;
+ memcpy(md5sig.tcpm_key, password, pwd_len);
+ md5sig.tcpm_flags = TCP_MD5SIG_FLAG_PREFIX;
+ md5sig.tcpm_prefixlen = prefix;
+ if (vrf >= 0) {
+ md5sig.tcpm_flags |= TCP_MD5SIG_FLAG_IFINDEX;
+ md5sig.tcpm_ifindex = (uint8_t)vrf;
+ }
+ memcpy(&md5sig.tcpm_addr, addr, addr_sz);
+
+ errno = 0;
+ return setsockopt(sk, IPPROTO_TCP, TCP_MD5SIG_EXT,
+ &md5sig, sizeof(md5sig));
+}
+
+
+int test_prepare_key_sockaddr(struct tcp_ao_add *ao, const char *alg,
+ void *addr, size_t addr_sz, bool set_current, bool set_rnext,
+ uint8_t prefix, uint8_t vrf, uint8_t sndid, uint8_t rcvid,
+ uint8_t maclen, uint8_t keyflags,
+ uint8_t keylen, const char *key)
+{
+ memset(ao, 0, sizeof(struct tcp_ao_add));
+
+ ao->set_current = !!set_current;
+ ao->set_rnext = !!set_rnext;
+ ao->prefix = prefix;
+ ao->sndid = sndid;
+ ao->rcvid = rcvid;
+ ao->maclen = maclen;
+ ao->keyflags = keyflags;
+ ao->keylen = keylen;
+ ao->ifindex = vrf;
+
+ memcpy(&ao->addr, addr, addr_sz);
+
+ if (strlen(alg) > 64)
+ return -ENOBUFS;
+ strncpy(ao->alg_name, alg, 64);
+
+ memcpy(ao->key, key,
+ (keylen > TCP_AO_MAXKEYLEN) ? TCP_AO_MAXKEYLEN : keylen);
+ return 0;
+}
+
+static int test_get_ao_keys_nr(int sk)
+{
+ struct tcp_ao_getsockopt tmp = {};
+ socklen_t tmp_sz = sizeof(tmp);
+ int ret;
+
+ tmp.nkeys = 1;
+ tmp.get_all = 1;
+
+ ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz);
+ if (ret)
+ return -errno;
+ return (int)tmp.nkeys;
+}
+
+int test_get_one_ao(int sk, struct tcp_ao_getsockopt *out,
+ void *addr, size_t addr_sz, uint8_t prefix,
+ uint8_t sndid, uint8_t rcvid)
+{
+ struct tcp_ao_getsockopt tmp = {};
+ socklen_t tmp_sz = sizeof(tmp);
+ int ret;
+
+ memcpy(&tmp.addr, addr, addr_sz);
+ tmp.prefix = prefix;
+ tmp.sndid = sndid;
+ tmp.rcvid = rcvid;
+ tmp.nkeys = 1;
+
+ ret = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS, &tmp, &tmp_sz);
+ if (ret)
+ return ret;
+ if (tmp.nkeys != 1)
+ return -E2BIG;
+ *out = tmp;
+ return 0;
+}
+
+int test_get_ao_info(int sk, struct tcp_ao_info_opt *out)
+{
+ socklen_t sz = sizeof(*out);
+
+ out->reserved = 0;
+ out->reserved2 = 0;
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, out, &sz))
+ return -errno;
+ if (sz != sizeof(*out))
+ return -EMSGSIZE;
+ return 0;
+}
+
+int test_set_ao_info(int sk, struct tcp_ao_info_opt *in)
+{
+ socklen_t sz = sizeof(*in);
+
+ in->reserved = 0;
+ in->reserved2 = 0;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, in, sz))
+ return -errno;
+ return 0;
+}
+
+int test_cmp_getsockopt_setsockopt(const struct tcp_ao_add *a,
+ const struct tcp_ao_getsockopt *b)
+{
+ bool is_kdf_aes_128_cmac = false;
+ bool is_cmac_aes = false;
+
+ if (!strcmp("cmac(aes128)", a->alg_name)) {
+ is_kdf_aes_128_cmac = (a->keylen != 16);
+ is_cmac_aes = true;
+ }
+
+#define __cmp_ao(member) \
+do { \
+ if (b->member != a->member) { \
+ test_fail("getsockopt(): " __stringify(member) " %u != %u", \
+ b->member, a->member); \
+ return -1; \
+ } \
+} while(0)
+ __cmp_ao(sndid);
+ __cmp_ao(rcvid);
+ __cmp_ao(prefix);
+ __cmp_ao(keyflags);
+ __cmp_ao(ifindex);
+ if (a->maclen) {
+ __cmp_ao(maclen);
+ } else if (b->maclen != 12) {
+ test_fail("getsockopt(): expected default maclen 12, but it's %u",
+ b->maclen);
+ return -1;
+ }
+ if (!is_kdf_aes_128_cmac) {
+ __cmp_ao(keylen);
+ } else if (b->keylen != 16) {
+ test_fail("getsockopt(): expected keylen 16 for cmac(aes128), but it's %u",
+ b->keylen);
+ return -1;
+ }
+#undef __cmp_ao
+ if (!is_kdf_aes_128_cmac && memcmp(b->key, a->key, a->keylen)) {
+ test_fail("getsockopt(): returned key is different `%s' != `%s'",
+ b->key, a->key);
+ return -1;
+ }
+ if (memcmp(&b->addr, &a->addr, sizeof(b->addr))) {
+ test_fail("getsockopt(): returned address is different");
+ return -1;
+ }
+ if (!is_cmac_aes && strcmp(b->alg_name, a->alg_name)) {
+ test_fail("getsockopt(): returned algorithm %s is different than %s", b->alg_name, a->alg_name);
+ return -1;
+ }
+ if (is_cmac_aes && strcmp(b->alg_name, "cmac(aes)")) {
+ test_fail("getsockopt(): returned algorithm %s is different than cmac(aes)", b->alg_name);
+ return -1;
+ }
+ /* For a established key rotation test don't add a key with
+ * set_current = 1, as it's likely to change by peer's request;
+ * rather use setsockopt(TCP_AO_INFO)
+ */
+ if (a->set_current != b->is_current) {
+ test_fail("getsockopt(): returned key is not Current_key");
+ return -1;
+ }
+ if (a->set_rnext != b->is_rnext) {
+ test_fail("getsockopt(): returned key is not RNext_key");
+ return -1;
+ }
+
+ return 0;
+}
+
+int test_cmp_getsockopt_setsockopt_ao(const struct tcp_ao_info_opt *a,
+ const struct tcp_ao_info_opt *b)
+{
+ /* No check for ::current_key, as it may change by the peer */
+ if (a->ao_required != b->ao_required) {
+ test_fail("getsockopt(): returned ao doesn't have ao_required");
+ return -1;
+ }
+ if (a->accept_icmps != b->accept_icmps) {
+ test_fail("getsockopt(): returned ao doesn't accept ICMPs");
+ return -1;
+ }
+ if (a->set_rnext && a->rnext != b->rnext) {
+ test_fail("getsockopt(): RNext KeyID has changed");
+ return -1;
+ }
+#define __cmp_cnt(member) \
+do { \
+ if (b->member != a->member) { \
+ test_fail("getsockopt(): " __stringify(member) " %llu != %llu", \
+ b->member, a->member); \
+ return -1; \
+ } \
+} while(0)
+ if (a->set_counters) {
+ __cmp_cnt(pkt_good);
+ __cmp_cnt(pkt_bad);
+ __cmp_cnt(pkt_key_not_found);
+ __cmp_cnt(pkt_ao_required);
+ __cmp_cnt(pkt_dropped_icmp);
+ }
+#undef __cmp_cnt
+ return 0;
+}
+
+int test_get_tcp_ao_counters(int sk, struct tcp_ao_counters *out)
+{
+ struct tcp_ao_getsockopt *key_dump;
+ socklen_t key_dump_sz = sizeof(*key_dump);
+ struct tcp_ao_info_opt info = {};
+ bool c1, c2, c3, c4, c5;
+ struct netstat *ns;
+ int err, nr_keys;
+
+ memset(out, 0, sizeof(*out));
+
+ /* per-netns */
+ ns = netstat_read();
+ out->netns_ao_good = netstat_get(ns, "TCPAOGood", &c1);
+ out->netns_ao_bad = netstat_get(ns, "TCPAOBad", &c2);
+ out->netns_ao_key_not_found = netstat_get(ns, "TCPAOKeyNotFound", &c3);
+ out->netns_ao_required = netstat_get(ns, "TCPAORequired", &c4);
+ out->netns_ao_dropped_icmp = netstat_get(ns, "TCPAODroppedIcmps", &c5);
+ netstat_free(ns);
+ if (c1 || c2 || c3 || c4 || c5)
+ return -EOPNOTSUPP;
+
+ err = test_get_ao_info(sk, &info);
+ if (err)
+ return err;
+
+ /* per-socket */
+ out->ao_info_pkt_good = info.pkt_good;
+ out->ao_info_pkt_bad = info.pkt_bad;
+ out->ao_info_pkt_key_not_found = info.pkt_key_not_found;
+ out->ao_info_pkt_ao_required = info.pkt_ao_required;
+ out->ao_info_pkt_dropped_icmp = info.pkt_dropped_icmp;
+
+ /* per-key */
+ nr_keys = test_get_ao_keys_nr(sk);
+ if (nr_keys < 0)
+ return nr_keys;
+ if (nr_keys == 0)
+ test_error("test_get_ao_keys_nr() == 0");
+ out->nr_keys = (size_t)nr_keys;
+ key_dump = calloc(nr_keys, key_dump_sz);
+ if (!key_dump)
+ return -errno;
+
+ key_dump[0].nkeys = nr_keys;
+ key_dump[0].get_all = 1;
+ key_dump[0].get_all = 1;
+ err = getsockopt(sk, IPPROTO_TCP, TCP_AO_GET_KEYS,
+ key_dump, &key_dump_sz);
+ if (err) {
+ free(key_dump);
+ return -errno;
+ }
+
+ out->key_cnts = calloc(nr_keys, sizeof(out->key_cnts[0]));
+ if (!out->key_cnts) {
+ free(key_dump);
+ return -errno;
+ }
+
+ while (nr_keys--) {
+ out->key_cnts[nr_keys].sndid = key_dump[nr_keys].sndid;
+ out->key_cnts[nr_keys].rcvid = key_dump[nr_keys].rcvid;
+ out->key_cnts[nr_keys].pkt_good = key_dump[nr_keys].pkt_good;
+ out->key_cnts[nr_keys].pkt_bad = key_dump[nr_keys].pkt_bad;
+ }
+ free(key_dump);
+
+ return 0;
+}
+
+int __test_tcp_ao_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected)
+{
+#define __cmp_ao(cnt, expecting_inc) \
+do { \
+ if (before->cnt > after->cnt) { \
+ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64, \
+ tst_name ?: "", before->cnt, after->cnt); \
+ return -1; \
+ } \
+ if ((before->cnt != after->cnt) != (expecting_inc)) { \
+ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64, \
+ tst_name ?: "", (expecting_inc) ? "" : "not ", \
+ before->cnt, after->cnt); \
+ return -1; \
+ } \
+} while(0)
+
+ errno = 0;
+ /* per-netns */
+ __cmp_ao(netns_ao_good, !!(expected & TEST_CNT_NS_GOOD));
+ __cmp_ao(netns_ao_bad, !!(expected & TEST_CNT_NS_BAD));
+ __cmp_ao(netns_ao_key_not_found,
+ !!(expected & TEST_CNT_NS_KEY_NOT_FOUND));
+ __cmp_ao(netns_ao_required, !!(expected & TEST_CNT_NS_AO_REQUIRED));
+ __cmp_ao(netns_ao_dropped_icmp,
+ !!(expected & TEST_CNT_NS_DROPPED_ICMP));
+ /* per-socket */
+ __cmp_ao(ao_info_pkt_good, !!(expected & TEST_CNT_SOCK_GOOD));
+ __cmp_ao(ao_info_pkt_bad, !!(expected & TEST_CNT_SOCK_BAD));
+ __cmp_ao(ao_info_pkt_key_not_found,
+ !!(expected & TEST_CNT_SOCK_KEY_NOT_FOUND));
+ __cmp_ao(ao_info_pkt_ao_required, !!(expected & TEST_CNT_SOCK_AO_REQUIRED));
+ __cmp_ao(ao_info_pkt_dropped_icmp,
+ !!(expected & TEST_CNT_SOCK_DROPPED_ICMP));
+ return 0;
+#undef __cmp_ao
+}
+
+int test_tcp_ao_key_counters_cmp(const char *tst_name,
+ struct tcp_ao_counters *before,
+ struct tcp_ao_counters *after,
+ test_cnt expected,
+ int sndid, int rcvid)
+{
+ size_t i;
+#define __cmp_ao(i, cnt, expecting_inc) \
+do { \
+ if (before->key_cnts[i].cnt > after->key_cnts[i].cnt) { \
+ test_fail("%s: Decreased counter " __stringify(cnt) " %" PRIu64 " > %" PRIu64 " for key %u:%u", \
+ tst_name ?: "", before->key_cnts[i].cnt, \
+ after->key_cnts[i].cnt, \
+ before->key_cnts[i].sndid, \
+ before->key_cnts[i].rcvid); \
+ return -1; \
+ } \
+ if ((before->key_cnts[i].cnt != after->key_cnts[i].cnt) != (expecting_inc)) { \
+ test_fail("%s: Counter " __stringify(cnt) " was %sexpected to increase %" PRIu64 " => %" PRIu64 " for key %u:%u", \
+ tst_name ?: "", (expecting_inc) ? "" : "not ",\
+ before->key_cnts[i].cnt, \
+ after->key_cnts[i].cnt, \
+ before->key_cnts[i].sndid, \
+ before->key_cnts[i].rcvid); \
+ return -1; \
+ } \
+} while(0)
+
+ if (before->nr_keys != after->nr_keys) {
+ test_fail("%s: Keys changed on the socket %zu != %zu",
+ tst_name, before->nr_keys, after->nr_keys);
+ return -1;
+ }
+
+ /* per-key */
+ i = before->nr_keys;
+ while (i--) {
+ if (sndid >= 0 && before->key_cnts[i].sndid != sndid)
+ continue;
+ if (rcvid >= 0 && before->key_cnts[i].rcvid != rcvid)
+ continue;
+ __cmp_ao(i, pkt_good, !!(expected & TEST_CNT_KEY_GOOD));
+ __cmp_ao(i, pkt_bad, !!(expected & TEST_CNT_KEY_BAD));
+ }
+ return 0;
+#undef __cmp_ao
+}
+
+void test_tcp_ao_counters_free(struct tcp_ao_counters *cnts)
+{
+ free(cnts->key_cnts);
+}
+
+#define TEST_BUF_SIZE 4096
+ssize_t test_server_run(int sk, ssize_t quota, time_t timeout_sec)
+{
+ ssize_t total = 0;
+
+ do {
+ char buf[TEST_BUF_SIZE];
+ ssize_t bytes, sent;
+ int ret;
+
+ ret = test_wait_fd(sk, timeout_sec, 0);
+ if (ret)
+ return ret;
+
+ bytes = recv(sk, buf, sizeof(buf), 0);
+
+ if (bytes < 0)
+ test_error("recv(): %zd", bytes);
+ if (bytes == 0)
+ break;
+
+ ret = test_wait_fd(sk, timeout_sec, 1);
+ if (ret)
+ return ret;
+
+ sent = send(sk, buf, bytes, 0);
+ if (sent == 0)
+ break;
+ if (sent != bytes)
+ test_error("send()");
+ total += bytes;
+ } while (!quota || total < quota);
+
+ return total;
+}
+
+ssize_t test_client_loop(int sk, char *buf, size_t buf_sz,
+ const size_t msg_len, time_t timeout_sec)
+{
+ char msg[msg_len];
+ int nodelay = 1;
+ size_t i;
+
+ if (setsockopt(sk, IPPROTO_TCP, TCP_NODELAY, &nodelay, sizeof(nodelay)))
+ test_error("setsockopt(TCP_NODELAY)");
+
+ for (i = 0; i < buf_sz; i += min(msg_len, buf_sz - i)) {
+ size_t sent, bytes = min(msg_len, buf_sz - i);
+ int ret;
+
+ ret = test_wait_fd(sk, timeout_sec, 1);
+ if (ret)
+ return ret;
+
+ sent = send(sk, buf + i, bytes, 0);
+ if (sent == 0)
+ break;
+ if (sent != bytes)
+ test_error("send()");
+
+ bytes = 0;
+ do {
+ ssize_t got;
+
+ ret = test_wait_fd(sk, timeout_sec, 0);
+ if (ret)
+ return ret;
+
+ got = recv(sk, msg + bytes, sizeof(msg) - bytes, 0);
+ if (got <= 0)
+ return i;
+ bytes += got;
+ } while (bytes < sent);
+ if (bytes > sent)
+ test_error("recv(): %zd > %zd", bytes, sent);
+ if (memcmp(buf + i, msg, bytes) != 0) {
+ test_fail("received message differs");
+ return -1;
+ }
+ }
+ return i;
+}
+
+int test_client_verify(int sk, const size_t msg_len, const size_t nr,
+ time_t timeout_sec)
+{
+ size_t buf_sz = msg_len * nr;
+ char *buf = alloca(buf_sz);
+ ssize_t ret;
+
+ randomize_buffer(buf, buf_sz);
+ ret = test_client_loop(sk, buf, buf_sz, msg_len, timeout_sec);
+ if (ret < 0)
+ return (int)ret;
+ return ret != buf_sz ? -1 : 0;
+}
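Review bot: `__test_set_md5` copies `strlen(password)` bytes into `md5sig.tcpm_key` and `addr_sz` bytes into `md5sig.tcpm_addr` without comparing either length with the destination, so an over-long password or sockaddr passed by a test overruns the on-stack `struct tcp_md5sig`. A guard before the copies would do: `if (pwd_len > TCP_MD5SIG_MAXKEYLEN || addr_sz > sizeof(md5sig.tcpm_addr)) { errno = EINVAL; return -1; }`. `test_prepare_key_sockaddr` and `test_get_one_ao` have the same unchecked `memcpy(..., addr, addr_sz)`. The `strlen(alg) > 64` test in the former also lets a 64-byte name through, which leaves `alg_name` without a terminator if that field is 64 bytes (its declaration is not in this hunk); comparing against `sizeof(ao->alg_name) - 1` avoids that. The `(uint8_t)vrf` cast in `__test_set_md5` also truncates any ifindex above 255, which looks unintended.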
diff --git a/tools/testing/selftests/net/tcp_ao/lib/utils.c b/tools/testing/selftests/net/tcp_ao/lib/utils.c
new file mode 100644
index 000000000000..372daca525f5
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/lib/utils.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include "aolib.h"
+#include <string.h>
+
+void randomize_buffer(void *buf, size_t buflen)
+{
+ int *p = (int *)buf;
+ size_t words = buflen / sizeof(int);
+ size_t leftover = buflen % sizeof(int);
+
+ if (!buflen)
+ return;
+
+ while (words--)
+ *p++ = rand();
+
+ if (leftover) {
+ int tmp = rand();
+
+ memcpy(buf + buflen - leftover, &tmp, leftover);
+ }
+}
+
+const struct sockaddr_in6 addr_any6 = {
+ .sin6_family = AF_INET6,
+};
+
+const struct sockaddr_in addr_any4 = {
+ .sin_family = AF_INET,
+};
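Review bot: `randomize_buffer` stores through `int *p = (int *)buf`, which assumes the caller's buffer is `int`-aligned, and the tail copy does arithmetic on a `void *` (a GNU extension); neither assumption is stated anywhere. The only caller visible on this page passes an `alloca()` buffer, which is normally suitably aligned, so this is a latent issue rather than an observed failure. I would document the contract above the function, e.g. `/* Fills buf with rand() output for test payloads; buf must be int-aligned. Not for key material. */`, and write the tail as `memcpy((char *)buf + buflen - leftover, &tmp, leftover);`.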
diff --git a/tools/testing/selftests/net/tcp_ao/restore.c b/tools/testing/selftests/net/tcp_ao/restore.c
new file mode 100644
index 000000000000..8fdc808df325
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/restore.c
@@ -0,0 +1,236 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+/* This is over-simplified TCP_REPAIR for TCP_ESTABLISHED sockets
+ * It tests that TCP-AO enabled connection can be restored.
+ * For the proper socket repair see:
+ * https://github.com/checkpoint-restore/criu/blob/criu-dev/soccr/soccr.h
+ */
+#include <inttypes.h>
+#include "aolib.h"
+
+const size_t nr_packets = 20;
+const size_t msg_len = 100;
+const size_t quota = nr_packets * msg_len;
+#define fault(type) (inj == FAULT_ ## type)
+
+static void try_server_run(const char *tst_name, unsigned int port,
+ fault_t inj, test_cnt cnt_expected)
+{
+ const char *cnt_name = "TCPAOGood";
+ struct tcp_ao_counters ao1, ao2;
+ uint64_t before_cnt, after_cnt;
+ int sk, lsk;
+ time_t timeout;
+ ssize_t bytes;
+
+ if (fault(TIMEOUT))
+ cnt_name = "TCPAOBad";
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ synchronize_threads(); /* 1: MKT added => connect() */
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ close(lsk);
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ goto out;
+ }
+
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ bytes = test_server_run(sk, quota, timeout);
+ if (fault(TIMEOUT)) {
+ if (bytes > 0)
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ else
+ test_ok("%s: server couldn't serve", tst_name);
+ } else {
+ if (bytes != quota)
+ test_fail("%s: server served: %zd", tst_name, bytes);
+ else
+ test_ok("%s: server alive", tst_name);
+ }
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+
+ /*
+ * Before close() as that will send FIN and move the peer in TCP_CLOSE
+ * and that will prevent reading AO counters from the peer's socket.
+ */
+ synchronize_threads(); /* 3: verified => closed */
+out:
+ close(sk);
+}
+
+static void *server_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+
+ try_server_run("TCP-AO migrate to another socket", port++,
+ 0, TEST_CNT_GOOD);
+ try_server_run("TCP-AO with wrong send ISN", port++,
+ FAULT_TIMEOUT, TEST_CNT_BAD);
+ try_server_run("TCP-AO with wrong receive ISN", port++,
+ FAULT_TIMEOUT, TEST_CNT_BAD);
+ try_server_run("TCP-AO with wrong send SEQ ext number", port++,
+ FAULT_TIMEOUT, TEST_CNT_BAD);
+ try_server_run("TCP-AO with wrong receive SEQ ext number", port++,
+ FAULT_TIMEOUT, TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+
+ synchronize_threads(); /* don't race to exit: client exits */
+ return NULL;
+}
+
+static void test_get_sk_checkpoint(unsigned int server_port, sockaddr_af *saddr,
+ struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img)
+{
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, server_port) <= 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("pre-migrate verify failed");
+
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, img, saddr);
+ test_ao_checkpoint(sk, ao_img);
+ test_kill_sk(sk);
+}
+
+static void test_sk_restore(const char *tst_name, unsigned int server_port,
+ sockaddr_af *saddr, struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img,
+ fault_t inj, test_cnt cnt_expected)
+{
+ const char *cnt_name = "TCPAOGood";
+ struct tcp_ao_counters ao1, ao2;
+ uint64_t before_cnt, after_cnt;
+ time_t timeout;
+ int sk;
+
+ if (fault(TIMEOUT))
+ cnt_name = "TCPAOBad";
+
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ test_sock_restore(sk, img, saddr, this_ip_dest, server_port);
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ test_ao_restore(sk, ao_img);
+
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ test_disable_repair(sk);
+ test_sock_state_free(img);
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ if (test_client_verify(sk, msg_len, nr_packets, timeout)) {
+ if (fault(TIMEOUT))
+ test_ok("%s: post-migrate connection is broken", tst_name);
+ else
+ test_fail("%s: post-migrate connection is working", tst_name);
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("%s: post-migrate connection still working", tst_name);
+ else
+ test_ok("%s: post-migrate connection is alive", tst_name);
+ }
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ test_tcp_ao_counters_cmp(tst_name, &ao1, &ao2, cnt_expected);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+ synchronize_threads(); /* 3: verified => closed */
+ close(sk);
+}
+
+static void *client_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+ struct tcp_sock_state tcp_img;
+ struct tcp_ao_repair ao_img;
+ sockaddr_af saddr;
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ test_sk_restore("TCP-AO migrate to another socket", port++,
+ &saddr, &tcp_img, &ao_img, 0, TEST_CNT_GOOD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.snt_isn += 1;
+ test_sk_restore("TCP-AO with wrong send ISN", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.rcv_isn += 1;
+ test_sk_restore("TCP-AO with wrong receive ISN", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT, TEST_CNT_BAD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.snd_sne += 1;
+ test_sk_restore("TCP-AO with wrong send SEQ ext number", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ TEST_CNT_NS_BAD | TEST_CNT_GOOD);
+
+ test_get_sk_checkpoint(port, &saddr, &tcp_img, &ao_img);
+ ao_img.rcv_sne += 1;
+ test_sk_restore("TCP-AO with wrong receive SEQ ext number", port++,
+ &saddr, &tcp_img, &ao_img, FAULT_TIMEOUT,
+ TEST_CNT_NS_GOOD | TEST_CNT_BAD);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(20, server_fn, client_fn);
+ return 0;
+}
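Review bot: The `goto out` taken in `try_server_run` when the first `test_server_run()` comes up short jumps past `synchronize_threads(); /* 3: verified => closed */`, while `test_sk_restore` on the client side reaches its stage-3 barrier unconditionally. After such a failure the server's next barrier call (stage 1 of the following case) pairs with the client's stage 3, so the two threads run out of step for the rest of the test, or one of them blocks in the barrier at the end. Moving the `out:` label above the stage-3 `synchronize_threads()` call keeps both sides aligned on the failure path. Smaller point: `before_cnt` and `after_cnt` are `uint64_t` but are printed with `%zu` in both `try_server_run` and `test_sk_restore`; `<inttypes.h>` is already included, so `"%" PRIu64` would match the type.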
diff --git a/tools/testing/selftests/net/tcp_ao/rst.c b/tools/testing/selftests/net/tcp_ao/rst.c
new file mode 100644
index 000000000000..a2fe88d35ac0
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/rst.c
@@ -0,0 +1,460 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * The test checks that both active and passive reset have correct TCP-AO
+ * signature. An "active" reset (abort) here is procured from closing
+ * listen() socket with non-accepted connections in the queue:
+ * inet_csk_listen_stop() => inet_child_forget() =>
+ * => tcp_disconnect() => tcp_send_active_reset()
+ *
+ * The passive reset is quite hard to get on established TCP connections.
+ * It could be procured from non-established states, but the synchronization
+ * part from userspace in order to reliably get RST seems uneasy.
+ * So, instead it's procured by corrupting SEQ number on TIMED-WAIT state.
+ *
+ * It's important to test both passive and active RST as they go through
+ * different code-paths:
+ * - tcp_send_active_reset() makes no-data skb, sends it with tcp_transmit_skb()
+ * - tcp_v*_send_reset() create their reply skbs and send them with
+ * ip_send_unicast_reply()
+ *
+ * In both cases TCP-AO signatures have to be correct, which is verified by
+ * (1) checking that the TCP-AO connection was reset and (2) TCP-AO counters.
+ *
+ * Author: Dmitry Safonov <dima@arista.com>
+ */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+const size_t quota = 1000;
+const size_t packet_sz = 100;
+/*
+ * Backlog == 0 means 1 connection in queue, see:
+ * commit 64a146513f8f ("[NET]: Revert incorrect accept queue...")
+ */
+const unsigned int backlog;
+
+static void netstats_check(struct netstat *before, struct netstat *after,
+ char *msg)
+{
+ uint64_t before_cnt, after_cnt;
+
+ before_cnt = netstat_get(before, "TCPAORequired", NULL);
+ after_cnt = netstat_get(after, "TCPAORequired", NULL);
+ if (after_cnt > before_cnt)
+ test_fail("Segments without AO sign (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+ else
+ test_ok("No segments without AO sign (%s)", msg);
+
+ before_cnt = netstat_get(before, "TCPAOGood", NULL);
+ after_cnt = netstat_get(after, "TCPAOGood", NULL);
+ if (after_cnt <= before_cnt)
+ test_fail("Signed AO segments (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+ else
+ test_ok("Signed AO segments (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+
+ before_cnt = netstat_get(before, "TCPAOBad", NULL);
+ after_cnt = netstat_get(after, "TCPAOBad", NULL);
+ if (after_cnt > before_cnt)
+ test_fail("Segments with bad AO sign (%s): %" PRIu64 " => %" PRIu64,
+ msg, before_cnt, after_cnt);
+ else
+ test_ok("No segments with bad AO sign (%s)", msg);
+}
+
+/*
+ * Another way to send RST, but not through tcp_v{4,6}_send_reset()
+ * is tcp_send_active_reset(), that is not in reply to inbound segment,
+ * but rather active send. It uses tcp_transmit_skb(), so that should
+ * work, but as it also sends RST - nice that it can be covered as well.
+ */
+static void close_forced(int sk)
+{
+ struct linger sl;
+
+ sl.l_onoff = 1;
+ sl.l_linger = 0;
+ if (setsockopt(sk, SOL_SOCKET, SO_LINGER, &sl, sizeof(sl)))
+ test_error("setsockopt(SO_LINGER)");
+ close(sk);
+}
+
+static void test_server_active_rst(unsigned int port)
+{
+ struct tcp_ao_counters cnt1, cnt2;
+ ssize_t bytes;
+ int sk, lsk;
+
+ lsk = test_listen_socket(this_ip_addr, port, backlog);
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ if (test_get_tcp_ao_counters(lsk, &cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 1: MKT added */
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: connection accept()ed, another queued */
+ if (test_get_tcp_ao_counters(lsk, &cnt2))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 3: close listen socket */
+ close(lsk);
+ bytes = test_server_run(sk, quota, 0);
+ if (bytes != quota)
+ test_error("servered only %zd bytes", bytes);
+ else
+ test_ok("servered %zd bytes", bytes);
+
+ synchronize_threads(); /* 4: finishing up */
+ close_forced(sk);
+
+ synchronize_threads(); /* 5: closed active sk */
+
+ synchronize_threads(); /* 6: counters checks */
+ if (test_tcp_ao_counters_cmp("active RST server", &cnt1, &cnt2, TEST_CNT_GOOD))
+ test_fail("MKT counters (server) have not only good packets");
+ else
+ test_ok("MKT counters are good on server");
+}
+
+static void test_server_passive_rst(unsigned int port)
+{
+ struct tcp_ao_counters ao1, ao2;
+ int sk, lsk;
+ ssize_t bytes;
+
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ close(lsk);
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ if (bytes > 0)
+ test_fail("server served: %zd", bytes);
+ else
+ test_fail("server returned %zd", bytes);
+ }
+
+ synchronize_threads(); /* 3: checkpoint the client */
+ synchronize_threads(); /* 4: close the server, creating twsk */
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ close(sk);
+
+ synchronize_threads(); /* 5: restore the socket, send more data */
+ test_tcp_ao_counters_cmp("passive RST server", &ao1, &ao2, TEST_CNT_GOOD);
+
+ synchronize_threads(); /* 6: server exits */
+}
+
+static void *server_fn(void *arg)
+{
+ struct netstat *ns_before, *ns_after;
+ unsigned int port = test_server_port;
+
+ ns_before = netstat_read();
+
+ test_server_active_rst(port++);
+ test_server_passive_rst(port++);
+
+ ns_after = netstat_read();
+ netstats_check(ns_before, ns_after, "server");
+ netstat_free(ns_after);
+ netstat_free(ns_before);
+ synchronize_threads(); /* exit */
+
+ synchronize_threads(); /* don't race to exit() - client exits */
+ return NULL;
+}
+
+static int test_wait_fds(int sk[], size_t nr, bool is_writable[],
+ ssize_t wait_for, time_t sec)
+{
+ struct timeval tv = { .tv_sec = sec };
+ struct timeval *ptv = NULL;
+ fd_set left;
+ size_t i;
+ int ret;
+
+ FD_ZERO(&left);
+ for (i = 0; i < nr; i++) {
+ FD_SET(sk[i], &left);
+ if (is_writable)
+ is_writable[i] = false;
+ }
+
+ if (sec)
+ ptv = &tv;
+
+ do {
+ bool is_empty = true;
+ fd_set fds, efds;
+ int nfd = 0;
+
+ FD_ZERO(&fds);
+ FD_ZERO(&efds);
+ for (i = 0; i < nr; i++) {
+ if (!FD_ISSET(sk[i], &left))
+ continue;
+
+ if (sk[i] > nfd)
+ nfd = sk[i];
+
+ FD_SET(sk[i], &fds);
+ FD_SET(sk[i], &efds);
+ is_empty = false;
+ }
+ if (is_empty)
+ return -ENOENT;
+
+ errno = 0;
+ ret = select(nfd + 1, NULL, &fds, &efds, ptv);
+ if (ret < 0)
+ return -errno;
+ if (!ret)
+ return -ETIMEDOUT;
+ for (i = 0; i < nr; i++) {
+ if (FD_ISSET(sk[i], &fds)) {
+ if (is_writable)
+ is_writable[i] = true;
+ FD_CLR(sk[i], &left);
+ wait_for--;
+ continue;
+ }
+ if (FD_ISSET(sk[i], &efds)) {
+ FD_CLR(sk[i], &left);
+ wait_for--;
+ }
+ }
+ } while (wait_for > 0);
+
+ return 0;
+}
+
+static void test_client_active_rst(unsigned int port)
+{
+ int i, sk[3], err;
+ bool is_writable[ARRAY_SIZE(sk)] = {false};
+ unsigned int last = ARRAY_SIZE(sk) - 1;
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ sk[i] = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk[i] < 0)
+ test_error("socket()");
+ if (test_add_key(sk[i], DEFAULT_TEST_PASSWORD,
+ this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+
+ synchronize_threads(); /* 1: MKT added */
+ for (i = 0; i < last; i++) {
+ err = _test_connect_socket(sk[i], this_ip_dest, port,
+ (i == 0) ? TEST_TIMEOUT_SEC : -1);
+ if (err < 0)
+ test_error("failed to connect()");
+ }
+
+ synchronize_threads(); /* 2: two connections: one accept()ed, another queued */
+ err = test_wait_fds(sk, last, is_writable, last, TEST_TIMEOUT_SEC);
+ if (err < 0)
+ test_error("test_wait_fds(): %d", err);
+
+ /* async connect() with third sk to get into request_sock_queue */
+ err = _test_connect_socket(sk[last], this_ip_dest, port, -1);
+ if (err < 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 3: close listen socket */
+ if (test_client_verify(sk[0], packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ test_fail("Failed to send data on connected socket");
+ else
+ test_ok("Verified established tcp connection");
+
+ synchronize_threads(); /* 4: finishing up */
+
+ synchronize_threads(); /* 5: closed active sk */
+ /*
+ * Wait for 2 connections: one accepted, another in the accept queue,
+ * the one in request_sock_queue won't get fully established, so
+ * doesn't receive an active RST, see inet_csk_listen_stop().
+ */
+ err = test_wait_fds(sk, last, NULL, last, TEST_TIMEOUT_SEC);
+ if (err < 0)
+ test_error("select(): %d", err);
+
+ for (i = 0; i < ARRAY_SIZE(sk); i++) {
+ socklen_t slen = sizeof(err);
+
+ if (getsockopt(sk[i], SOL_SOCKET, SO_ERROR, &err, &slen))
+ test_error("getsockopt()");
+ if (is_writable[i] && err != ECONNRESET) {
+ test_fail("sk[%d] = %d, err = %d, connection wasn't reset",
+ i, sk[i], err);
+ } else {
+ test_ok("sk[%d] = %d%s", i, sk[i],
+ is_writable[i] ? ", connection was reset" : "");
+ }
+ }
+ synchronize_threads(); /* 6: counters checks */
+}
+
+static void test_client_passive_rst(unsigned int port)
+{
+ struct tcp_ao_counters ao1, ao2;
+ struct tcp_ao_repair ao_img;
+ struct tcp_sock_state img;
+ sockaddr_af saddr;
+ int sk, err;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, port) <= 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ if (test_client_verify(sk, packet_sz, quota / packet_sz, TEST_TIMEOUT_SEC))
+ test_fail("Failed to send data on connected socket");
+ else
+ test_ok("Verified established tcp connection");
+
+ synchronize_threads(); /* 3: checkpoint the client */
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &saddr);
+ test_ao_checkpoint(sk, &ao_img);
+ test_disable_repair(sk);
+
+ synchronize_threads(); /* 4: close the server, creating twsk */
+
+ /*
+ * The "corruption" in SEQ has to be small enough to fit into TCP
+ * window, see tcp_timewait_state_process() for out-of-window
+ * segments.
+ */
+ img.out.seq += 5; /* 5 is more noticeable in tcpdump than 1 */
+
+ /*
+ * FIXME: This is kind-of ugly and dirty, but it works.
+ *
+ * At this moment, the server has close'ed(sk).
+ * The passive RST that is being targeted here is new data after
+ * half-duplex close, see tcp_timewait_state_process() => TCP_TW_RST
+ *
+ * What is needed here is:
+ * (1) wait for FIN from the server
+ * (2) make sure that the ACK from the client went out
+ * (3) make sure that the ACK was received and processed by the server
+ *
+ * Otherwise, the data that will be sent from "repaired" socket
+ * post SEQ corruption may get to the server before it's in
+ * TCP_FIN_WAIT2.
+ *
+ * (1) is easy with select()/poll()
+ * (2) is possible by polling tcpi_state from TCP_INFO
+ * (3) is quite complex: as server's socket was already closed,
+ * probably the way to do it would be tcp-diag.
+ */
+ sleep(TEST_RETRANSMIT_SEC);
+
+ synchronize_threads(); /* 5: restore the socket, send more data */
+ test_kill_sk(sk);
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ test_sock_restore(sk, &img, &saddr, this_ip_dest, port);
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ test_ao_restore(sk, &ao_img);
+
+ if (test_get_tcp_ao_counters(sk, &ao1))
+ test_error("test_get_tcp_ao_counters()");
+
+ test_disable_repair(sk);
+ test_sock_state_free(&img);
+
+ /*
+ * This is how "passive reset" is acquired in this test from TCP_TW_RST:
+ *
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [P.], seq 901:1001, ack 1001, win 249,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x10217d6c36a22379086ef3b1], length 100
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [F.], seq 1001, ack 1001, win 249,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x104ffc99b98c10a5298cc268], length 0
+ * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [.], ack 1002, win 251,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0xe496dd4f7f5a8a66873c6f93,nop,nop,sack 1 {1001:1002}], length 0
+ * IP 10.0.1.1.59772 > 10.0.254.1.7011: Flags [P.], seq 1006:1106, ack 1001, win 251,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x1b5f3330fb23fbcd0c77d0ca], length 100
+ * IP 10.0.254.1.7011 > 10.0.1.1.59772: Flags [R], seq 3215596252, win 0,
+ * options [tcp-ao keyid 100 rnextkeyid 100 mac 0x0bcfbbf497bce844312304b2], length 0
+ */
+ err = test_client_verify(sk, packet_sz, quota / packet_sz, 2 * TEST_TIMEOUT_SEC);
+ /* Make sure that the connection was reset, not timeouted */
+ if (err && err == -ECONNRESET)
+ test_ok("client sock was passively reset post-seq-adjust");
+ else if (err)
+ test_fail("client sock was not reset post-seq-adjust: %d", err);
+ else
+ test_fail("client sock is yet connected post-seq-adjust");
+
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* 6: server exits */
+ close(sk);
+ test_tcp_ao_counters_cmp("client passive RST", &ao1, &ao2, TEST_CNT_GOOD);
+}
+
+static void *client_fn(void *arg)
+{
+ struct netstat *ns_before, *ns_after;
+ unsigned int port = test_server_port;
+
+ ns_before = netstat_read();
+
+ test_client_active_rst(port++);
+ test_client_passive_rst(port++);
+
+ ns_after = netstat_read();
+ netstats_check(ns_before, ns_after, "client");
+ netstat_free(ns_after);
+ netstat_free(ns_before);
+
+ synchronize_threads(); /* exit */
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(14, server_fn, client_fn);
+ return 0;
+}
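Review bot: In test_client_active_rst() the final SO_ERROR loop reports `test_ok` for every socket whose `is_writable[i]` is false, so a socket that test_wait_fds() cleared only through the exception set (that path leaves `is_writable[i]` false) would pass without being checked for ECONNRESET. Per the comment above the second wait, only `sk[last]` is expected to skip the reset check. I would make that explicit by putting `if (i < last && !is_writable[i]) test_fail("sk[%d] = %d never became writable", i, sk[i]);` ahead of the existing branch and turning that branch into an `else if`. A missed reset on one of the two established connections then cannot be counted as a pass.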
diff --git a/tools/testing/selftests/net/tcp_ao/self-connect.c b/tools/testing/selftests/net/tcp_ao/self-connect.c
new file mode 100644
index 000000000000..e154d9e198a9
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/self-connect.c
@@ -0,0 +1,197 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+static union tcp_addr local_addr;
+
+static void __setup_lo_intf(const char *lo_intf,
+ const char *addr_str, uint8_t prefix)
+{
+ if (inet_pton(TEST_FAMILY, addr_str, &local_addr) != 1)
+ test_error("Can't convert local ip address");
+
+ if (ip_addr_add(lo_intf, TEST_FAMILY, local_addr, prefix))
+ test_error("Failed to add %s ip address", lo_intf);
+
+ if (link_set_up(lo_intf))
+ test_error("Failed to bring %s up", lo_intf);
+}
+
+static void setup_lo_intf(const char *lo_intf)
+{
+#ifdef IPV6_TEST
+ __setup_lo_intf(lo_intf, "::1", 128);
+#else
+ __setup_lo_intf(lo_intf, "127.0.0.1", 8);
+#endif
+}
+
+static void tcp_self_connect(const char *tst, unsigned int port,
+ bool different_keyids, bool check_restore)
+{
+ uint64_t before_challenge_ack, after_challenge_ack;
+ uint64_t before_syn_challenge, after_syn_challenge;
+ struct tcp_ao_counters before_ao, after_ao;
+ uint64_t before_aogood, after_aogood;
+ struct netstat *ns_before, *ns_after;
+ const size_t nr_packets = 20;
+ struct tcp_ao_repair ao_img;
+ struct tcp_sock_state img;
+ sockaddr_af addr;
+ int sk;
+
+ tcp_addr_to_sockaddr_in(&addr, &local_addr, htons(port));
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (different_keyids) {
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 5, 7))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 7, 5))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ } else {
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, local_addr, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+
+ if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ test_error("bind()");
+
+ ns_before = netstat_read();
+ before_aogood = netstat_get(ns_before, "TCPAOGood", NULL);
+ before_challenge_ack = netstat_get(ns_before, "TCPChallengeACK", NULL);
+ before_syn_challenge = netstat_get(ns_before, "TCPSYNChallenge", NULL);
+ if (test_get_tcp_ao_counters(sk, &before_ao))
+ test_error("test_get_tcp_ao_counters()");
+
+ if (__test_connect_socket(sk, "lo", (struct sockaddr *)&addr,
+ sizeof(addr), TEST_TIMEOUT_SEC) < 0) {
+ ns_after = netstat_read();
+ netstat_print_diff(ns_before, ns_after);
+ test_error("failed to connect()");
+ }
+
+ if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("%s: tcp connection verify failed", tst);
+ close(sk);
+ return;
+ }
+
+ ns_after = netstat_read();
+ after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+ after_challenge_ack = netstat_get(ns_after, "TCPChallengeACK", NULL);
+ after_syn_challenge = netstat_get(ns_after, "TCPSYNChallenge", NULL);
+ if (test_get_tcp_ao_counters(sk, &after_ao))
+ test_error("test_get_tcp_ao_counters()");
+ if (!check_restore) {
+ /* to debug: netstat_print_diff(ns_before, ns_after); */
+ netstat_free(ns_before);
+ }
+ netstat_free(ns_after);
+
+ if (after_aogood <= before_aogood) {
+ test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ tst, after_aogood, before_aogood);
+ close(sk);
+ return;
+ }
+ if (after_challenge_ack <= before_challenge_ack ||
+ after_syn_challenge <= before_syn_challenge) {
+ /*
+ * It's also meant to test simultaneous open, so check
+ * these counters as well.
+ */
+ test_fail("%s: Didn't challenge SYN or ACK: %zu <= %zu OR %zu <= %zu",
+ tst, after_challenge_ack, before_challenge_ack,
+ after_syn_challenge, before_syn_challenge);
+ close(sk);
+ return;
+ }
+
+ if (test_tcp_ao_counters_cmp(tst, &before_ao, &after_ao, TEST_CNT_GOOD)) {
+ close(sk);
+ return;
+ }
+
+ if (!check_restore) {
+ test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64,
+ tst, before_aogood, after_aogood);
+ close(sk);
+ return;
+ }
+
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &addr);
+#ifdef IPV6_TEST
+ addr.sin6_port = htons(port + 1);
+#else
+ addr.sin_port = htons(port + 1);
+#endif
+ test_ao_checkpoint(sk, &ao_img);
+ test_kill_sk(sk);
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ __test_sock_restore(sk, "lo", &img, &addr, &addr, sizeof(addr));
+ if (different_keyids) {
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+ local_addr, -1, 7, 5))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+ local_addr, -1, 5, 7))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ } else {
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0,
+ local_addr, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+ test_ao_restore(sk, &ao_img);
+ test_disable_repair(sk);
+ test_sock_state_free(&img);
+ if (test_client_verify(sk, 100, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("%s: tcp connection verify failed", tst);
+ close(sk);
+ return;
+ }
+ ns_after = netstat_read();
+ after_aogood = netstat_get(ns_after, "TCPAOGood", NULL);
+ /* to debug: netstat_print_diff(ns_before, ns_after); */
+ netstat_free(ns_before);
+ netstat_free(ns_after);
+ close(sk);
+ if (after_aogood <= before_aogood) {
+ test_fail("%s: TCPAOGood counter mismatch: %zu <= %zu",
+ tst, after_aogood, before_aogood);
+ return;
+ }
+ test_ok("%s: connect TCPAOGood %" PRIu64 " => %" PRIu64,
+ tst, before_aogood, after_aogood);
+}
+
+static void *client_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+
+ setup_lo_intf("lo");
+
+ tcp_self_connect("self-connect(same keyids)", port++, false, false);
+ tcp_self_connect("self-connect(different keyids)", port++, true, false);
+ tcp_self_connect("self-connect(restore)", port, false, true);
+ port += 2;
+ tcp_self_connect("self-connect(restore, different keyids)", port, true, true);
+ port += 2;
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(4, client_fn, NULL);
+ return 0;
+}
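Review bot: The `uint64_t` counters in tcp_self_connect() are printed with `%zu` in three test_fail() calls (both "TCPAOGood counter mismatch" messages and "Didn't challenge SYN or ACK"), which mismatches the argument type wherever size_t is not 64 bits wide. The test_ok() calls in the same function already use `PRIu64`, and the failure paths should match, e.g. `test_fail("%s: TCPAOGood counter mismatch: %" PRIu64 " <= %" PRIu64, tst, after_aogood, before_aogood);`. The seq-ext.c hunk that follows has the same pattern for `before_good`, `after_good` and `after_bad` in both server_fn() and client_fn().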
diff --git a/tools/testing/selftests/net/tcp_ao/seq-ext.c b/tools/testing/selftests/net/tcp_ao/seq-ext.c
new file mode 100644
index 000000000000..ad4e77d6823e
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/seq-ext.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Check that after SEQ number wrap-around:
+ * 1. SEQ-extension has upper bytes set
+ * 2. TCP conneciton is alive and no TCPAOBad segments
+ * In order to test (2), the test doesn't just adjust seq number for a queue
+ * on a connected socket, but migrates it to another sk+port number, so
+ * that there won't be any delayed packets that will fail to verify
+ * with the new SEQ numbers.
+ */
+#include <inttypes.h>
+#include "aolib.h"
+
+const unsigned int nr_packets = 1000;
+const unsigned int msg_len = 1000;
+const unsigned int quota = nr_packets * msg_len;
+unsigned int client_new_port;
+
+/* Move them closer to roll-over */
+static void test_adjust_seqs(struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img,
+ bool server)
+{
+ uint32_t new_seq1, new_seq2;
+
+ /* make them roll-over during quota, but on different segments */
+ if (server) {
+ new_seq1 = ((uint32_t)-1) - msg_len;
+ new_seq2 = ((uint32_t)-1) - (quota - 2 * msg_len);
+ } else {
+ new_seq1 = ((uint32_t)-1) - (quota - 2 * msg_len);
+ new_seq2 = ((uint32_t)-1) - msg_len;
+ }
+
+ img->in.seq = new_seq1;
+ img->trw.snd_wl1 = img->in.seq - msg_len;
+ img->out.seq = new_seq2;
+ img->trw.rcv_wup = img->in.seq;
+}
+
+static int test_sk_restore(struct tcp_sock_state *img,
+ struct tcp_ao_repair *ao_img, sockaddr_af *saddr,
+ const union tcp_addr daddr, unsigned int dport,
+ struct tcp_ao_counters *cnt)
+{
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ test_enable_repair(sk);
+ test_sock_restore(sk, img, saddr, daddr, dport);
+ if (test_add_repaired_key(sk, DEFAULT_TEST_PASSWORD, 0, daddr, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ test_ao_restore(sk, ao_img);
+
+ if (test_get_tcp_ao_counters(sk, cnt))
+ test_error("test_get_tcp_ao_counters()");
+
+ test_disable_repair(sk);
+ test_sock_state_free(img);
+ return sk;
+}
+
+static void *server_fn(void *arg)
+{
+ uint64_t before_good, after_good, after_bad;
+ struct tcp_ao_counters ao1, ao2;
+ struct tcp_sock_state img;
+ struct tcp_ao_repair ao_img;
+ sockaddr_af saddr;
+ ssize_t bytes;
+ int sk, lsk;
+
+ lsk = test_listen_socket(this_ip_addr, test_server_port, 1);
+
+ if (test_add_key(lsk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+
+ if (test_wait_fd(lsk, TEST_TIMEOUT_SEC, 0))
+ test_error("test_wait_fd()");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0)
+ test_error("accept()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ close(lsk);
+
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ if (bytes > 0)
+ test_fail("server served: %zd", bytes);
+ else
+ test_fail("server returned: %zd", bytes);
+ goto out;
+ }
+
+ before_good = netstat_get_one("TCPAOGood", NULL);
+
+ synchronize_threads(); /* 3: restore the connection on another port */
+
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &saddr);
+ test_ao_checkpoint(sk, &ao_img);
+ test_kill_sk(sk);
+#ifdef IPV6_TEST
+ saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1);
+#else
+ saddr.sin_port = htons(ntohs(saddr.sin_port) + 1);
+#endif
+ test_adjust_seqs(&img, &ao_img, true);
+ synchronize_threads(); /* 4: dump finished */
+ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
+ client_new_port, &ao1);
+
+ synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ bytes = test_server_run(sk, quota, TEST_TIMEOUT_SEC);
+ if (bytes != quota) {
+ if (bytes > 0)
+ test_fail("server served: %zd", bytes);
+ else
+ test_fail("server returned: %zd", bytes);
+ } else {
+ test_ok("server alive");
+ }
+
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_good = netstat_get_one("TCPAOGood", NULL);
+
+ test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+
+ if (after_good <= before_good) {
+ test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ after_good, before_good);
+ } else {
+ test_ok("TCPAOGood counter increased %zu => %zu",
+ before_good, after_good);
+ }
+ after_bad = netstat_get_one("TCPAOBad", NULL);
+ if (after_bad)
+ test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ else
+ test_ok("TCPAOBad counter didn't increase");
+ test_enable_repair(sk);
+ test_ao_checkpoint(sk, &ao_img);
+ if (ao_img.snd_sne && ao_img.rcv_sne) {
+ test_ok("SEQ extension incremented: %u/%u",
+ ao_img.snd_sne, ao_img.rcv_sne);
+ } else {
+ test_fail("SEQ extension was not incremented: %u/%u",
+ ao_img.snd_sne, ao_img.rcv_sne);
+ }
+
+ synchronize_threads(); /* 6: verified => closed */
+out:
+ close(sk);
+ return NULL;
+}
+
+static void *client_fn(void *arg)
+{
+ uint64_t before_good, after_good, after_bad;
+ struct tcp_ao_counters ao1, ao2;
+ struct tcp_sock_state img;
+ struct tcp_ao_repair ao_img;
+ sockaddr_af saddr;
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, -1, 100, 100))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* 1: MKT added => connect() */
+ if (test_connect_socket(sk, this_ip_dest, test_server_port) <= 0)
+ test_error("failed to connect()");
+
+ synchronize_threads(); /* 2: accepted => send data */
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC)) {
+ test_fail("pre-migrate verify failed");
+ return NULL;
+ }
+
+ before_good = netstat_get_one("TCPAOGood", NULL);
+
+ synchronize_threads(); /* 3: restore the connection on another port */
+ test_enable_repair(sk);
+ test_sock_checkpoint(sk, &img, &saddr);
+ test_ao_checkpoint(sk, &ao_img);
+ test_kill_sk(sk);
+#ifdef IPV6_TEST
+ client_new_port = ntohs(saddr.sin6_port) + 1;
+ saddr.sin6_port = htons(ntohs(saddr.sin6_port) + 1);
+#else
+ client_new_port = ntohs(saddr.sin_port) + 1;
+ saddr.sin_port = htons(ntohs(saddr.sin_port) + 1);
+#endif
+ test_adjust_seqs(&img, &ao_img, false);
+ synchronize_threads(); /* 4: dump finished */
+ sk = test_sk_restore(&img, &ao_img, &saddr, this_ip_dest,
+ test_server_port + 1, &ao1);
+
+ synchronize_threads(); /* 5: verify counters during SEQ-number rollover */
+ if (test_client_verify(sk, msg_len, nr_packets, TEST_TIMEOUT_SEC))
+ test_fail("post-migrate verify failed");
+ else
+ test_ok("post-migrate connection alive");
+
+ if (test_get_tcp_ao_counters(sk, &ao2))
+ test_error("test_get_tcp_ao_counters()");
+ after_good = netstat_get_one("TCPAOGood", NULL);
+
+ test_tcp_ao_counters_cmp(NULL, &ao1, &ao2, TEST_CNT_GOOD);
+
+ if (after_good <= before_good) {
+ test_fail("TCPAOGood counter did not increase: %zu <= %zu",
+ after_good, before_good);
+ } else {
+ test_ok("TCPAOGood counter increased %zu => %zu",
+ before_good, after_good);
+ }
+ after_bad = netstat_get_one("TCPAOBad", NULL);
+ if (after_bad)
+ test_fail("TCPAOBad counter is non-zero: %zu", after_bad);
+ else
+ test_ok("TCPAOBad counter didn't increase");
+
+ synchronize_threads(); /* 6: verified => closed */
+ close(sk);
+
+ synchronize_threads(); /* don't race to exit: let server exit() */
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(7, server_fn, client_fn);
+ return 0;
+}
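Review bot: test_adjust_seqs() takes `ao_img` but never reads or writes it, and its only comment, "Move them closer to roll-over", does not say which fields "them" refers to. Either drop the parameter or keep it and state the intent, e.g. `/* Move in.seq/out.seq close to 2^32 so both wrap within quota; the SNE values in ao_img are left as checkpointed */`. server_fn() later requires `snd_sne` and `rcv_sne` to be non-zero after the second transfer, so it helps the reader to know they are not set here.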
diff --git a/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
new file mode 100644
index 000000000000..517930f9721b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/setsockopt-closed.c
@@ -0,0 +1,835 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "../../../../include/linux/kernel.h"
+#include "aolib.h"
+
+static union tcp_addr tcp_md5_client;
+
+static int test_port = 7788;
+static void make_listen(int sk)
+{
+ sockaddr_af addr;
+
+ tcp_addr_to_sockaddr_in(&addr, &this_ip_addr, htons(test_port++));
+ if (bind(sk, (struct sockaddr *)&addr, sizeof(addr)) < 0)
+ test_error("bind()");
+ if (listen(sk, 1))
+ test_error("listen()");
+}
+
+static void test_vefify_ao_info(int sk, struct tcp_ao_info_opt *info,
+ const char *tst)
+{
+ struct tcp_ao_info_opt tmp = {};
+ socklen_t len = sizeof(tmp);
+
+ if (getsockopt(sk, IPPROTO_TCP, TCP_AO_INFO, &tmp, &len))
+ test_error("getsockopt(TCP_AO_INFO) failed");
+
+#define __cmp_ao(member) \
+do { \
+ if (info->member != tmp.member) { \
+ test_fail("%s: getsockopt(): " __stringify(member) " %zu != %zu", \
+ tst, (size_t)info->member, (size_t)tmp.member); \
+ return; \
+ } \
+} while(0)
+ if (info->set_current)
+ __cmp_ao(current_key);
+ if (info->set_rnext)
+ __cmp_ao(rnext);
+ if (info->set_counters) {
+ __cmp_ao(pkt_good);
+ __cmp_ao(pkt_bad);
+ __cmp_ao(pkt_key_not_found);
+ __cmp_ao(pkt_ao_required);
+ __cmp_ao(pkt_dropped_icmp);
+ }
+ __cmp_ao(ao_required);
+ __cmp_ao(accept_icmps);
+
+ test_ok("AO info get: %s", tst);
+#undef __cmp_ao
+}
+
+static void __setsockopt_checked(int sk, int optname, bool get,
+ void *optval, socklen_t *len,
+ int err, const char *tst, const char *tst2)
+{
+ int ret;
+
+ if (!tst)
+ tst = "";
+ if (!tst2)
+ tst2 = "";
+
+ errno = 0;
+ if (get)
+ ret = getsockopt(sk, IPPROTO_TCP, optname, optval, len);
+ else
+ ret = setsockopt(sk, IPPROTO_TCP, optname, optval, *len);
+ if (ret == -1) {
+ if (errno == err)
+ test_ok("%s%s", tst ?: "", tst2 ?: "");
+ else
+ test_fail("%s%s: %setsockopt() failed",
+ tst, tst2, get ? "g" : "s");
+ close(sk);
+ return;
+ }
+
+ if (err) {
+ test_fail("%s%s: %setsockopt() was expected to fail with %d",
+ tst, tst2, get ? "g" : "s", err);
+ } else {
+ test_ok("%s%s", tst ?: "", tst2 ?: "");
+ if (optname == TCP_AO_ADD_KEY) {
+ test_verify_socket_key(sk, optval);
+ } else if (optname == TCP_AO_INFO && !get) {
+ test_vefify_ao_info(sk, optval, tst2);
+ } else if (optname == TCP_AO_GET_KEYS) {
+ if (*len != sizeof(struct tcp_ao_getsockopt))
+ test_fail("%s%s: get keys returned wrong tcp_ao_getsockopt size",
+ tst, tst2);
+ }
+ }
+ close(sk);
+}
+
+static void setsockopt_checked(int sk, int optname, void *optval,
+ int err, const char *tst)
+{
+ const char *cmd = NULL;
+ socklen_t len;
+
+ switch (optname) {
+ case TCP_AO_ADD_KEY:
+ cmd = "key add: ";
+ len = sizeof(struct tcp_ao_add);
+ break;
+ case TCP_AO_DEL_KEY:
+ cmd = "key del: ";
+ len = sizeof(struct tcp_ao_del);
+ break;
+ case TCP_AO_INFO:
+ cmd = "AO info set: ";
+ len = sizeof(struct tcp_ao_info_opt);
+ break;
+ default:
+ break;
+ }
+
+ __setsockopt_checked(sk, optname, false, optval, &len, err, cmd, tst);
+}
+
+static int prepare_defs(int cmd, void *optval)
+{
+ int sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+
+ if (sk < 0)
+ test_error("socket()");
+
+ switch (cmd) {
+ case TCP_AO_ADD_KEY: {
+ struct tcp_ao_add *add = optval;
+
+ if (test_prepare_def_key(add, DEFAULT_TEST_PASSWORD, 0, this_ip_dest,
+ -1, 0, 100, 100))
+ test_error("prepare default tcp_ao_add");
+ break;
+ }
+ case TCP_AO_DEL_KEY: {
+ struct tcp_ao_del *del = optval;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("add default key");
+ memset(del, 0, sizeof(struct tcp_ao_del));
+ del->sndid = 100;
+ del->rcvid = 100;
+ del->prefix = DEFAULT_TEST_PREFIX;
+ tcp_addr_to_sockaddr_in(&del->addr, &this_ip_dest, 0);
+ break;
+ }
+ case TCP_AO_INFO: {
+ struct tcp_ao_info_opt *info = optval;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("add default key");
+ memset(info, 0, sizeof(struct tcp_ao_info_opt));
+ break;
+ }
+ case TCP_AO_GET_KEYS: {
+ struct tcp_ao_getsockopt *get = optval;
+
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest,
+ DEFAULT_TEST_PREFIX, 100, 100))
+ test_error("add default key");
+ memset(get, 0, sizeof(struct tcp_ao_getsockopt));
+ get->nkeys = 1;
+ get->get_all = 1;
+ break;
+ }
+ default:
+ test_error("unknown cmd");
+ }
+
+ return sk;
+}
+
+static void test_extend(int cmd, bool get, const char *tst, socklen_t under_size)
+{
+ struct {
+ union {
+ struct tcp_ao_add add;
+ struct tcp_ao_del del;
+ struct tcp_ao_getsockopt get;
+ struct tcp_ao_info_opt info;
+ };
+ char *extend[100];
+ } tmp_opt;
+ socklen_t extended_size = sizeof(tmp_opt);
+ int sk;
+
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, &tmp_opt, &under_size,
+ EINVAL, tst, ": minimum size");
+
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, &tmp_opt, &extended_size,
+ 0, tst, ": extended size");
+
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, NULL, &extended_size,
+ EFAULT, tst, ": null optval");
+
+ if (get) {
+ memset(&tmp_opt, 0, sizeof(tmp_opt));
+ sk = prepare_defs(cmd, &tmp_opt);
+ __setsockopt_checked(sk, cmd, get, &tmp_opt, NULL,
+ EFAULT, tst, ": null optlen");
+ }
+}
+
+static void extend_tests(void)
+{
+ test_extend(TCP_AO_ADD_KEY, false, "AO add",
+ offsetof(struct tcp_ao_add, key));
+ test_extend(TCP_AO_DEL_KEY, false, "AO del",
+ offsetof(struct tcp_ao_del, keyflags));
+ test_extend(TCP_AO_INFO, false, "AO set info",
+ offsetof(struct tcp_ao_info_opt, pkt_dropped_icmp));
+ test_extend(TCP_AO_INFO, true, "AO get info", -1);
+ test_extend(TCP_AO_GET_KEYS, true, "AO get keys", -1);
+}
+
+static void test_optmem_limit(void)
+{
+ size_t i, keys_limit, current_optmem = test_get_optmem();
+ struct tcp_ao_add ao;
+ union tcp_addr net = {};
+ int sk;
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &net) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ keys_limit = current_optmem / KERNEL_TCP_AO_KEY_SZ_ROUND_UP;
+ for (i = 0;; i++) {
+ union tcp_addr key_peer;
+ int err;
+
+ key_peer = gen_tcp_addr(net, i + 1);
+ tcp_addr_to_sockaddr_in(&ao.addr, &key_peer, 0);
+ err = setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY,
+ &ao, sizeof(ao));
+ if (!err) {
+ /*
+ * TCP_AO_ADD_KEY should be the same order as the real
+ * sizeof(struct tcp_ao_key) in kernel.
+ */
+ if (i <= keys_limit * 10)
+ continue;
+ test_fail("optmem limit test failed: added %zu key", i);
+ break;
+ }
+ if (i < keys_limit) {
+ test_fail("optmem limit test failed: couldn't add %zu key", i);
+ break;
+ }
+ test_ok("optmem limit was hit on adding %zu key", i);
+ break;
+ }
+ close(sk);
+}
+
+static void test_einval_add_key(void)
+{
+ struct tcp_ao_add ao;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.keylen = TCP_AO_MAXKEYLEN + 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big keylen");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.reserved = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved padding");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.reserved2 = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "using reserved2 padding");
+
+ /* tcp_ao_verify_ipv{4,6}() checks */
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.addr.ss_family = AF_UNIX;
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "wrong address family");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ tcp_addr_to_sockaddr_in(&ao.addr, &this_ip_dest, 1234);
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "port (unsupported)");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 0;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "no prefix, addr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 0;
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "no prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 32;
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 129;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too big prefix");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.prefix = 2;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "too short prefix");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.keyflags = (uint8_t)(-1);
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "bad key flags");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ make_listen(sk);
+ ao.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ make_listen(sk);
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ make_listen(sk);
+ ao.set_current = 1;
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "add current+rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as rnext");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.set_current = 1;
+ ao.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, 0, "add key and set as current+rnext");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL,
+ "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.keyflags |= TCP_AO_KEYF_IFINDEX;
+ ao.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EINVAL, "non-existent VRF");
+ /*
+ * tcp_md5_do_lookup{,_any_l3index}() are checked in unsigned-md5
+ * see client_vrf_tests().
+ */
+
+ test_optmem_limit();
+
+ /* tcp_ao_parse_crypto() */
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao.maclen = 100;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EMSGSIZE, "maclen bigger than TCP hdr");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ strcpy(ao.alg_name, "imaginary hash algo");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, ENOENT, "bad algo");
+}
+
+static void test_einval_del_key(void)
+{
+ struct tcp_ao_del del;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.reserved = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved padding");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.reserved2 = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "using reserved2 padding");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ make_listen(sk);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ make_listen(sk);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ make_listen(sk);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "del and set current+rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.keyflags = (uint8_t)(-1);
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "bad key flags");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL,
+ "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.keyflags |= TCP_AO_KEYF_IFINDEX;
+ del.ifindex = 42;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existent VRF");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set non-existing current+rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ if (test_add_key(sk, DEFAULT_TEST_PASSWORD, this_ip_dest, DEFAULT_TEST_PREFIX, 0, 0))
+ test_error("add key");
+ del.set_current = 1;
+ del.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "set current+rnext key");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ del.current_key = 100;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current key to be removed");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_rnext = 1;
+ del.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as rnext key to be removed");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.set_current = 1;
+ del.current_key = 100;
+ del.set_rnext = 1;
+ del.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "set as current+rnext key to be removed");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.del_async = 1;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, EINVAL, "async on non-listen");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.sndid = 101;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing sndid");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ del.rcvid = 101;
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "non-existing rcvid");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ tcp_addr_to_sockaddr_in(&del.addr, &this_ip_addr, 0);
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, ENOENT, "incorrect addr");
+
+ sk = prepare_defs(TCP_AO_DEL_KEY, &del);
+ setsockopt_checked(sk, TCP_AO_DEL_KEY, &del, 0, "correct key delete");
+}
+
+static void test_einval_ao_info(void)
+{
+ struct tcp_ao_info_opt info;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ make_listen(sk);
+ info.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ make_listen(sk);
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ make_listen(sk);
+ info.set_current = 1;
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "set current+rnext key on a listen socket");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.reserved = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved padding");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.reserved2 = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EINVAL, "using reserved2 padding");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.accept_icmps = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "accept_icmps");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.ao_required = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "ao required");
+
+ if (!should_skip_test("ao required with MD5 key", KCONFIG_TCP_MD5)) {
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.ao_required = 1;
+ if (test_set_md5(sk, tcp_md5_client, TEST_PREFIX, -1,
+ "long long secret")) {
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+ close(sk);
+ } else {
+ setsockopt_checked(sk, TCP_AO_INFO, &info, EKEYREJECTED,
+ "ao required with MD5 key");
+ }
+ }
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ info.set_rnext = 1;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, ENOENT, "set non-existing current+rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ info.current_key = 100;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_rnext = 1;
+ info.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_current = 1;
+ info.set_rnext = 1;
+ info.current_key = 100;
+ info.rnext = 100;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set current+rnext key");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ info.set_counters = 1;
+ info.pkt_good = 321;
+ info.pkt_bad = 888;
+ info.pkt_key_not_found = 654;
+ info.pkt_ao_required = 987654;
+ info.pkt_dropped_icmp = 10000;
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "set counters");
+
+ sk = prepare_defs(TCP_AO_INFO, &info);
+ setsockopt_checked(sk, TCP_AO_INFO, &info, 0, "no-op");
+}
+
+static void getsockopt_checked(int sk, struct tcp_ao_getsockopt *optval,
+ int err, const char *tst)
+{
+ socklen_t len = sizeof(struct tcp_ao_getsockopt);
+
+ __setsockopt_checked(sk, TCP_AO_GET_KEYS, true, optval, &len, err,
+ "get keys: ", tst);
+}
+
+static void test_einval_get_keys(void)
+{
+ struct tcp_ao_getsockopt out;
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+ getsockopt_checked(sk, &out, ENOENT, "no ao_info");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ getsockopt_checked(sk, &out, 0, "proper tcp_ao_get_mkts()");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.pkt_good = 643;
+ getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_good counter");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.pkt_bad = 94;
+ getsockopt_checked(sk, &out, EINVAL, "set out-only pkt_bad counter");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.keyflags = (uint8_t)(-1);
+ getsockopt_checked(sk, &out, EINVAL, "bad keyflags");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.ifindex = 42;
+ getsockopt_checked(sk, &out, EINVAL,
+ "ifindex without TCP_AO_KEYF_IFNINDEX");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.reserved = 1;
+ getsockopt_checked(sk, &out, EINVAL, "using reserved field");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 0;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "no prefix, addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 0;
+ memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ getsockopt_checked(sk, &out, 0, "no prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 32;
+ memcpy(&out.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ getsockopt_checked(sk, &out, EINVAL, "prefix, any addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 129;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "too big prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = 2;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "too short prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, 0, "prefix + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "get_all + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.sndid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + sndid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.rcvid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + rcvid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ getsockopt_checked(sk, &out, EINVAL, "current + prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "current + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.sndid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "current + sndid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.rcvid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "current + rcvid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ out.prefix = DEFAULT_TEST_PREFIX;
+ getsockopt_checked(sk, &out, EINVAL, "rnext + prefix");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ tcp_addr_to_sockaddr_in(&out.addr, &this_ip_dest, 0);
+ getsockopt_checked(sk, &out, EINVAL, "rnext + addr");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ out.sndid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "rnext + sndid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_rnext = 1;
+ out.rcvid = 1;
+ getsockopt_checked(sk, &out, EINVAL, "rnext + rcvid");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.is_current = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + current");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 1;
+ out.is_rnext = 1;
+ getsockopt_checked(sk, &out, EINVAL, "get_all + rnext");
+
+ sk = prepare_defs(TCP_AO_GET_KEYS, &out);
+ out.get_all = 0;
+ out.is_current = 1;
+ out.is_rnext = 1;
+ getsockopt_checked(sk, &out, 0, "current + rnext");
+}
+
+static void einval_tests(void)
+{
+ test_einval_add_key();
+ test_einval_del_key();
+ test_einval_ao_info();
+ test_einval_get_keys();
+}
+
+static void duplicate_tests(void)
+{
+ union tcp_addr network_dup;
+ struct tcp_ao_add ao, ao2;
+ int sk;
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ ao2 = ao;
+ memcpy(&ao2.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ ao2.prefix = 0;
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao2, sizeof(ao)))
+ test_error("setsockopt()");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: any addr key on the socket");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));
+ ao.prefix = 0;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr key");
+
+ if (inet_pton(TEST_FAMILY, TEST_NETWORK, &network_dup) != 1)
+ test_error("Can't convert ip address %s", TEST_NETWORK);
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ if (test_prepare_def_key(&ao, "password", 0, network_dup,
+ 16, 0, 100, 100))
+ test_error("prepare default tcp_ao_add");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: add any addr for the same subnet");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: full copy of a key");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ ao.rcvid = 101;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: RecvID differs");
+
+ sk = prepare_defs(TCP_AO_ADD_KEY, &ao);
+ if (setsockopt(sk, IPPROTO_TCP, TCP_AO_ADD_KEY, &ao, sizeof(ao)))
+ test_error("setsockopt()");
+ ao.sndid = 101;
+ setsockopt_checked(sk, TCP_AO_ADD_KEY, &ao, EEXIST, "duplicate: SendID differs");
+}
+
+static void *client_fn(void *arg)
+{
+ if (inet_pton(TEST_FAMILY, __TEST_CLIENT_IP(2), &tcp_md5_client) != 1)
+ test_error("Can't convert ip address");
+ extend_tests();
+ einval_tests();
+ duplicate_tests();
+ /*
+ * TODO: check getsockopt(TCP_AO_GET_KEYS) with different filters
+ * returning proper nr & keys;
+ */
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(120, client_fn, NULL);
+ return 0;
+}
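Review bot: In test_einval_add_key(), the "wrong address family" case sets `ao.addr.ss_family = AF_UNIX;` and then immediately runs `memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));`, which copies over the start of `ao.addr` and so overwrites the family that was just assigned. Assuming SOCKADDR_ANY (defined outside this hunk) carries the test's own address family, the expected EINVAL probably comes from the prefix/any-address check instead, the same rejection the later "prefix, any addr" case covers. In that case the address-family validation of TCP_AO_ADD_KEY is never exercised, although the test reports it as passing. Swap the two statements so the family is set last: `memcpy(&ao.addr, &SOCKADDR_ANY, sizeof(SOCKADDR_ANY));` followed by `ao.addr.ss_family = AF_UNIX;`. A short comment such as `/* set the family after the copy so it is not overwritten */` would keep the order from being reverted.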
diff --git a/tools/testing/selftests/net/tcp_ao/settings b/tools/testing/selftests/net/tcp_ao/settings
new file mode 100644
index 000000000000..6091b45d226b
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/settings
@@ -0,0 +1 @@
+timeout=120
diff --git a/tools/testing/selftests/net/tcp_ao/unsigned-md5.c b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
new file mode 100644
index 000000000000..6b59a652159f
--- /dev/null
+++ b/tools/testing/selftests/net/tcp_ao/unsigned-md5.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Author: Dmitry Safonov <dima@arista.com> */
+#include <inttypes.h>
+#include "aolib.h"
+
+#define fault(type) (inj == FAULT_ ## type)
+static const char *md5_password = "Some evil genius, enemy to mankind, must have been the first contriver.";
+static const char *ao_password = DEFAULT_TEST_PASSWORD;
+
+static union tcp_addr client2;
+static union tcp_addr client3;
+
+static const int test_vrf_ifindex = 200;
+static const uint8_t test_vrf_tabid = 42;
+static void setup_vrfs(void)
+{
+ int err;
+
+ if (!kernel_config_has(KCONFIG_NET_VRF))
+ return;
+
+ err = add_vrf("ksft-vrf", test_vrf_tabid, test_vrf_ifindex, -1);
+ if (err)
+ test_error("Failed to add a VRF: %d", err);
+
+ err = link_set_up("ksft-vrf");
+ if (err)
+ test_error("Failed to bring up a VRF");
+
+ err = ip_route_add_vrf(veth_name, TEST_FAMILY,
+ this_ip_addr, this_ip_dest, test_vrf_tabid);
+ if (err)
+ test_error("Failed to add a route to VRF: %d", err);
+}
+
+static void try_accept(const char *tst_name, unsigned int port,
+ union tcp_addr *md5_addr, uint8_t md5_prefix,
+ union tcp_addr *ao_addr, uint8_t ao_prefix,
+ bool set_ao_required,
+ uint8_t sndid, uint8_t rcvid, uint8_t vrf,
+ const char *cnt_name, test_cnt cnt_expected,
+ int needs_tcp_md5, fault_t inj)
+{
+ struct tcp_ao_counters ao_cnt1, ao_cnt2;
+ uint64_t before_cnt = 0, after_cnt = 0; /* silence GCC */
+ int lsk, err, sk = 0;
+ time_t timeout;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+
+ lsk = test_listen_socket(this_ip_addr, port, 1);
+
+ if (md5_addr && test_set_md5(lsk, *md5_addr, md5_prefix, -1, md5_password))
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+
+ if (ao_addr && test_add_key(lsk, ao_password,
+ *ao_addr, ao_prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ if (set_ao_required && test_set_ao_flags(lsk, true, false))
+ test_error("setsockopt(TCP_AO_INFO)");
+
+ if (cnt_name)
+ before_cnt = netstat_get_one(cnt_name, NULL);
+ if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt1))
+ test_error("test_get_tcp_ao_counters()");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ err = test_wait_fd(lsk, timeout, 0);
+ if (err == -ETIMEDOUT) {
+ if (!fault(TIMEOUT))
+ test_fail("timed out for accept()");
+ } else if (err < 0) {
+ test_error("test_wait_fd()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("ready to accept");
+
+ sk = accept(lsk, NULL, NULL);
+ if (sk < 0) {
+ test_error("accept()");
+ } else {
+ if (fault(TIMEOUT))
+ test_fail("%s: accepted", tst_name);
+ }
+ }
+
+ if (ao_addr && test_get_tcp_ao_counters(lsk, &ao_cnt2))
+ test_error("test_get_tcp_ao_counters()");
+ close(lsk);
+
+ if (!cnt_name) {
+ test_ok("%s: no counter checks", tst_name);
+ goto out;
+ }
+
+ after_cnt = netstat_get_one(cnt_name, NULL);
+
+ if (after_cnt <= before_cnt) {
+ test_fail("%s: %s counter did not increase: %zu <= %zu",
+ tst_name, cnt_name, after_cnt, before_cnt);
+ } else {
+ test_ok("%s: counter %s increased %zu => %zu",
+ tst_name, cnt_name, before_cnt, after_cnt);
+ }
+ if (ao_addr)
+ test_tcp_ao_counters_cmp(tst_name, &ao_cnt1, &ao_cnt2, cnt_expected);
+
+out:
+ synchronize_threads(); /* test_kill_sk() */
+ if (sk > 0)
+ test_kill_sk(sk);
+}
+
+static void server_add_routes(void)
+{
+ int family = TEST_FAMILY;
+
+ synchronize_threads(); /* client_add_ips() */
+
+ if (ip_route_add(veth_name, family, this_ip_addr, client2))
+ test_error("Failed to add route");
+ if (ip_route_add(veth_name, family, this_ip_addr, client3))
+ test_error("Failed to add route");
+}
+
+static void server_add_fail_tests(unsigned int *port)
+{
+ union tcp_addr addr_any = {};
+
+ try_accept("TCP-AO established: add TCP-MD5 key", (*port)++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD,
+ 1, 0);
+ try_accept("TCP-MD5 established: add TCP-AO key", (*port)++, &addr_any,
+ 0, NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
+ try_accept("non-signed established: add TCP-AO key", (*port)++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
+}
+
+static void server_vrf_tests(unsigned int *port)
+{
+ setup_vrfs();
+}
+
+static void *server_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+ union tcp_addr addr_any = {};
+
+ server_add_routes();
+
+ try_accept("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPAOGood",
+ TEST_CNT_GOOD, 0, 0);
+ try_accept("AO server (INADDR_ANY): MD5 client", port++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPMD5Unexpected",
+ 0, 1, FAULT_TIMEOUT);
+ try_accept("AO server (INADDR_ANY): no sign client", port++, NULL, 0,
+ &addr_any, 0, 0, 100, 100, 0, "TCPAORequired",
+ TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
+ try_accept("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ &this_ip_dest, TEST_PREFIX, true,
+ 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 0, 0);
+ try_accept("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ &this_ip_dest, TEST_PREFIX, true,
+ 100, 100, 0, "TCPAORequired",
+ TEST_CNT_AO_REQUIRED, 0, FAULT_TIMEOUT);
+
+ try_accept("MD5 server (INADDR_ANY): AO client", port++, &addr_any, 0,
+ NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
+ 0, 1, FAULT_TIMEOUT);
+ try_accept("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ NULL, 0, 0, 0, 0, 0, NULL, 0, 1, 0);
+ try_accept("MD5 server (INADDR_ANY): no sign client", port++, &addr_any,
+ 0, NULL, 0, 0, 0, 0, 0, "TCPMD5NotFound",
+ 0, 1, FAULT_TIMEOUT);
+
+ try_accept("no sign server: AO client", port++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "TCPAOKeyNotFound",
+ TEST_CNT_AO_KEY_NOT_FOUND, 0, FAULT_TIMEOUT);
+ try_accept("no sign server: MD5 client", port++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "TCPMD5Unexpected",
+ 0, 1, FAULT_TIMEOUT);
+ try_accept("no sign server: no sign client", port++, NULL, 0,
+ NULL, 0, 0, 0, 0, 0, "CurrEstab", 0, 0, 0);
+
+ try_accept("AO+MD5 server: AO client (matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAOGood", TEST_CNT_GOOD, 1, 0);
+ try_accept("AO+MD5 server: AO client (misconfig, matching MD5)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
+ 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: AO client (misconfig, non-matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAOKeyNotFound", TEST_CNT_AO_KEY_NOT_FOUND,
+ 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: MD5 client (matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, NULL, 0, 1, 0);
+ try_accept("AO+MD5 server: MD5 client (misconfig, matching AO)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: MD5 client (misconfig, non-matching)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPMD5Unexpected", 0, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: no sign client (unmatched)", port++,
+ &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "CurrEstab", 0, 1, 0);
+ try_accept("AO+MD5 server: no sign client (misconfig, matching AO)",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPAORequired",
+ TEST_CNT_AO_REQUIRED, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, "TCPMD5NotFound", 0, 1, FAULT_TIMEOUT);
+
+ try_accept("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+ try_accept("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ port++, &this_ip_dest, TEST_PREFIX, &client2, TEST_PREFIX, 0,
+ 100, 100, 0, NULL, 0, 1, FAULT_TIMEOUT);
+
+ server_add_fail_tests(&port);
+
+ server_vrf_tests(&port);
+
+ /* client exits */
+ synchronize_threads();
+ return NULL;
+}
+
+static int client_bind(int sk, union tcp_addr bind_addr)
+{
+#ifdef IPV6_TEST
+ struct sockaddr_in6 addr = {
+ .sin6_family = AF_INET6,
+ .sin6_port = 0,
+ .sin6_addr = bind_addr.a6,
+ };
+#else
+ struct sockaddr_in addr = {
+ .sin_family = AF_INET,
+ .sin_port = 0,
+ .sin_addr = bind_addr.a4,
+ };
+#endif
+ return bind(sk, &addr, sizeof(addr));
+}
+
+static void try_connect(const char *tst_name, unsigned int port,
+ union tcp_addr *md5_addr, uint8_t md5_prefix,
+ union tcp_addr *ao_addr, uint8_t ao_prefix,
+ uint8_t sndid, uint8_t rcvid, uint8_t vrf,
+ fault_t inj, int needs_tcp_md5, union tcp_addr *bind_addr)
+{
+ time_t timeout;
+ int sk, ret;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (bind_addr && client_bind(sk, *bind_addr))
+ test_error("bind()");
+
+ if (md5_addr && test_set_md5(sk, *md5_addr, md5_prefix, -1, md5_password))
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+
+ if (ao_addr && test_add_key(sk, ao_password, *ao_addr,
+ ao_prefix, sndid, rcvid))
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+ if (ret < 0) {
+ if (fault(KEYREJECT) && ret == -EKEYREJECTED)
+ test_ok("%s: connect() was prevented", tst_name);
+ else if (ret == -ETIMEDOUT && fault(TIMEOUT))
+ test_ok("%s", tst_name);
+ else if (ret == -ECONNREFUSED &&
+ (fault(TIMEOUT) || fault(KEYREJECT)))
+ test_ok("%s: refused to connect", tst_name);
+ else
+ test_error("%s: connect() returned %d", tst_name, ret);
+ goto out;
+ }
+
+ if (fault(TIMEOUT) || fault(KEYREJECT))
+ test_fail("%s: connected", tst_name);
+ else
+ test_ok("%s: connected", tst_name);
+
+out:
+ synchronize_threads(); /* test_kill_sk() */
+ /* _test_connect_socket() cleans up on failure */
+ if (ret > 0)
+ test_kill_sk(sk);
+}
+
+#define PREINSTALL_MD5_FIRST BIT(0)
+#define PREINSTALL_AO BIT(1)
+#define POSTINSTALL_AO BIT(2)
+#define PREINSTALL_MD5 BIT(3)
+#define POSTINSTALL_MD5 BIT(4)
+
+static int try_add_key_vrf(int sk, union tcp_addr in_addr, uint8_t prefix,
+ int vrf, uint8_t sndid, uint8_t rcvid,
+ bool set_ao_required)
+{
+ uint8_t keyflags = 0;
+
+ if (vrf >= 0)
+ keyflags |= TCP_AO_KEYF_IFINDEX;
+ else
+ vrf = 0;
+ if (set_ao_required) {
+ int err = test_set_ao_flags(sk, true, 0);
+
+ if (err)
+ return err;
+ }
+ return test_add_key_vrf(sk, ao_password, keyflags, in_addr, prefix,
+ (uint8_t)vrf, sndid, rcvid);
+}
+
+static bool test_continue(const char *tst_name, int err,
+ fault_t inj, bool added_ao)
+{
+ bool expected_to_fail;
+
+ expected_to_fail = fault(PREINSTALL_AO) && added_ao;
+ expected_to_fail |= fault(PREINSTALL_MD5) && !added_ao;
+
+ if (!err) {
+ if (!expected_to_fail)
+ return true;
+ test_fail("%s: setsockopt()s were expected to fail", tst_name);
+ return false;
+ }
+ if (err != -EKEYREJECTED || !expected_to_fail) {
+ test_error("%s: setsockopt(%s) = %d", tst_name,
+ added_ao ? "TCP_AO_ADD_KEY" : "TCP_MD5SIG_EXT", err);
+ return false;
+ }
+ test_ok("%s: prefailed as expected: %m", tst_name);
+ return false;
+}
+
+static int open_add(const char *tst_name, unsigned int port,
+ unsigned int strategy,
+ union tcp_addr md5_addr, uint8_t md5_prefix, int md5_vrf,
+ union tcp_addr ao_addr, uint8_t ao_prefix,
+ int ao_vrf, bool set_ao_required,
+ uint8_t sndid, uint8_t rcvid,
+ fault_t inj)
+{
+ int sk;
+
+ sk = socket(test_family, SOCK_STREAM, IPPROTO_TCP);
+ if (sk < 0)
+ test_error("socket()");
+
+ if (client_bind(sk, this_ip_addr))
+ test_error("bind()");
+
+ if (strategy & PREINSTALL_MD5_FIRST) {
+ if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password))
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+ }
+
+ if (strategy & PREINSTALL_AO) {
+ int err = try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf,
+ sndid, rcvid, set_ao_required);
+
+ if (!test_continue(tst_name, err, inj, true)) {
+ close(sk);
+ return -1;
+ }
+ }
+
+ if (strategy & PREINSTALL_MD5) {
+ errno = 0;
+ test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password);
+ if (!test_continue(tst_name, -errno, inj, false)) {
+ close(sk);
+ return -1;
+ }
+ }
+
+ return sk;
+}
+
+static void try_to_preadd(const char *tst_name, unsigned int port,
+ unsigned int strategy,
+ union tcp_addr md5_addr, uint8_t md5_prefix,
+ int md5_vrf,
+ union tcp_addr ao_addr, uint8_t ao_prefix,
+ int ao_vrf, bool set_ao_required,
+ uint8_t sndid, uint8_t rcvid,
+ int needs_tcp_md5, int needs_vrf, fault_t inj)
+{
+ int sk;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+ if (needs_vrf && should_skip_test(tst_name, KCONFIG_NET_VRF))
+ return;
+
+ sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf,
+ ao_addr, ao_prefix, ao_vrf, set_ao_required,
+ sndid, rcvid, inj);
+ if (sk < 0)
+ return;
+
+ test_ok("%s", tst_name);
+ close(sk);
+}
+
+static void try_to_add(const char *tst_name, unsigned int port,
+ unsigned int strategy,
+ union tcp_addr md5_addr, uint8_t md5_prefix,
+ int md5_vrf,
+ union tcp_addr ao_addr, uint8_t ao_prefix,
+ int ao_vrf, uint8_t sndid, uint8_t rcvid,
+ int needs_tcp_md5, fault_t inj)
+{
+ time_t timeout;
+ int sk, ret;
+
+ if (needs_tcp_md5 && should_skip_test(tst_name, KCONFIG_TCP_MD5))
+ return;
+
+ sk = open_add(tst_name, port, strategy, md5_addr, md5_prefix, md5_vrf,
+ ao_addr, ao_prefix, ao_vrf, 0, sndid, rcvid, inj);
+ if (sk < 0)
+ return;
+
+ synchronize_threads(); /* preparations done */
+
+ timeout = fault(TIMEOUT) ? TEST_RETRANSMIT_SEC : TEST_TIMEOUT_SEC;
+ ret = _test_connect_socket(sk, this_ip_dest, port, timeout);
+
+ if (ret <= 0) {
+ test_error("%s: connect() returned %d", tst_name, ret);
+ goto out;
+ }
+
+ if (strategy & POSTINSTALL_MD5) {
+ if (test_set_md5(sk, md5_addr, md5_prefix, md5_vrf, md5_password)) {
+ if (fault(POSTINSTALL)) {
+ test_ok("%s: postfailed as expected", tst_name);
+ goto out;
+ } else {
+ test_error("setsockopt(TCP_MD5SIG_EXT)");
+ }
+ } else if (fault(POSTINSTALL)) {
+ test_fail("%s: post setsockopt() was expected to fail", tst_name);
+ goto out;
+ }
+ }
+
+ if (strategy & POSTINSTALL_AO) {
+ if (try_add_key_vrf(sk, ao_addr, ao_prefix, ao_vrf,
+ sndid, rcvid, 0)) {
+ if (fault(POSTINSTALL)) {
+ test_ok("%s: postfailed as expected", tst_name);
+ goto out;
+ } else {
+ test_error("setsockopt(TCP_AO_ADD_KEY)");
+ }
+ } else if (fault(POSTINSTALL)) {
+ test_fail("%s: post setsockopt() was expected to fail", tst_name);
+ goto out;
+ }
+ }
+
+out:
+ synchronize_threads(); /* test_kill_sk() */
+ /* _test_connect_socket() cleans up on failure */
+ if (ret > 0)
+ test_kill_sk(sk);
+}
+
+static void client_add_ip(union tcp_addr *client, const char *ip)
+{
+ int err, family = TEST_FAMILY;
+
+ if (inet_pton(family, ip, client) != 1)
+ test_error("Can't convert ip address %s", ip);
+
+ err = ip_addr_add(veth_name, family, *client, TEST_PREFIX);
+ if (err)
+ test_error("Failed to add ip address: %d", err);
+}
+
+static void client_add_ips(void)
+{
+ client_add_ip(&client2, __TEST_CLIENT_IP(2));
+ client_add_ip(&client3, __TEST_CLIENT_IP(3));
+ synchronize_threads(); /* server_add_routes() */
+}
+
+static void client_add_fail_tests(unsigned int *port)
+{
+ try_to_add("TCP-AO established: add TCP-MD5 key",
+ (*port)++, POSTINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+ 100, 100, 1, FAULT_POSTINSTALL);
+ try_to_add("TCP-MD5 established: add TCP-AO key",
+ (*port)++, PREINSTALL_MD5 | POSTINSTALL_AO,
+ this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+ 100, 100, 1, FAULT_POSTINSTALL);
+ try_to_add("non-signed established: add TCP-AO key",
+ (*port)++, POSTINSTALL_AO,
+ this_ip_dest, TEST_PREFIX, -1, this_ip_dest, TEST_PREFIX, 0,
+ 100, 100, 0, FAULT_POSTINSTALL);
+
+ try_to_add("TCP-AO key intersects with existing TCP-MD5 key",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1,
+ 100, 100, 1, FAULT_PREINSTALL_AO);
+ try_to_add("TCP-MD5 key intersects with existing TCP-AO key",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1, this_ip_addr, TEST_PREFIX, -1,
+ 100, 100, 1, FAULT_PREINSTALL_MD5);
+
+ try_to_preadd("TCP-MD5 key + TCP-AO required",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, true,
+ 100, 100, 1, 0, FAULT_PREINSTALL_AO);
+ try_to_preadd("TCP-AO required on socket + TCP-MD5 key",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, true,
+ 100, 100, 1, 0, FAULT_PREINSTALL_MD5);
+}
+
+static void client_vrf_tests(unsigned int *port)
+{
+ setup_vrfs();
+
+ /* The following restrictions for setsockopt()s are expected:
+ *
+ * |--------------|-----------------|-------------|-------------|
+ * | | MD5 key without | MD5 key | MD5 key |
+ * | | l3index | l3index=0 | l3index=N |
+ * |--------------|-----------------|-------------|-------------|
+ * | TCP-AO key | | | |
+ * | without | reject | reject | reject |
+ * | l3index | | | |
+ * |--------------|-----------------|-------------|-------------|
+ * | TCP-AO key | | | |
+ * | l3index=0 | reject | reject | allow |
+ * |--------------|-----------------|-------------|-------------|
+ * | TCP-AO key | | | |
+ * | l3index=N | reject | allow | reject |
+ * |--------------|-----------------|-------------|-------------|
+ */
+ try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (no l3index)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (no l3index)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=0)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (no l3index)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (no l3index) + TCP-MD5 key (l3index=N)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (no l3index)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+
+ try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (no l3index)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=0)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=0)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=0)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (l3index=0) + TCP-MD5 key (l3index=N)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, 0);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=0)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, 0, 0, 100, 100,
+ 1, 1, 0);
+
+ try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (no l3index)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, -1, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (no l3index) + TCP-AO key (l3index=N)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, -1,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+ try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=0)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, 0);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=0) + TCP-AO key (l3index=N)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, 0,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, 0);
+ try_to_preadd("VRF: TCP-AO key (l3index=N) + TCP-MD5 key (l3index=N)",
+ (*port)++, PREINSTALL_MD5 | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_MD5);
+ try_to_preadd("VRF: TCP-MD5 key (l3index=N) + TCP-AO key (l3index=N)",
+ (*port)++, PREINSTALL_MD5_FIRST | PREINSTALL_AO,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex,
+ this_ip_addr, TEST_PREFIX, test_vrf_ifindex, 0, 100, 100,
+ 1, 1, FAULT_PREINSTALL_AO);
+}
+
+static void *client_fn(void *arg)
+{
+ unsigned int port = test_server_port;
+ union tcp_addr addr_any = {};
+
+ client_add_ips();
+
+ try_connect("AO server (INADDR_ANY): AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ try_connect("AO server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("AO server (INADDR_ANY): unsigned client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ try_connect("AO server (AO_REQUIRED): AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+ try_connect("AO server (AO_REQUIRED): unsigned client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &client2);
+
+ try_connect("MD5 server (INADDR_ANY): AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("MD5 server (INADDR_ANY): MD5 client", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ try_connect("MD5 server (INADDR_ANY): no sign client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+
+ try_connect("no sign server: AO client", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, FAULT_TIMEOUT, 0, &this_ip_addr);
+ try_connect("no sign server: MD5 client", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("no sign server: no sign client", port++, NULL, 0,
+ NULL, 0, 100, 100, 0, 0, 0, &this_ip_addr);
+
+ try_connect("AO+MD5 server: AO client (matching)", port++, NULL, 0,
+ &addr_any, 0, 100, 100, 0, 0, 1, &client2);
+ try_connect("AO+MD5 server: AO client (misconfig, matching MD5)",
+ port++, NULL, 0, &addr_any, 0, 100, 100, 0,
+ FAULT_TIMEOUT, 1, &this_ip_addr);
+ try_connect("AO+MD5 server: AO client (misconfig, non-matching)",
+ port++, NULL, 0, &addr_any, 0, 100, 100, 0,
+ FAULT_TIMEOUT, 1, &client3);
+ try_connect("AO+MD5 server: MD5 client (matching)", port++, &addr_any, 0,
+ NULL, 0, 100, 100, 0, 0, 1, &this_ip_addr);
+ try_connect("AO+MD5 server: MD5 client (misconfig, matching AO)",
+ port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &client2);
+ try_connect("AO+MD5 server: MD5 client (misconfig, non-matching)",
+ port++, &addr_any, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &client3);
+ try_connect("AO+MD5 server: no sign client (unmatched)",
+ port++, NULL, 0, NULL, 0, 100, 100, 0, 0, 1, &client3);
+ try_connect("AO+MD5 server: no sign client (misconfig, matching AO)",
+ port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &client2);
+ try_connect("AO+MD5 server: no sign client (misconfig, matching MD5)",
+ port++, NULL, 0, NULL, 0, 100, 100, 0, FAULT_TIMEOUT,
+ 1, &this_ip_addr);
+
+ try_connect("AO+MD5 server: client with both [TCP-MD5] and TCP-AO keys",
+ port++, &this_ip_addr, TEST_PREFIX,
+ &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT,
+ 1, &this_ip_addr);
+ try_connect("AO+MD5 server: client with both TCP-MD5 and [TCP-AO] keys",
+ port++, &this_ip_addr, TEST_PREFIX,
+ &client2, TEST_PREFIX, 100, 100, 0, FAULT_KEYREJECT,
+ 1, &client2);
+
+ client_add_fail_tests(&port);
+ client_vrf_tests(&port);
+
+ return NULL;
+}
+
+int main(int argc, char *argv[])
+{
+ test_init(72, server_fn, client_fn);
+ return 0;
+}
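Review bot: In try_accept(), `before_cnt` and `after_cnt` are declared `uint64_t`, but the test_fail() and test_ok() counter messages both format them with `%zu`. That is only correct where size_t happens to be 64 bits wide; on a 32-bit build the variadic arguments are misread and the reported counter values are wrong. `<inttypes.h>` is already included at the top of the file, so the format strings can use it: `"%s: %s counter did not increase: %" PRIu64 " <= %" PRIu64` and `"%s: counter %s increased %" PRIu64 " => %" PRIu64`.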
diff --git a/tools/testing/selftests/net/test_bridge_backup_port.sh b/tools/testing/selftests/net/test_bridge_backup_port.sh
index 112cfd8a10ad..1b3f89e2b86e 100755
--- a/tools/testing/selftests/net/test_bridge_backup_port.sh
+++ b/tools/testing/selftests/net/test_bridge_backup_port.sh
@@ -35,9 +35,8 @@
# | sw1 | | sw2 |
# +------------------------------------+ +------------------------------------+
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# All tests in this script. Can be overridden with -t option.
TESTS="
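Review bot: `source lib.sh` is resolved against the current working directory and its result is not checked, while the local `ksft_skip=4` definition it replaces is removed in the same hunk. Started from any other directory, the script carries on without `ksft_skip` and without the lib.sh helpers that the later hunks call (`setup_ns`, `cleanup_ns`, `busywait`). Resolving the path from the script's own location and stopping on failure avoids that: `source "$(dirname "${BASH_SOURCE[0]}")/lib.sh" || exit 1`.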
@@ -125,6 +124,16 @@ tc_check_packets()
[[ $pkts == $count ]]
}
+bridge_link_check()
+{
+ local ns=$1; shift
+ local dev=$1; shift
+ local state=$1; shift
+
+ bridge -n $ns -d -j link show dev $dev | \
+ jq -e ".[][\"state\"] == \"$state\"" &> /dev/null
+}
+
################################################################################
# Setup
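Review bot: bridge_link_check() splices `$state` into the text of the jq program and leaves `$ns` and `$dev` unquoted, so a value containing a quote or whitespace changes the filter or the argument list instead of being compared literally. Passing the value as data keeps the filter constant: `jq -e --arg state "$state" '.[]["state"] == $state' &> /dev/null`, with `bridge -n "$ns" -d -j link show dev "$dev"` on the left of the pipe. The helper also makes jq a hard requirement of the script; if the prerequisite checks outside this hunk do not already test for it, a missing jq will show up as a busywait timeout rather than as a skip.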
@@ -132,9 +141,6 @@ setup_topo_ns()
{
local ns=$1; shift
- ip netns add $ns
- ip -n $ns link set dev lo up
-
ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
@@ -145,13 +151,14 @@ setup_topo()
{
local ns
- for ns in sw1 sw2; do
+ setup_ns sw1 sw2
+ for ns in $sw1 $sw2; do
setup_topo_ns $ns
done
ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns sw1 name veth0
- ip link set dev veth1 netns sw2 name veth0
+ ip link set dev veth0 netns $sw1 name veth0
+ ip link set dev veth1 netns $sw2 name veth0
}
setup_sw_common()
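Review bot: The result of `setup_ns sw1 sw2` in setup_topo() is not checked, and `$sw1`/`$sw2` are then expanded unquoted in the `for` loop and in both `ip link set dev ... netns` commands. If namespace creation fails and the variables stay empty (lib.sh is not shown here, so this is an assumption), the loop body never runs, the veth pair is still created in the caller's namespace and the `netns` argument silently drops out of the following commands. Consider `setup_ns sw1 sw2 || return 1` and quoting the expansions, e.g. `ip link set dev veth0 netns "$sw1" name veth0`.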
@@ -190,7 +197,7 @@ setup_sw_common()
setup_sw1()
{
- local ns=sw1
+ local ns=$sw1
local local_addr=192.0.2.33
local remote_addr=192.0.2.34
local veth_addr=192.0.2.49
@@ -203,7 +210,7 @@ setup_sw1()
setup_sw2()
{
- local ns=sw2
+ local ns=$sw2
local local_addr=192.0.2.34
local remote_addr=192.0.2.33
local veth_addr=192.0.2.50
@@ -229,11 +236,7 @@ setup()
cleanup()
{
- local ns
-
- for ns in h1 h2 sw1 sw2; do
- ip netns del $ns &> /dev/null
- done
+ cleanup_ns $sw1 $sw2
}
################################################################################
@@ -248,85 +251,90 @@ backup_port()
echo "Backup port"
echo "-----------"
- run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
- run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
# Initial state - check that packets are forwarded out of swp1 when it
# has a carrier and not forwarded out of any port when it does not have
# a carrier.
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
# Configure vx0 as the backup port of swp1 and check that packets are
# forwarded out of swp1 when it has a carrier and out of vx0 when swp1
# does not have a carrier.
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 0 "vx0 configured as backup port of swp1"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "Forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 3
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
# Remove vx0 as the backup port of swp1 and check that packets are no
# longer forwarded out of vx0 when swp1 does not have a carrier.
- run_cmd "bridge -n sw1 link set dev swp1 nobackup_port"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 nobackup_port"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 1 "vx0 not configured as backup port of swp1"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
}
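Review bot: After each `ip -n $sw1 link set dev swp1 carrier off` / `carrier on`, `log_test $?` now reports the status of the added `busywait ... bridge_link_check` line, so the exit status of the `run_cmd` that changes the carrier is no longer examined. A failure of the `ip` command itself only shows up once `BUSYWAIT_TIMEOUT` has elapsed, and it is logged as a link-state failure. Chaining the two keeps both results visible: `run_cmd "ip -n $sw1 link set dev swp1 carrier off" && busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled`. backup_port() begins before this hunk.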
@@ -339,125 +347,130 @@ backup_nhid()
echo "Backup nexthop ID"
echo "-----------------"
- run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
- run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
- run_cmd "bridge -n sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev vx0 self static dst 192.0.2.36 src_vni 10010"
- run_cmd "ip -n sw2 address replace 192.0.2.36/32 dev lo"
+ run_cmd "ip -n $sw2 address replace 192.0.2.36/32 dev lo"
# The first filter matches on packets forwarded using the backup
# nexthop ID and the second filter matches on packets forwarded using a
# regular VXLAN FDB entry.
- run_cmd "tc -n sw2 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
- run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"
+ run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 102 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.36 action pass"
# Configure vx0 as the backup port of swp1 and check that packets are
# forwarded out of swp1 when it has a carrier and out of vx0 when swp1
# does not have a carrier. When packets are forwarded out of vx0, check
# that they are forwarded by the VXLAN FDB entry.
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 0 "vx0 configured as backup port of swp1"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 0
+ tc_check_packets $sw1 "dev vx0 egress" 101 0
log_test $? 0 "No forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 1
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 1
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 0
+ tc_check_packets $sw2 "dev vx0 ingress" 101 0
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "Forwarding using VXLAN FDB entry"
- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
# Configure nexthop ID 10 as the backup nexthop ID of swp1 and check
# that when packets are forwarded out of vx0, they are forwarded using
# the backup nexthop ID.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
log_test $? 0 "nexthop ID 10 configured as backup nexthop ID of swp1"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "No forwarding out of vx0"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 2
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 2
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "Forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "No forwarding using VXLAN FDB entry"
- run_cmd "ip -n sw1 link set dev swp1 carrier on"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier on"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 forwarding
log_test $? 0 "swp1 carrier on"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 3
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 3
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "No forwarding using VXLAN FDB entry"
# Reset the backup nexthop ID to 0 and check that packets are no longer
# forwarded using the backup nexthop ID when swp1 does not have a
# carrier and are instead forwarded by the VXLAN FDB.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid\""
log_test $? 1 "No backup nexthop ID configured for swp1"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "Forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "No forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 1
+ tc_check_packets $sw2 "dev vx0 ingress" 102 1
log_test $? 0 "No forwarding using VXLAN FDB entry"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 4
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 4
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- tc_check_packets sw2 "dev vx0 ingress" 102 2
+ tc_check_packets $sw2 "dev vx0 ingress" 102 2
log_test $? 0 "Forwarding using VXLAN FDB entry"
}
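Review bot: The `busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled` and `... forwarding` lines added after each carrier change in this hunk have no comment, and the `log_test $? 0 "swp1 carrier off"` / `"swp1 carrier on"` that follows now reports the status of the wait instead of the `ip link set` command run through `run_cmd`. A comment at the first occurrence would make that explicit, for example `# Wait for the bridge port state to follow the carrier change; log_test below reports the result of the wait.` The same pattern is added in the backup_nhid_invalid hunk. `bridge_link_check` is defined outside the lines shown, so what it checks is inferred from its arguments, and backup_nhid() starts before this hunk.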
@@ -475,109 +488,110 @@ backup_nhid_invalid()
# is forwarded out of the VXLAN port, but dropped by the VXLAN driver
# and does not crash the host.
- run_cmd "tc -n sw1 qdisc replace dev swp1 clsact"
- run_cmd "tc -n sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev swp1 clsact"
+ run_cmd "tc -n $sw1 filter replace dev swp1 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac action pass"
# Drop all other Tx traffic to avoid changes to Tx drop counter.
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 2 handle 102 proto all matchall action drop"
- tx_drop=$(ip -n sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')
+ tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq '.[]["stats64"]["tx"]["dropped"]')
- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
- run_cmd "tc -n sw2 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
+ run_cmd "tc -n $sw2 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw2 filter replace dev vx0 ingress pref 1 handle 101 proto ip flower src_mac $smac dst_mac $dmac enc_key_id 10010 enc_dst_ip 192.0.2.34 action pass"
# First, check that redirection works.
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_port vx0\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_port vx0\""
log_test $? 0 "vx0 configured as backup port of swp1"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 10\""
log_test $? 0 "Valid nexthop as backup nexthop"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
log_test $? 0 "swp1 carrier off"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "Forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $tx_drop'"
log_test $? 0 "No Tx drop increase"
# Use a non-existent nexthop ID.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 20"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 20"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 20\""
log_test $? 0 "Non-existent nexthop as backup nexthop"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 1))'"
log_test $? 0 "Tx drop increased"
# Use a blckhole nexthop.
- run_cmd "ip -n sw1 nexthop replace id 30 blackhole"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 30"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
+ run_cmd "ip -n $sw1 nexthop replace id 30 blackhole"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 30"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 30\""
log_test $? 0 "Blackhole nexthop as backup nexthop"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 2))'"
log_test $? 0 "Tx drop increased"
# Non-group FDB nexthop.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 1"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 1"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 1\""
log_test $? 0 "Non-group FDB nexthop as backup nexthop"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 4
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 3))'"
log_test $? 0 "Tx drop increased"
# IPv6 address family nexthop.
- run_cmd "ip -n sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
- run_cmd "ip -n sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
- run_cmd "ip -n sw1 nexthop replace id 300 group 100/200 fdb"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 300"
- run_cmd "bridge -n sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
+ run_cmd "ip -n $sw1 nexthop replace id 100 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 200 via 2001:db8:100::1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 300 group 100/200 fdb"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 300"
+ run_cmd "bridge -n $sw1 -d link show dev swp1 | grep \"backup_nhid 300\""
log_test $? 0 "IPv6 address family nexthop as backup nexthop"
- run_cmd "ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
- tc_check_packets sw1 "dev swp1 egress" 101 0
+ run_cmd "ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 1"
+ tc_check_packets $sw1 "dev swp1 egress" 101 0
log_test $? 0 "No forwarding out of swp1"
- tc_check_packets sw1 "dev vx0 egress" 101 5
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
log_test $? 0 "Forwarding out of vx0"
- tc_check_packets sw2 "dev vx0 ingress" 101 1
+ tc_check_packets $sw2 "dev vx0 ingress" 101 1
log_test $? 0 "No forwarding using backup nexthop ID"
- run_cmd "ip -n sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
+ run_cmd "ip -n $sw1 -s -j link show dev vx0 | jq -e '.[][\"stats64\"][\"tx\"][\"dropped\"] == $((tx_drop + 4))'"
log_test $? 0 "Tx drop increased"
}
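Review bot: The baseline `tx_drop=$(ip -n $sw1 -s -j link show dev vx0 | jq ...)` is read outside `run_cmd` and never validated, yet every later drop-counter comparison in this hunk is built from it. If the read fails and leaves the variable empty, `== $tx_drop` presumably becomes an invalid jq expression while `$((tx_drop + 1))` quietly evaluates to 1, so the reported failures point away from the real cause. A guard right after the assignment would surface it, e.g. `[[ "$tx_drop" =~ ^[0-9]+$ ]] || { echo "cannot read vx0 Tx drop counter"; return 1; }`. How such a failure should be reported depends on helpers outside this hunk, and backup_nhid_invalid() starts before it.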
@@ -591,44 +605,46 @@ backup_nhid_ping()
echo "------------------------"
# Test bidirectional traffic when traffic is redirected in both VTEPs.
- sw1_mac=$(ip -n sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
- sw2_mac=$(ip -n sw2 -j -p link show br0.10 | jq -r '.[]["address"]')
+ sw1_mac=$(ip -n $sw1 -j -p link show br0.10 | jq -r '.[]["address"]')
+ sw2_mac=$(ip -n $sw2 -j -p link show br0.10 | jq -r '.[]["address"]')
- run_cmd "bridge -n sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
- run_cmd "bridge -n sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $sw2_mac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw2 fdb replace $sw1_mac dev swp1 master static vlan 10"
- run_cmd "ip -n sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
- run_cmd "ip -n sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"
+ run_cmd "ip -n $sw1 neigh replace 192.0.2.66 lladdr $sw2_mac nud perm dev br0.10"
+ run_cmd "ip -n $sw2 neigh replace 192.0.2.65 lladdr $sw1_mac nud perm dev br0.10"
- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw2 nexthop replace id 1 via 192.0.2.33 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1 fdb"
- run_cmd "ip -n sw2 nexthop replace id 10 group 1 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw2 nexthop replace id 1 via 192.0.2.33 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1 fdb"
+ run_cmd "ip -n $sw2 nexthop replace id 10 group 1 fdb"
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw2 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 10"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
- run_cmd "ip -n sw2 link set dev swp1 carrier off"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled
+ run_cmd "ip -n $sw2 link set dev swp1 carrier off"
+ busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw2 swp1 disabled
- run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
log_test $? 0 "Ping with backup nexthop ID"
# Reset the backup nexthop ID to 0 and check that ping fails.
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 0"
- run_cmd "bridge -n sw2 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 0"
+ run_cmd "bridge -n $sw2 link set dev swp1 backup_nhid 0"
- run_cmd "ip netns exec sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
+ run_cmd "ip netns exec $sw1 ping -i 0.1 -c 10 -w $PING_TIMEOUT 192.0.2.66"
log_test $? 1 "Ping after disabling backup nexthop ID"
}
backup_nhid_add_del_loop()
{
while true; do
- ip -n sw1 nexthop del id 10
- ip -n sw1 nexthop replace id 10 group 1/2 fdb
+ ip -n $sw1 nexthop del id 10
+ ip -n $sw1 nexthop replace id 10 group 1/2 fdb
done >/dev/null 2>&1
}
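Review bot: In backup_nhid_ping the two added `busywait $BUSYWAIT_TIMEOUT bridge_link_check ... swp1 disabled` calls discard their exit status, whereas the other hunks of this file follow the wait with `log_test $? 0 "swp1 carrier off"`. If either wait times out (presumably signalled by a non-zero status), the test still runs the ping, and "Ping with backup nexthop ID" fails without showing that the port never reached the expected state. Checking each wait keeps the cause visible, e.g. `log_test $? 0 "sw1 swp1 carrier off"` and `log_test $? 0 "sw2 swp1 carrier off"`. backup_nhid_ping() starts before this hunk.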
@@ -648,19 +664,19 @@ backup_nhid_torture()
# deleting the group. The test is considered successful if nothing
# crashed.
- run_cmd "ip -n sw1 nexthop replace id 1 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 2 via 192.0.2.34 fdb"
- run_cmd "ip -n sw1 nexthop replace id 10 group 1/2 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 1 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 2 via 192.0.2.34 fdb"
+ run_cmd "ip -n $sw1 nexthop replace id 10 group 1/2 fdb"
- run_cmd "bridge -n sw1 fdb replace $dmac dev swp1 master static vlan 10"
+ run_cmd "bridge -n $sw1 fdb replace $dmac dev swp1 master static vlan 10"
- run_cmd "bridge -n sw1 link set dev swp1 backup_port vx0"
- run_cmd "bridge -n sw1 link set dev swp1 backup_nhid 10"
- run_cmd "ip -n sw1 link set dev swp1 carrier off"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_port vx0"
+ run_cmd "bridge -n $sw1 link set dev swp1 backup_nhid 10"
+ run_cmd "ip -n $sw1 link set dev swp1 carrier off"
backup_nhid_add_del_loop &
pid1=$!
- ip netns exec sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
+ ip netns exec $sw1 mausezahn br0.10 -a $smac -b $dmac -A 198.51.100.1 -B 198.51.100.2 -t ip -p 100 -q -c 0 &
pid2=$!
sleep 30
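Review bot: The `ip -n $sw1 link set dev swp1 carrier off` in backup_nhid_torture is the only carrier change in the visible hunks of this file that is not followed by `busywait $BUSYWAIT_TIMEOUT bridge_link_check $sw1 swp1 disabled`. That may be deliberate, since the test only checks that nothing crashed, but then the first packets from the background mausezahn may be sent before the port state has changed. Either add the wait or record the decision in a comment, e.g. `# No wait for the port state here: the test only checks that nothing crashes.` The function continues past the end of this hunk.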
diff --git a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
index d80f2cd87614..02b986c9c247 100755
--- a/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
+++ b/tools/testing/selftests/net/test_bridge_neigh_suppress.sh
@@ -45,9 +45,8 @@
# | sw1 | | sw2 |
# +------------------------------------+ +------------------------------------+
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# All tests in this script. Can be overridden with -t option.
TESTS="
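Review bot: `source lib.sh` is looked up via PATH and the current working directory rather than the script's own directory, and its result is not checked. Started from elsewhere, the script carries on with `setup_ns` and `cleanup_ns` undefined, and presumably `ksft_skip` too, since this hunk removes its local definition. Anchoring the path to the script and stopping on failure avoids that: `source "$(dirname "$(readlink -e "${BASH_SOURCE[0]}")")/lib.sh" || exit 1`. Whether the test runner always changes into this directory first is not visible here.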
@@ -140,9 +139,6 @@ setup_topo_ns()
{
local ns=$1; shift
- ip netns add $ns
- ip -n $ns link set dev lo up
-
ip netns exec $ns sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
ip netns exec $ns sysctl -qw net.ipv6.conf.default.ignore_routes_with_linkdown=1
ip netns exec $ns sysctl -qw net.ipv6.conf.all.accept_dad=0
@@ -153,21 +149,14 @@ setup_topo()
{
local ns
- for ns in h1 h2 sw1 sw2; do
+ setup_ns h1 h2 sw1 sw2
+ for ns in $h1 $h2 $sw1 $sw2; do
setup_topo_ns $ns
done
- ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns h1 name eth0
- ip link set dev veth1 netns sw1 name swp1
-
- ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns sw1 name veth0
- ip link set dev veth1 netns sw2 name veth0
-
- ip link add name veth0 type veth peer name veth1
- ip link set dev veth0 netns h2 name eth0
- ip link set dev veth1 netns sw2 name swp1
+ ip -n $h1 link add name eth0 type veth peer name swp1 netns $sw1
+ ip -n $sw1 link add name veth0 type veth peer name veth0 netns $sw2
+ ip -n $h2 link add name eth0 type veth peer name swp1 netns $sw2
}
setup_host_common()
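Review bot: `setup_ns h1 h2 sw1 sw2` is called without checking its result, and the loop and the three `ip -n ... link add` lines then expand `$h1`, `$h2`, `$sw1` and `$sw2` unquoted. If the helper fails and leaves one of them empty, the word simply drops out of the command line and the following commands fail with unrelated argument errors. Assuming `setup_ns` reports failure through its exit status (it is defined in lib.sh, not shown here), `setup_ns h1 h2 sw1 sw2 || return 1` makes the cause explicit. Whether setup() acts on that return value is outside this hunk.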
@@ -190,7 +179,7 @@ setup_host_common()
setup_h1()
{
- local ns=h1
+ local ns=$h1
local v4addr1=192.0.2.1/28
local v4addr2=192.0.2.17/28
local v6addr1=2001:db8:1::1/64
@@ -201,7 +190,7 @@ setup_h1()
setup_h2()
{
- local ns=h2
+ local ns=$h2
local v4addr1=192.0.2.2/28
local v4addr2=192.0.2.18/28
local v6addr1=2001:db8:1::2/64
@@ -254,7 +243,7 @@ setup_sw_common()
setup_sw1()
{
- local ns=sw1
+ local ns=$sw1
local local_addr=192.0.2.33
local remote_addr=192.0.2.34
local veth_addr=192.0.2.49
@@ -265,7 +254,7 @@ setup_sw1()
setup_sw2()
{
- local ns=sw2
+ local ns=$sw2
local local_addr=192.0.2.34
local remote_addr=192.0.2.33
local veth_addr=192.0.2.50
@@ -291,11 +280,7 @@ setup()
cleanup()
{
- local ns
-
- for ns in h1 h2 sw1 sw2; do
- ip netns del $ns &> /dev/null
- done
+ cleanup_ns $h1 $h2 $sw1 $sw2
}
################################################################################
@@ -312,80 +297,80 @@ neigh_suppress_arp_common()
echo "Per-port ARP suppression - VLAN $vid"
echo "----------------------------------"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip arp_sip $sip arp_op request action pass"
# Initial state - check that ARP requests are not suppressed and that
# ARP replies are received.
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 0 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "ARP suppression"
# Enable neighbor suppression and check that nothing changes compared
# to the initial state.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 0 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "ARP suppression"
# Install an FDB entry for the remote host and check that nothing
# changes compared to the initial state.
- h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
- run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+ h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
log_test $? 0 "FDB entry installation"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 0 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "ARP suppression"
# Install a neighbor on the matching SVI interface and check that ARP
# requests are suppressed.
- run_cmd "ip -n sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid"
+ run_cmd "ip -n $sw1 neigh replace $tip lladdr $h2_mac nud permanent dev br0.$vid"
log_test $? 0 "Neighbor entry installation"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 0 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "ARP suppression"
# Take the second host down and check that ARP requests are suppressed
# and that ARP replies are received.
- run_cmd "ip -n h2 link set dev eth0.$vid down"
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
log_test $? 0 "H2 down"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 0 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "ARP suppression"
- run_cmd "ip -n h2 link set dev eth0.$vid up"
+ run_cmd "ip -n $h2 link set dev eth0.$vid up"
log_test $? 0 "H2 up"
# Disable neighbor suppression and check that ARP requests are no
# longer suppressed.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 0 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 4
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
log_test $? 0 "ARP suppression"
# Take the second host down and check that ARP requests are not
# suppressed and that ARP replies are not received.
- run_cmd "ip -n h2 link set dev eth0.$vid down"
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
log_test $? 0 "H2 down"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip -I eth0.$vid $tip"
log_test $? 1 "arping"
- tc_check_packets sw1 "dev vx0 egress" 101 5
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
log_test $? 0 "ARP suppression"
}
@@ -415,80 +400,80 @@ neigh_suppress_ns_common()
echo "Per-port NS suppression - VLAN $vid"
echo "---------------------------------"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr type 135 code 0 action pass"
# Initial state - check that NS messages are not suppressed and that ND
# messages are received.
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 0 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "NS suppression"
# Enable neighbor suppression and check that nothing changes compared
# to the initial state.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 0 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "NS suppression"
# Install an FDB entry for the remote host and check that nothing
# changes compared to the initial state.
- h2_mac=$(ip -n h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
- run_cmd "bridge -n sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
+ h2_mac=$(ip -n $h2 -j -p link show eth0.$vid | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac dev vx0 master static vlan $vid"
log_test $? 0 "FDB entry installation"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 0 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "NS suppression"
# Install a neighbor on the matching SVI interface and check that NS
# messages are suppressed.
- run_cmd "ip -n sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid"
+ run_cmd "ip -n $sw1 neigh replace $daddr lladdr $h2_mac nud permanent dev br0.$vid"
log_test $? 0 "Neighbor entry installation"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 0 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "NS suppression"
# Take the second host down and check that NS messages are suppressed
# and that ND messages are received.
- run_cmd "ip -n h2 link set dev eth0.$vid down"
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
log_test $? 0 "H2 down"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 0 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 3
+ tc_check_packets $sw1 "dev vx0 egress" 101 3
log_test $? 0 "NS suppression"
- run_cmd "ip -n h2 link set dev eth0.$vid up"
+ run_cmd "ip -n $h2 link set dev eth0.$vid up"
log_test $? 0 "H2 up"
# Disable neighbor suppression and check that NS messages are no longer
# suppressed.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 0 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 4
+ tc_check_packets $sw1 "dev vx0 egress" 101 4
log_test $? 0 "NS suppression"
# Take the second host down and check that NS messages are not
# suppressed and that ND messages are not received.
- run_cmd "ip -n h2 link set dev eth0.$vid down"
+ run_cmd "ip -n $h2 link set dev eth0.$vid down"
log_test $? 0 "H2 down"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr -w 5000 $daddr eth0.$vid"
log_test $? 2 "ndisc6"
- tc_check_packets sw1 "dev vx0 egress" 101 5
+ tc_check_packets $sw1 "dev vx0 egress" 101 5
log_test $? 0 "NS suppression"
}
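Review bot: The new `$sw1` and `$h2` expansions that sit outside the `run_cmd` strings are unquoted, so an empty namespace variable silently shifts arguments instead of failing. `tc_check_packets $sw1 "dev vx0 egress" 101 1` would then pass the filter spec in the namespace position, and `ip -n $h2 -j -p link show eth0.$vid` would likely take `-j` as the namespace name. I would write `tc_check_packets "$sw1" "dev vx0 egress" 101 1` and `h2_mac=$(ip -n "$h2" -j -p link show "eth0.$vid" | jq -r '.[]["address"]')`. The same applies to the `tc_check_packets` and `h2_mac1`/`h2_mac2` lines in the neigh_vlan_suppress_arp and neigh_vlan_suppress_ns hunks. The function body starts before this hunk, so where `$sw1` and `$h2` are assigned is not visible here.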
@@ -524,118 +509,118 @@ neigh_vlan_suppress_arp()
echo "Per-{Port, VLAN} ARP suppression"
echo "--------------------------------"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto 0x0806 flower indev swp1 arp_tip $tip1 arp_sip $sip1 arp_op request action pass"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto 0x0806 flower indev swp1 arp_tip $tip2 arp_sip $sip2 arp_op request action pass"
- h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
- h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
- run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
- run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
- run_cmd "ip -n sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
- run_cmd "ip -n sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+ h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+ h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+ run_cmd "ip -n $sw1 neigh replace $tip1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+ run_cmd "ip -n $sw1 neigh replace $tip2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
# Enable per-{Port, VLAN} neighbor suppression and check that ARP
# requests are not suppressed and that ARP replies are received.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
log_test $? 0 "\"neigh_vlan_suppress\" is on"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
log_test $? 0 "arping (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
log_test $? 0 "arping (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "ARP suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 1
+ tc_check_packets $sw1 "dev vx0 egress" 102 1
log_test $? 0 "ARP suppression (VLAN $vid2)"
# Enable neighbor suppression on VLAN 10 and check that only on this
# VLAN ARP requests are suppressed.
- run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
- run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
log_test $? 0 "arping (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
log_test $? 0 "arping (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "ARP suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 2
+ tc_check_packets $sw1 "dev vx0 egress" 102 2
log_test $? 0 "ARP suppression (VLAN $vid2)"
# Enable neighbor suppression on the port and check that it has no
# effect compared to previous state.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
log_test $? 0 "arping (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
log_test $? 0 "arping (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "ARP suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 3
+ tc_check_packets $sw1 "dev vx0 egress" 102 3
log_test $? 0 "ARP suppression (VLAN $vid2)"
# Disable neighbor suppression on the port and check that it has no
# effect compared to previous state.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
log_test $? 0 "arping (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
log_test $? 0 "arping (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "ARP suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 4
+ tc_check_packets $sw1 "dev vx0 egress" 102 4
log_test $? 0 "ARP suppression (VLAN $vid2)"
# Disable neighbor suppression on VLAN 10 and check that ARP requests
# are no longer suppressed on this VLAN.
- run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
- run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
log_test $? 0 "arping (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
log_test $? 0 "arping (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "ARP suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 5
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
log_test $? 0 "ARP suppression (VLAN $vid2)"
# Disable per-{Port, VLAN} neighbor suppression, enable neighbor
# suppression on the port and check that on both VLANs ARP requests are
# suppressed.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
log_test $? 0 "\"neigh_vlan_suppress\" is off"
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip1 -I eth0.$vid1 $tip1"
log_test $? 0 "arping (VLAN $vid1)"
- run_cmd "ip netns exec h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
+ run_cmd "ip netns exec $h1 arping -q -b -c 1 -w 5 -s $sip2 -I eth0.$vid2 $tip2"
log_test $? 0 "arping (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "ARP suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 5
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
log_test $? 0 "ARP suppression (VLAN $vid2)"
}
@@ -655,118 +640,118 @@ neigh_vlan_suppress_ns()
echo "Per-{Port, VLAN} NS suppression"
echo "-------------------------------"
- run_cmd "tc -n sw1 qdisc replace dev vx0 clsact"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass"
- run_cmd "tc -n sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass"
+ run_cmd "tc -n $sw1 qdisc replace dev vx0 clsact"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 101 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr1 type 135 code 0 action pass"
+ run_cmd "tc -n $sw1 filter replace dev vx0 egress pref 1 handle 102 proto ipv6 flower indev swp1 ip_proto icmpv6 dst_ip $maddr src_ip $saddr2 type 135 code 0 action pass"
- h2_mac1=$(ip -n h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
- h2_mac2=$(ip -n h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
- run_cmd "bridge -n sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
- run_cmd "bridge -n sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
- run_cmd "ip -n sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
- run_cmd "ip -n sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
+ h2_mac1=$(ip -n $h2 -j -p link show eth0.$vid1 | jq -r '.[]["address"]')
+ h2_mac2=$(ip -n $h2 -j -p link show eth0.$vid2 | jq -r '.[]["address"]')
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac1 dev vx0 master static vlan $vid1"
+ run_cmd "bridge -n $sw1 fdb replace $h2_mac2 dev vx0 master static vlan $vid2"
+ run_cmd "ip -n $sw1 neigh replace $daddr1 lladdr $h2_mac1 nud permanent dev br0.$vid1"
+ run_cmd "ip -n $sw1 neigh replace $daddr2 lladdr $h2_mac2 nud permanent dev br0.$vid2"
# Enable per-{Port, VLAN} neighbor suppression and check that NS
# messages are not suppressed and that ND messages are received.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress on\""
log_test $? 0 "\"neigh_vlan_suppress\" is on"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
log_test $? 0 "ndisc6 (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
log_test $? 0 "ndisc6 (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "NS suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 1
+ tc_check_packets $sw1 "dev vx0 egress" 102 1
log_test $? 0 "NS suppression (VLAN $vid2)"
# Enable neighbor suppression on VLAN 10 and check that only on this
# VLAN NS messages are suppressed.
- run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on (VLAN $vid1)"
- run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid2 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid2)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
log_test $? 0 "ndisc6 (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
log_test $? 0 "ndisc6 (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "NS suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 2
+ tc_check_packets $sw1 "dev vx0 egress" 102 2
log_test $? 0 "NS suppression (VLAN $vid2)"
# Enable neighbor suppression on the port and check that it has no
# effect compared to previous state.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
log_test $? 0 "ndisc6 (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
log_test $? 0 "ndisc6 (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "NS suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 3
+ tc_check_packets $sw1 "dev vx0 egress" 102 3
log_test $? 0 "NS suppression (VLAN $vid2)"
# Disable neighbor suppression on the port and check that it has no
# effect compared to previous state.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress off"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
log_test $? 0 "ndisc6 (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
log_test $? 0 "ndisc6 (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 1
+ tc_check_packets $sw1 "dev vx0 egress" 101 1
log_test $? 0 "NS suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 4
+ tc_check_packets $sw1 "dev vx0 egress" 102 4
log_test $? 0 "NS suppression (VLAN $vid2)"
# Disable neighbor suppression on VLAN 10 and check that NS messages
# are no longer suppressed on this VLAN.
- run_cmd "bridge -n sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
- run_cmd "bridge -n sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
+ run_cmd "bridge -n $sw1 vlan set vid $vid1 dev vx0 neigh_suppress off"
+ run_cmd "bridge -n $sw1 -d vlan show dev vx0 vid $vid1 | grep \"neigh_suppress off\""
log_test $? 0 "\"neigh_suppress\" is off (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
log_test $? 0 "ndisc6 (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
log_test $? 0 "ndisc6 (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "NS suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 5
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
log_test $? 0 "NS suppression (VLAN $vid2)"
# Disable per-{Port, VLAN} neighbor suppression, enable neighbor
# suppression on the port and check that on both VLANs NS messages are
# suppressed.
- run_cmd "bridge -n sw1 link set dev vx0 neigh_vlan_suppress off"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_vlan_suppress off"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_vlan_suppress off\""
log_test $? 0 "\"neigh_vlan_suppress\" is off"
- run_cmd "bridge -n sw1 link set dev vx0 neigh_suppress on"
- run_cmd "bridge -n sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
+ run_cmd "bridge -n $sw1 link set dev vx0 neigh_suppress on"
+ run_cmd "bridge -n $sw1 -d link show dev vx0 | grep \"neigh_suppress on\""
log_test $? 0 "\"neigh_suppress\" is on"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr1 -w 5000 $daddr1 eth0.$vid1"
log_test $? 0 "ndisc6 (VLAN $vid1)"
- run_cmd "ip netns exec h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
+ run_cmd "ip netns exec $h1 ndisc6 -q -r 1 -s $saddr2 -w 5000 $daddr2 eth0.$vid2"
log_test $? 0 "ndisc6 (VLAN $vid2)"
- tc_check_packets sw1 "dev vx0 egress" 101 2
+ tc_check_packets $sw1 "dev vx0 egress" 101 2
log_test $? 0 "NS suppression (VLAN $vid1)"
- tc_check_packets sw1 "dev vx0 egress" 102 5
+ tc_check_packets $sw1 "dev vx0 egress" 102 5
log_test $? 0 "NS suppression (VLAN $vid2)"
}
diff --git a/tools/testing/selftests/net/test_vxlan_mdb.sh b/tools/testing/selftests/net/test_vxlan_mdb.sh
index 6e996f8063cd..58da5de99ac4 100755
--- a/tools/testing/selftests/net/test_vxlan_mdb.sh
+++ b/tools/testing/selftests/net/test_vxlan_mdb.sh
@@ -55,9 +55,8 @@
# | ns2_v4 | | ns2_v6 |
# +------------------------------------+ +------------------------------------+
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
CONTROL_PATH_TESTS="
basic_star_g_ipv4_ipv4
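Review bot: `source lib.sh` names the file without a slash, so bash looks it up in `PATH` first and only then in the current working directory, never relative to the script itself. Since this test configures namespaces and links with root privileges, it should not pick up whatever `lib.sh` those locations happen to hold; anchoring the path, for example `source "$(dirname "${BASH_SOURCE[0]}")/lib.sh"`, avoids that. The hunk also drops the local `ksft_skip=4`, presumably because lib.sh defines it (not visible here), so a short comment such as `# ksft_skip, setup_ns and cleanup_ns come from lib.sh` would keep the dependency explicit.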
@@ -80,6 +79,7 @@ CONTROL_PATH_TESTS="
dump_ipv6_ipv4
dump_ipv4_ipv6
dump_ipv6_ipv6
+ flush
"
DATA_PATH_TESTS="
@@ -260,9 +260,6 @@ setup_common()
local local_addr1=$1; shift
local local_addr2=$1; shift
- ip netns add $ns1
- ip netns add $ns2
-
ip link add name veth0 type veth peer name veth1
ip link set dev veth0 netns $ns1 name veth0
ip link set dev veth1 netns $ns2 name veth0
@@ -273,36 +270,36 @@ setup_common()
setup_v4()
{
- setup_common ns1_v4 ns2_v4 192.0.2.1 192.0.2.2
+ setup_ns ns1_v4 ns2_v4
+ setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2
- ip -n ns1_v4 address add 192.0.2.17/28 dev veth0
- ip -n ns2_v4 address add 192.0.2.18/28 dev veth0
+ ip -n $ns1_v4 address add 192.0.2.17/28 dev veth0
+ ip -n $ns2_v4 address add 192.0.2.18/28 dev veth0
- ip -n ns1_v4 route add default via 192.0.2.18
- ip -n ns2_v4 route add default via 192.0.2.17
+ ip -n $ns1_v4 route add default via 192.0.2.18
+ ip -n $ns2_v4 route add default via 192.0.2.17
}
cleanup_v4()
{
- ip netns del ns2_v4
- ip netns del ns1_v4
+ cleanup_ns $ns2_v4 $ns1_v4
}
setup_v6()
{
- setup_common ns1_v6 ns2_v6 2001:db8:1::1 2001:db8:1::2
+ setup_ns ns1_v6 ns2_v6
+ setup_common $ns1_v6 $ns2_v6 2001:db8:1::1 2001:db8:1::2
- ip -n ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad
- ip -n ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad
+ ip -n $ns1_v6 address add 2001:db8:2::1/64 dev veth0 nodad
+ ip -n $ns2_v6 address add 2001:db8:2::2/64 dev veth0 nodad
- ip -n ns1_v6 route add default via 2001:db8:2::2
- ip -n ns2_v6 route add default via 2001:db8:2::1
+ ip -n $ns1_v6 route add default via 2001:db8:2::2
+ ip -n $ns2_v6 route add default via 2001:db8:2::1
}
cleanup_v6()
{
- ip netns del ns2_v6
- ip netns del ns1_v6
+ cleanup_ns $ns2_v6 $ns1_v6
}
setup()
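Review bot: `setup_v4` and `setup_v6` ignore the result of `setup_ns` and then expand the namespace variables unquoted. If namespace creation fails and `$ns1_v4` ends up empty (what lib.sh leaves behind is not visible here), `setup_common $ns1_v4 $ns2_v4 192.0.2.1 192.0.2.2` shifts the addresses into the namespace positions, and the veth pair that `setup_common` creates with a plain `ip link add` stays in the invoking namespace. I would stop early and quote: `setup_ns ns1_v4 ns2_v4 || return $?`, then `setup_common "$ns1_v4" "$ns2_v4" 192.0.2.1 192.0.2.2`, and likewise `cleanup_ns "$ns2_v4" "$ns1_v4"`; the same goes for the v6 pair. The caller `setup()` continues outside this hunk, so whether it checks the returned status cannot be confirmed from here.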
@@ -433,7 +430,7 @@ basic_common()
basic_star_g_ipv4_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp_key="grp 239.1.1.1"
local vtep_ip=198.51.100.100
@@ -446,7 +443,7 @@ basic_star_g_ipv4_ipv4()
basic_star_g_ipv6_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp_key="grp ff0e::1"
local vtep_ip=198.51.100.100
@@ -459,7 +456,7 @@ basic_star_g_ipv6_ipv4()
basic_star_g_ipv4_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp_key="grp 239.1.1.1"
local vtep_ip=2001:db8:1000::1
@@ -472,7 +469,7 @@ basic_star_g_ipv4_ipv6()
basic_star_g_ipv6_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp_key="grp ff0e::1"
local vtep_ip=2001:db8:1000::1
@@ -485,7 +482,7 @@ basic_star_g_ipv6_ipv6()
basic_sg_ipv4_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp_key="grp 239.1.1.1 src 192.0.2.129"
local vtep_ip=198.51.100.100
@@ -498,7 +495,7 @@ basic_sg_ipv4_ipv4()
basic_sg_ipv6_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp_key="grp ff0e::1 src 2001:db8:100::1"
local vtep_ip=198.51.100.100
@@ -511,7 +508,7 @@ basic_sg_ipv6_ipv4()
basic_sg_ipv4_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp_key="grp 239.1.1.1 src 192.0.2.129"
local vtep_ip=2001:db8:1000::1
@@ -524,7 +521,7 @@ basic_sg_ipv4_ipv6()
basic_sg_ipv6_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp_key="grp ff0e::1 src 2001:db8:100::1"
local vtep_ip=2001:db8:1000::1
@@ -694,7 +691,7 @@ star_g_common()
star_g_ipv4_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp=239.1.1.1
local src1=192.0.2.129
local src2=192.0.2.130
@@ -711,7 +708,7 @@ star_g_ipv4_ipv4()
star_g_ipv6_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp=ff0e::1
local src1=2001:db8:100::1
local src2=2001:db8:100::2
@@ -728,7 +725,7 @@ star_g_ipv6_ipv4()
star_g_ipv4_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp=239.1.1.1
local src1=192.0.2.129
local src2=192.0.2.130
@@ -745,7 +742,7 @@ star_g_ipv4_ipv6()
star_g_ipv6_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp=ff0e::1
local src1=2001:db8:100::1
local src2=2001:db8:100::2
@@ -793,7 +790,7 @@ sg_common()
sg_ipv4_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp=239.1.1.1
local src=192.0.2.129
local vtep_ip=198.51.100.100
@@ -808,7 +805,7 @@ sg_ipv4_ipv4()
sg_ipv6_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local grp=ff0e::1
local src=2001:db8:100::1
local vtep_ip=198.51.100.100
@@ -823,7 +820,7 @@ sg_ipv6_ipv4()
sg_ipv4_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp=239.1.1.1
local src=192.0.2.129
local vtep_ip=2001:db8:1000::1
@@ -838,7 +835,7 @@ sg_ipv4_ipv6()
sg_ipv6_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local grp=ff0e::1
local src=2001:db8:100::1
local vtep_ip=2001:db8:1000::1
@@ -918,7 +915,7 @@ dump_common()
dump_ipv4_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local local_addr=192.0.2.1
local remote_prefix=198.51.100.
local fn=ipv4_grps_get
@@ -932,7 +929,7 @@ dump_ipv4_ipv4()
dump_ipv6_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local local_addr=192.0.2.1
local remote_prefix=198.51.100.
local fn=ipv6_grps_get
@@ -946,7 +943,7 @@ dump_ipv6_ipv4()
dump_ipv4_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local local_addr=2001:db8:1::1
local remote_prefix=2001:db8:1000::
local fn=ipv4_grps_get
@@ -960,7 +957,7 @@ dump_ipv4_ipv6()
dump_ipv6_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local local_addr=2001:db8:1::1
local remote_prefix=2001:db8:1000::
local fn=ipv6_grps_get
@@ -972,6 +969,202 @@ dump_ipv6_ipv6()
dump_common $ns1 $local_addr $remote_prefix $fn
}
+flush()
+{
+ local num_entries
+
+ echo
+ echo "Control path: Flush"
+ echo "-------------------"
+
+ # Add entries with different attributes and check that they are all
+ # flushed when the flush command is given with no parameters.
+
+ # Different source VNI.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.2 permanent dst 198.51.100.1 src_vni 10011"
+
+ # Different routing protocol.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.3 permanent proto bgp dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.4 permanent proto zebra dst 198.51.100.1 src_vni 10010"
+
+ # Different destination IP.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.5 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.6 permanent dst 198.51.100.2 src_vni 10010"
+
+ # Different destination port.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.7 permanent dst 198.51.100.1 dst_port 11111 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.8 permanent dst 198.51.100.1 dst_port 22222 src_vni 10010"
+
+ # Different VNI.
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.9 permanent dst 198.51.100.1 vni 10010 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.10 permanent dst 198.51.100.1 vni 10020 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+ num_entries=$(bridge -n $ns1_v4 mdb show dev vx0 | wc -l)
+ [[ $num_entries -eq 0 ]]
+ log_test $? 0 "Flush all"
+
+ # Check that entries are flushed when port is specified as the VXLAN
+ # device and that an error is returned when port is specified as a
+ # different net device.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port vx0"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+ log_test $? 254 "Flush by port - matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 port veth0"
+ log_test $? 255 "Flush by port - non-matching"
+
+ # Check that when flushing by source VNI only entries programmed with
+ # the specified source VNI are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10011"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10011"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+ log_test $? 254 "Flush by source VNI - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10011"
+ log_test $? 0 "Flush by source VNI - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that all entries are flushed when "permanent" is specified and
+ # that an error is returned when "nopermanent" is specified.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 permanent"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010"
+ log_test $? 254 "Flush by \"permanent\" state"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 nopermanent"
+ log_test $? 255 "Flush by \"nopermanent\" state"
+
+ # Check that when flushing by routing protocol only entries programmed
+ # with the specified routing protocol are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto bgp dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent proto zebra dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 proto bgp"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto bgp\""
+ log_test $? 1 "Flush by routing protocol - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"proto zebra\""
+ log_test $? 0 "Flush by routing protocol - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that when flushing by destination IP only entries programmed
+ # with the specified destination IP are flushed and the rest are not.
+
+ # IPv4.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 198.51.100.2"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+ log_test $? 1 "Flush by IPv4 destination IP - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+ log_test $? 0 "Flush by IPv4 destination IP - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # IPv6.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 2001:db8:1000::2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst 2001:db8:1000::2"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::2"
+ log_test $? 1 "Flush by IPv6 destination IP - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 2001:db8:1000::1"
+ log_test $? 0 "Flush by IPv6 destination IP - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that when flushing by UDP destination port only entries
+ # programmed with the specified port are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 11111 dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 11111"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 11111\""
+ log_test $? 1 "Flush by UDP destination port - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \"dst_port 22222\""
+ log_test $? 0 "Flush by UDP destination port - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # When not specifying a UDP destination port for an entry, traffic is
+ # encapsulated with the device's UDP destination port. Check that when
+ # flushing by the device's UDP destination port only entries programmed
+ # with this port are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst_port 22222 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 dst_port 4789"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+ log_test $? 1 "Flush by device's UDP destination port - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+ log_test $? 0 "Flush by device's UDP destination port - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Check that when flushing by destination VNI only entries programmed
+ # with the specified destination VNI are flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20011 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 20010"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20010\""
+ log_test $? 1 "Flush by destination VNI - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep \" vni 20011\""
+ log_test $? 0 "Flush by destination VNI - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # When not specifying a destination VNI for an entry, traffic is
+ # encapsulated with the source VNI. Check that when flushing by a
+ # destination VNI that is equal to the source VNI only such entries are
+ # flushed and the rest are not.
+
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent dst 198.51.100.1 src_vni 10010"
+ run_cmd "bridge -n $ns1_v4 mdb add dev vx0 port vx0 grp 239.1.1.1 permanent vni 20010 dst 198.51.100.2 src_vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vni 10010"
+
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.1"
+ log_test $? 1 "Flush by destination VNI equal to source VNI - matching"
+ run_cmd "bridge -n $ns1_v4 -d -s mdb get dev vx0 grp 239.1.1.1 src_vni 10010 | grep 198.51.100.2"
+ log_test $? 0 "Flush by destination VNI equal to source VNI - non-matching"
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0"
+
+ # Test that an error is returned when trying to flush using VLAN ID.
+
+ run_cmd "bridge -n $ns1_v4 mdb flush dev vx0 vid 10"
+ log_test $? 255 "Flush by VLAN ID"
+}
+
################################################################################
# Tests - Data path
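Review bot: The "Flush all" check in flush() can pass without any entry ever being flushed, because none of the ten `mdb add` results is checked: if the adds fail, `mdb show dev vx0 | wc -l` is already 0 and `log_test $? 0 "Flush all"` reports success. An empty table also satisfies the "matching" checks that expect 254 from `mdb get` or 1 from `grep`, and "Flush by port - matching" and "Flush by \"permanent\" state" have no companion check on a surviving entry to catch it. Asserting that the entries exist before the first `mdb flush dev vx0` closes the gap, for example `[[ $(bridge -n $ns1_v4 mdb show dev vx0 | wc -l) -ne 0 ]]` followed by `log_test $? 0 "Flush all - entries installed"`. run_cmd and log_test are defined outside this hunk, so how they propagate the exit status is assumed from their use here.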
@@ -984,6 +1177,7 @@ encap_params_common()
local plen=$1; shift
local enc_ethtype=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
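Review bot: `grp_dmac` is inserted in the middle of encap_params_common's positional argument list, between `grp` and `src`, so a caller still using the old order would shift silently, with the source address landing in `grp_dmac` and the mausezahn string in `src`. The four encap_params_* wrappers in this diff are updated to match, but nothing documents the order. A header comment such as `# Args: ns1 ns2 vtep1_ip vtep2_ip plen enc_ethtype grp grp_dmac src mz` above the function would make the contract explicit. The same applies to starg_exclude_ir_common, starg_include_ir_common, starg_exclude_p2mp_common and starg_include_p2mp_common, which gain the parameter in the same position relative to `grp`. The function starts and ends outside this hunk.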
@@ -1002,11 +1196,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep2_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_dst_ip $vtep1_ip action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Destination IP - no match"
@@ -1019,20 +1213,20 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip dst_port 1111 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 4789 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Default destination port - no match"
run_cmd "tc -n $ns2 filter replace dev veth0 ingress pref 1 handle 101 proto $enc_ethtype flower ip_proto udp dst_port 1111 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - match"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev veth0 ingress" 101 1
log_test $? 0 "Non-default destination port - no match"
@@ -1045,11 +1239,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10010 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Default destination VNI - no match"
@@ -1057,11 +1251,11 @@ encap_params_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent dst $vtep1_ip vni 10010 src_vni 10020"
run_cmd "tc -n $ns2 filter replace dev vx0 ingress pref 1 handle 101 proto all flower enc_key_id 10020 action pass"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - match"
- run_cmd "ip netns exec $ns1 $mz br0.20 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.20 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Non-default destination VNI - no match"
@@ -1072,13 +1266,14 @@ encap_params_common()
encap_params_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local enc_ethtype="ip"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
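Review bot: The new `local grp_dmac=01:00:5e:01:01:01` is a hand-computed companion of `grp=239.1.1.1`, and nothing in encap_params_ipv4_ipv4 ties the two values together. If the group is changed later without the MAC, the test would send frames whose Ethernet and IP destinations disagree, and the failure would be hard to trace back to this line. A comment on the added line would record the dependency, e.g. `# Ethernet multicast address for $grp: 01:00:5e + low 23 bits of the group`, and for the `ff0e::1` wrappers `# 33:33 + low 32 bits of the group`. The same pair is repeated in every encap_params_*, starg_exclude_ir_*, starg_include_ir_*, starg_exclude_p2mp_* and starg_include_p2mp_* wrapper in this diff. The function body continues outside this hunk.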
@@ -1086,18 +1281,19 @@ encap_params_ipv4_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local enc_ethtype="ip"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1105,18 +1301,19 @@ encap_params_ipv6_ipv4()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
encap_params_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local enc_ethtype="ipv6"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1124,18 +1321,19 @@ encap_params_ipv4_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn"
+ $grp $grp_dmac $src "mausezahn"
}
encap_params_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local enc_ethtype="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1143,7 +1341,7 @@ encap_params_ipv6_ipv6()
echo "------------------------------------------------------------------"
encap_params_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $enc_ethtype \
- $grp $src "mausezahn -6"
+ $grp $grp_dmac $src "mausezahn -6"
}
starg_exclude_ir_common()
@@ -1154,6 +1352,7 @@ starg_exclude_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1175,14 +1374,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1192,14 +1391,14 @@ starg_exclude_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1208,12 +1407,13 @@ starg_exclude_ir_common()
starg_exclude_ir_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1222,17 +1422,18 @@ starg_exclude_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1241,17 +1442,18 @@ starg_exclude_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_ir_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1260,17 +1462,18 @@ starg_exclude_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_exclude_ir_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1279,7 +1482,7 @@ starg_exclude_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_exclude_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_common()
@@ -1290,6 +1493,7 @@ starg_include_ir_common()
local vtep2_ip=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1311,14 +1515,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
log_test $? 0 "Block excluded source - second VTEP"
# Check that valid source is forwarded to both VTEPs.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1328,14 +1532,14 @@ starg_include_ir_common()
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep2_ip src_vni 10010"
# Check that invalid source is not forwarded to any VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Block excluded source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
log_test $? 0 "Block excluded source after removal - second VTEP"
# Check that valid source is forwarded to the remaining VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Forward valid source after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1344,12 +1548,13 @@ starg_include_ir_common()
starg_include_ir_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1358,17 +1563,18 @@ starg_include_ir_ipv4_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1377,17 +1583,18 @@ starg_include_ir_ipv6_ipv4()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_include_ir_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1396,17 +1603,18 @@ starg_include_ir_ipv4_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn"
+ $grp_dmac $valid_src $invalid_src "mausezahn"
}
starg_include_ir_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1415,7 +1623,7 @@ starg_include_ir_ipv6_ipv6()
echo "-------------------------------------------------------------"
starg_include_ir_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $grp \
- $valid_src $invalid_src "mausezahn -6"
+ $grp_dmac $valid_src $invalid_src "mausezahn -6"
}
starg_exclude_p2mp_common()
@@ -1425,6 +1633,7 @@ starg_exclude_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1442,12 +1651,12 @@ starg_exclude_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode exclude source_list $invalid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1455,18 +1664,19 @@ starg_exclude_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
starg_exclude_p2mp_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1474,17 +1684,18 @@ starg_exclude_p2mp_ipv4_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
starg_exclude_p2mp_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1492,17 +1703,18 @@ starg_exclude_p2mp_ipv6_ipv4()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
starg_exclude_p2mp_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1510,17 +1722,18 @@ starg_exclude_p2mp_ipv4_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
starg_exclude_p2mp_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1528,7 +1741,7 @@ starg_exclude_p2mp_ipv6_ipv6()
echo "Data path: (*, G) EXCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_exclude_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1539,6 +1752,7 @@ starg_include_p2mp_common()
local mcast_grp=$1; shift
local plen=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local valid_src=$1; shift
local invalid_src=$1; shift
local mz=$1; shift
@@ -1556,12 +1770,12 @@ starg_include_p2mp_common()
run_cmd "bridge -n $ns1 mdb replace dev vx0 port vx0 grp $grp permanent filter_mode include source_list $valid_src dst $mcast_grp src_vni 10010 via veth0"
# Check that invalid source is not forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $invalid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 0
log_test $? 0 "Block excluded source"
# Check that valid source is forwarded.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Forward valid source"
@@ -1569,18 +1783,19 @@ starg_include_p2mp_common()
run_cmd "ip -n $ns2 address del $mcast_grp/$plen dev veth0"
# Check that valid source is not received anymore.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $valid_src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Receive of valid source after removal from group"
}
starg_include_p2mp_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local mcast_grp=238.1.1.1
local plen=32
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1588,17 +1803,18 @@ starg_include_p2mp_ipv4_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
starg_include_p2mp_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local mcast_grp=238.1.1.1
local plen=32
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1606,17 +1822,18 @@ starg_include_p2mp_ipv6_ipv4()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv4 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
starg_include_p2mp_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local mcast_grp=ff0e::2
local plen=128
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local valid_src=192.0.2.129
local invalid_src=192.0.2.145
@@ -1624,17 +1841,18 @@ starg_include_p2mp_ipv4_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv4 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn"
}
starg_include_p2mp_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local mcast_grp=ff0e::2
local plen=128
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local valid_src=2001:db8:100::1
local invalid_src=2001:db8:200::1
@@ -1642,7 +1860,7 @@ starg_include_p2mp_ipv6_ipv6()
echo "Data path: (*, G) INCLUDE - P2MP - IPv6 overlay / IPv6 underlay"
echo "---------------------------------------------------------------"
- starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp \
+ starg_include_p2mp_common $ns1 $ns2 $mcast_grp $plen $grp $grp_dmac \
$valid_src $invalid_src "mausezahn -6"
}
@@ -1654,6 +1872,7 @@ egress_vni_translation_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1689,32 +1908,33 @@ egress_vni_translation_common()
# Make sure that packets sent from the first VTEP over VLAN 10 are
# received by the SVI corresponding to the L3VNI (14000 / VLAN 4000) on
# the second VTEP, since it is configured as PVID.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - PVID configured"
# Remove PVID flag from VLAN 4000 on the second VTEP and make sure
# packets are no longer received by the SVI interface.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 1
log_test $? 0 "Egress VNI translation - no PVID configured"
# Reconfigure the PVID and make sure packets are received again.
run_cmd "bridge -n $ns2 vlan add vid 4000 dev vx0 pvid"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev br0.4000 ingress" 101 2
log_test $? 0 "Egress VNI translation - PVID reconfigured"
}
egress_vni_translation_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local mcast_grp=238.1.1.1
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1722,17 +1942,18 @@ egress_vni_translation_ipv4_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local mcast_grp=238.1.1.1
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1740,17 +1961,18 @@ egress_vni_translation_ipv6_ipv4()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
egress_vni_translation_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local mcast_grp=ff0e::2
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
@@ -1758,17 +1980,18 @@ egress_vni_translation_ipv4_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn"
+ $grp_dmac $src "mausezahn"
}
egress_vni_translation_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local mcast_grp=ff0e::2
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
@@ -1776,7 +1999,7 @@ egress_vni_translation_ipv6_ipv6()
echo "----------------------------------------------------------------"
egress_vni_translation_common $ns1 $ns2 $mcast_grp $plen $proto $grp \
- $src "mausezahn -6"
+ $grp_dmac $src "mausezahn -6"
}
all_zeros_mdb_common()
@@ -1789,12 +2012,18 @@ all_zeros_mdb_common()
local vtep4_ip=$1; shift
local plen=$1; shift
local ipv4_grp=239.1.1.1
+ local ipv4_grp_dmac=01:00:5e:01:01:01
local ipv4_unreg_grp=239.2.2.2
+ local ipv4_unreg_grp_dmac=01:00:5e:02:02:02
local ipv4_ll_grp=224.0.0.100
+ local ipv4_ll_grp_dmac=01:00:5e:00:00:64
local ipv4_src=192.0.2.129
local ipv6_grp=ff0e::1
+ local ipv6_grp_dmac=33:33:00:00:00:01
local ipv6_unreg_grp=ff0e::2
+ local ipv6_unreg_grp_dmac=33:33:00:00:00:02
local ipv6_ll_grp=ff02::1
+ local ipv6_ll_grp_dmac=33:33:00:00:00:01
local ipv6_src=2001:db8:100::1
# Install all-zeros (catchall) MDB entries for IPv4 and IPv6 traffic
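Review bot: The six `*_dmac` locals added here are derived by hand from the group addresses beside them, and nothing records the rule. `ipv6_grp_dmac` and `ipv6_ll_grp_dmac` are both `33:33:00:00:00:01`, which looks like a copy-paste slip even though both groups end in `::1`. A comment above the block would settle it: `# dmac = 01:00:5e + low 23 bits of the IPv4 group, 33:33 + low 32 bits of the IPv6 group`. The same address/MAC pairs are repeated as `grp_dmac`, `grp1_dmac` and `grp2_dmac` in the per-family wrappers of this file, so editing a group in one place can leave a stale MAC elsewhere. all_zeros_mdb_common starts before this hunk and continues after it.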
@@ -1830,7 +2059,7 @@ all_zeros_mdb_common()
# Send registered IPv4 multicast and make sure it only arrives to the
# first VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_grp_dmac -A $ipv4_src -B $ipv4_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "Registered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -1838,7 +2067,7 @@ all_zeros_mdb_common()
# Send unregistered IPv4 multicast that is not link-local and make sure
# it arrives to the first and second VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_unreg_grp_dmac -A $ipv4_src -B $ipv4_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Unregistered IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1846,7 +2075,7 @@ all_zeros_mdb_common()
# Send IPv4 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn br0.10 -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn br0.10 -a own -b $ipv4_ll_grp_dmac -A $ipv4_src -B $ipv4_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 2
log_test $? 0 "Link-local IPv4 multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 1
@@ -1881,7 +2110,7 @@ all_zeros_mdb_common()
# Send registered IPv6 multicast and make sure it only arrives to the
# third VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_grp_dmac -A $ipv6_src -B $ipv6_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 1
log_test $? 0 "Registered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 0
@@ -1889,7 +2118,7 @@ all_zeros_mdb_common()
# Send unregistered IPv6 multicast that is not link-local and make sure
# it arrives to the third and fourth VTEPs.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_unreg_grp_dmac -A $ipv6_src -B $ipv6_unreg_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Unregistered IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -1897,7 +2126,7 @@ all_zeros_mdb_common()
# Send IPv6 link-local multicast traffic and make sure it does not
# arrive to any VTEP.
- run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 mausezahn -6 br0.10 -a own -b $ipv6_ll_grp_dmac -A $ipv6_src -B $ipv6_ll_grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 103 2
log_test $? 0 "Link-local IPv6 multicast - third VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 104 1
@@ -1929,8 +2158,8 @@ all_zeros_mdb_common()
all_zeros_mdb_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.101
local vtep2_ip=198.51.100.102
local vtep3_ip=198.51.100.103
@@ -1947,8 +2176,8 @@ all_zeros_mdb_ipv4()
all_zeros_mdb_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local vtep3_ip=2001:db8:3000::1
@@ -1972,6 +2201,7 @@ mdb_fdb_common()
local plen=$1; shift
local proto=$1; shift
local grp=$1; shift
+ local grp_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
@@ -1995,7 +2225,7 @@ mdb_fdb_common()
# Send IP multicast traffic and make sure it is forwarded by the MDB
# and only arrives to the first VTEP.
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 0
@@ -2012,7 +2242,7 @@ mdb_fdb_common()
# Remove the MDB entry and make sure that IP multicast is now forwarded
# by the FDB to the second VTEP.
run_cmd "bridge -n $ns1 mdb del dev vx0 port vx0 grp $grp dst $vtep1_ip src_vni 10010"
- run_cmd "ip netns exec $ns1 $mz br0.10 -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
+ run_cmd "ip netns exec $ns1 $mz br0.10 -a own -b $grp_dmac -A $src -B $grp -t udp sp=12345,dp=54321 -p 100 -c 1 -q"
tc_check_packets "$ns2" "dev vx0 ingress" 101 1
log_test $? 0 "IP multicast after removal - first VTEP"
tc_check_packets "$ns2" "dev vx0 ingress" 102 2
@@ -2021,78 +2251,82 @@ mdb_fdb_common()
mdb_fdb_ipv4_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv4()
{
- local ns1=ns1_v4
- local ns2=ns2_v4
+ local ns1=$ns1_v4
+ local ns2=$ns2_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local plen=32
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv4 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_fdb_ipv4_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local proto="ipv4"
local grp=239.1.1.1
+ local grp_dmac=01:00:5e:01:01:01
local src=192.0.2.129
echo
echo "Data path: MDB with FDB - IPv4 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn"
}
mdb_fdb_ipv6_ipv6()
{
- local ns1=ns1_v6
- local ns2=ns2_v6
+ local ns1=$ns1_v6
+ local ns2=$ns2_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local plen=128
local proto="ipv6"
local grp=ff0e::1
+ local grp_dmac=33:33:00:00:00:01
local src=2001:db8:100::1
echo
echo "Data path: MDB with FDB - IPv6 overlay / IPv6 underlay"
echo "------------------------------------------------------"
- mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp $src \
- "mausezahn -6"
+ mdb_fdb_common $ns1 $ns2 $vtep1_ip $vtep2_ip $plen $proto $grp \
+ $grp_dmac $src "mausezahn -6"
}
mdb_grp1_loop()
@@ -2127,7 +2361,9 @@ mdb_torture_common()
local vtep1_ip=$1; shift
local vtep2_ip=$1; shift
local grp1=$1; shift
+ local grp1_dmac=$1; shift
local grp2=$1; shift
+ local grp2_dmac=$1; shift
local src=$1; shift
local mz=$1; shift
local pid1
@@ -2152,9 +2388,9 @@ mdb_torture_common()
pid1=$!
mdb_grp2_loop $ns1 $vtep1_ip $vtep2_ip $grp2 &
pid2=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp1_dmac -A $src -B $grp1 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid3=$!
- ip netns exec $ns1 $mz br0.10 -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
+ ip netns exec $ns1 $mz br0.10 -a own -b $grp2_dmac -A $src -B $grp2 -t udp sp=12345,dp=54321 -p 100 -c 0 -q &
pid4=$!
sleep 30
@@ -2166,70 +2402,78 @@ mdb_torture_common()
mdb_torture_ipv4_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv4()
{
- local ns1=ns1_v4
+ local ns1=$ns1_v4
local vtep1_ip=198.51.100.100
local vtep2_ip=198.51.100.200
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv4 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
mdb_torture_ipv4_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=239.1.1.1
+ local grp1_dmac=01:00:5e:01:01:01
local grp2=239.2.2.2
+ local grp2_dmac=01:00:5e:02:02:02
local src=192.0.2.129
echo
echo "Data path: MDB torture test - IPv4 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn"
}
mdb_torture_ipv6_ipv6()
{
- local ns1=ns1_v6
+ local ns1=$ns1_v6
local vtep1_ip=2001:db8:1000::1
local vtep2_ip=2001:db8:2000::1
local grp1=ff0e::1
+ local grp1_dmac=33:33:00:00:00:01
local grp2=ff0e::2
+ local grp2_dmac=33:33:00:00:00:02
local src=2001:db8:100::1
echo
echo "Data path: MDB torture test - IPv6 overlay / IPv6 underlay"
echo "----------------------------------------------------------"
- mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp2 $src \
- "mausezahn -6"
+ mdb_torture_common $ns1 $vtep1_ip $vtep2_ip $grp1 $grp1_dmac $grp2 \
+ $grp2_dmac $src "mausezahn -6"
}
################################################################################
@@ -2296,9 +2540,9 @@ if [ ! -x "$(command -v jq)" ]; then
exit $ksft_skip
fi
-bridge mdb help 2>&1 | grep -q "get"
+bridge mdb help 2>&1 | grep -q "flush"
if [ $? -ne 0 ]; then
- echo "SKIP: iproute2 bridge too old, missing VXLAN MDB get support"
+ echo "SKIP: iproute2 bridge too old, missing VXLAN MDB flush support"
exit $ksft_skip
fi
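Review bot: The feature probe `grep -q "flush"` matches that string anywhere in the help text, and its result is read back through `$?` on the next line. Testing the pipeline directly and matching a whole word is tighter: `if ! bridge mdb help 2>&1 | grep -qw "flush"; then`. The probe only shows that iproute2 knows the keyword. Kernel-side support is presumably checked elsewhere or assumed, and a one-line comment saying which would help.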
diff --git a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
index f75212bf142c..b8805983b728 100755
--- a/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
+++ b/tools/testing/selftests/net/test_vxlan_nolocalbypass.sh
@@ -9,9 +9,8 @@
# option and verifies that packets are no longer received by the second VXLAN
# device.
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
TESTS="
nolocalbypass
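Review bot: The added `source lib.sh` names the helper without a path, and bash resolves a slash-less `source` argument through `PATH` before trying the current directory. A selftest run as root therefore loads whichever `lib.sh` is found first, and fails outright when started from another directory. Anchoring it to the script's location removes both dependencies: `source "$(dirname "$0")/lib.sh"`. This hunk also drops the local `ksft_skip=4`, so after a failed `source` any later `exit $ksft_skip` would run with an empty value. The same added line appears in test_vxlan_under_vrf.sh and test_vxlan_vnifiltering.sh further down.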
@@ -98,20 +97,19 @@ tc_check_packets()
setup()
{
- ip netns add ns1
+ setup_ns ns1
- ip -n ns1 link set dev lo up
- ip -n ns1 address add 192.0.2.1/32 dev lo
- ip -n ns1 address add 198.51.100.1/32 dev lo
+ ip -n $ns1 address add 192.0.2.1/32 dev lo
+ ip -n $ns1 address add 198.51.100.1/32 dev lo
- ip -n ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \
+ ip -n $ns1 link add name vx0 up type vxlan id 100 local 198.51.100.1 \
dstport 4789 nolearning
- ip -n ns1 link add name vx1 up type vxlan id 100 dstport 4790
+ ip -n $ns1 link add name vx1 up type vxlan id 100 dstport 4790
}
cleanup()
{
- ip netns del ns1 &> /dev/null
+ cleanup_ns $ns1
}
################################################################################
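Review bot: setup() does not check the status of `setup_ns ns1`, and every command after it expands `$ns1` unquoted. If namespace creation fails, the `ip -n $ns1 …` lines lose an argument and fail with errors that point away from the real cause. I would write `setup_ns ns1 || return 1` and quote the expansions, e.g. `ip -n "$ns1" address add 192.0.2.1/32 dev lo`. The removed `ip -n ns1 link set dev lo up` presumably relies on setup_ns bringing loopback up; a comment such as `# setup_ns is expected to bring lo up` would keep that dependency visible.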
@@ -122,40 +120,40 @@ nolocalbypass()
local smac=00:01:02:03:04:05
local dmac=00:0a:0b:0c:0d:0e
- run_cmd "bridge -n ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790"
+ run_cmd "bridge -n $ns1 fdb add $dmac dev vx0 self static dst 192.0.2.1 port 4790"
- run_cmd "tc -n ns1 qdisc add dev vx1 clsact"
- run_cmd "tc -n ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass"
+ run_cmd "tc -n $ns1 qdisc add dev vx1 clsact"
+ run_cmd "tc -n $ns1 filter add dev vx1 ingress pref 1 handle 101 proto all flower src_mac $smac dst_mac $dmac action pass"
- run_cmd "tc -n ns1 qdisc add dev lo clsact"
- run_cmd "tc -n ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop"
+ run_cmd "tc -n $ns1 qdisc add dev lo clsact"
+ run_cmd "tc -n $ns1 filter add dev lo ingress pref 1 handle 101 proto ip flower ip_proto udp dst_port 4790 action drop"
- run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+ run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
log_test $? 0 "localbypass enabled"
- run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
- tc_check_packets "ns1" "dev vx1 ingress" 101 1
+ tc_check_packets "$ns1" "dev vx1 ingress" 101 1
log_test $? 0 "Packet received by local VXLAN device - localbypass"
- run_cmd "ip -n ns1 link set dev vx0 type vxlan nolocalbypass"
+ run_cmd "ip -n $ns1 link set dev vx0 type vxlan nolocalbypass"
- run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'"
+ run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == false'"
log_test $? 0 "localbypass disabled"
- run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
- tc_check_packets "ns1" "dev vx1 ingress" 101 1
+ tc_check_packets "$ns1" "dev vx1 ingress" 101 1
log_test $? 0 "Packet not received by local VXLAN device - nolocalbypass"
- run_cmd "ip -n ns1 link set dev vx0 type vxlan localbypass"
+ run_cmd "ip -n $ns1 link set dev vx0 type vxlan localbypass"
- run_cmd "ip -n ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
+ run_cmd "ip -n $ns1 -d -j link show dev vx0 | jq -e '.[][\"linkinfo\"][\"info_data\"][\"localbypass\"] == true'"
log_test $? 0 "localbypass enabled"
- run_cmd "ip netns exec ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
+ run_cmd "ip netns exec $ns1 mausezahn vx0 -a $smac -b $dmac -c 1 -p 100 -q"
- tc_check_packets "ns1" "dev vx1 ingress" 101 2
+ tc_check_packets "$ns1" "dev vx1 ingress" 101 2
log_test $? 0 "Packet received by local VXLAN device - localbypass"
}
diff --git a/tools/testing/selftests/net/test_vxlan_under_vrf.sh b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
index 1fd1250ebc66..ae8fbe3f0779 100755
--- a/tools/testing/selftests/net/test_vxlan_under_vrf.sh
+++ b/tools/testing/selftests/net/test_vxlan_under_vrf.sh
@@ -43,15 +43,14 @@
# This tests both the connectivity between vm-1 and vm-2, and that the underlay
# can be moved in and out of the vrf by unsetting and setting veth0's master.
+source lib.sh
set -e
cleanup() {
ip link del veth-hv-1 2>/dev/null || true
ip link del veth-tap 2>/dev/null || true
- for ns in hv-1 hv-2 vm-1 vm-2; do
- ip netns del $ns 2>/dev/null || true
- done
+ cleanup_ns $hv_1 $hv_2 $vm_1 $vm_2
}
# Clean start
@@ -60,72 +59,75 @@ cleanup &> /dev/null
[[ $1 == "clean" ]] && exit 0
trap cleanup EXIT
+setup_ns hv_1 hv_2 vm_1 vm_2
+hv[1]=$hv_1
+hv[2]=$hv_2
+vm[1]=$vm_1
+vm[2]=$vm_2
# Setup "Hypervisors" simulated with netns
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking() {
- hv=$1
+ id=$1
- ip netns add hv-$hv
- ip link set veth-hv-$hv netns hv-$hv
- ip -netns hv-$hv link set veth-hv-$hv name veth0
+ ip link set veth-hv-$id netns ${hv[$id]}
+ ip -netns ${hv[$id]} link set veth-hv-$id name veth0
- ip -netns hv-$hv link add vrf-underlay type vrf table 1
- ip -netns hv-$hv link set vrf-underlay up
- ip -netns hv-$hv addr add 172.16.0.$hv/24 dev veth0
- ip -netns hv-$hv link set veth0 up
+ ip -netns ${hv[$id]} link add vrf-underlay type vrf table 1
+ ip -netns ${hv[$id]} link set vrf-underlay up
+ ip -netns ${hv[$id]} addr add 172.16.0.$id/24 dev veth0
+ ip -netns ${hv[$id]} link set veth0 up
- ip -netns hv-$hv link add br0 type bridge
- ip -netns hv-$hv link set br0 up
+ ip -netns ${hv[$id]} link add br0 type bridge
+ ip -netns ${hv[$id]} link set br0 up
- ip -netns hv-$hv link add vxlan0 type vxlan id 10 local 172.16.0.$hv dev veth0 dstport 4789
- ip -netns hv-$hv link set vxlan0 master br0
- ip -netns hv-$hv link set vxlan0 up
+ ip -netns ${hv[$id]} link add vxlan0 type vxlan id 10 local 172.16.0.$id dev veth0 dstport 4789
+ ip -netns ${hv[$id]} link set vxlan0 master br0
+ ip -netns ${hv[$id]} link set vxlan0 up
}
setup-hv-networking 1
setup-hv-networking 2
# Check connectivity between HVs by pinging hv-2 from hv-1
echo -n "Checking HV connectivity "
-ip netns exec hv-1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $hv_1 ping -c 1 -W 1 172.16.0.2 &> /dev/null || (echo "[FAIL]"; false)
echo "[ OK ]"
# Setups a "VM" simulated by a netns an a veth pair
setup-vm() {
id=$1
- ip netns add vm-$id
ip link add veth-tap type veth peer name veth-hv
- ip link set veth-tap netns hv-$id
- ip -netns hv-$id link set veth-tap master br0
- ip -netns hv-$id link set veth-tap up
+ ip link set veth-tap netns ${hv[$id]}
+ ip -netns ${hv[$id]} link set veth-tap master br0
+ ip -netns ${hv[$id]} link set veth-tap up
ip link set veth-hv address 02:1d:8d:dd:0c:6$id
- ip link set veth-hv netns vm-$id
- ip -netns vm-$id addr add 10.0.0.$id/24 dev veth-hv
- ip -netns vm-$id link set veth-hv up
+ ip link set veth-hv netns ${vm[$id]}
+ ip -netns ${vm[$id]} addr add 10.0.0.$id/24 dev veth-hv
+ ip -netns ${vm[$id]} link set veth-hv up
}
setup-vm 1
setup-vm 2
# Setup VTEP routes to make ARP work
-bridge -netns hv-1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
-bridge -netns hv-2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
+bridge -netns $hv_1 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.2 self permanent
+bridge -netns $hv_2 fdb add 00:00:00:00:00:00 dev vxlan0 dst 172.16.0.1 self permanent
echo -n "Check VM connectivity through VXLAN (underlay in the default VRF) "
-ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
echo "[ OK ]"
# Move the underlay to a non-default VRF
-ip -netns hv-1 link set veth0 vrf vrf-underlay
-ip -netns hv-1 link set vxlan0 down
-ip -netns hv-1 link set vxlan0 up
-ip -netns hv-2 link set veth0 vrf vrf-underlay
-ip -netns hv-2 link set vxlan0 down
-ip -netns hv-2 link set vxlan0 up
+ip -netns $hv_1 link set veth0 vrf vrf-underlay
+ip -netns $hv_1 link set vxlan0 down
+ip -netns $hv_1 link set vxlan0 up
+ip -netns $hv_2 link set veth0 vrf vrf-underlay
+ip -netns $hv_2 link set vxlan0 down
+ip -netns $hv_2 link set vxlan0 up
echo -n "Check VM connectivity through VXLAN (underlay in a VRF) "
-ip netns exec vm-1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
+ip netns exec $vm_1 ping -c 1 -W 1 10.0.0.2 &> /dev/null || (echo "[FAIL]"; false)
echo "[ OK ]"
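Review bot: The "Clean start" call to `cleanup` and the `[[ $1 == "clean" ]] && exit 0` line both run before the added `setup_ns hv_1 hv_2 vm_1 vm_2`, so `$hv_1` … `$vm_2` are still empty and `cleanup_ns` is called with no names. Since the namespaces are no longer the fixed `hv-1`/`vm-1` names, the `clean` mode can now only remove the two veth links; namespaces left by an interrupted run stay behind. That deserves a comment next to the early exit: `# "clean" removes only the root-namespace veth links; namespaces come from setup_ns and are not known here`. Because `set -e` is active, it is also worth confirming in lib.sh that `cleanup_ns` returns 0 when given no arguments. It is the last command in `cleanup`, so a non-zero status there would end the script at the clean start.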
diff --git a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
index 8c3ac0a72545..6127a78ee988 100755
--- a/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
+++ b/tools/testing/selftests/net/test_vxlan_vnifiltering.sh
@@ -78,10 +78,8 @@
#
#
# This test tests the new vxlan vnifiltering api
-
+source lib.sh
ret=0
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
# all tests in this script. Can be overridden with -t option
TESTS="
@@ -148,18 +146,18 @@ run_cmd()
}
check_hv_connectivity() {
- ip netns exec hv-1 ping -c 1 -W 1 $1 &>/dev/null
+ ip netns exec $hv_1 ping -c 1 -W 1 $1 &>/dev/null
sleep 1
- ip netns exec hv-1 ping -c 1 -W 1 $2 &>/dev/null
+ ip netns exec $hv_1 ping -c 1 -W 1 $2 &>/dev/null
return $?
}
check_vm_connectivity() {
- run_cmd "ip netns exec vm-11 ping -c 1 -W 1 10.0.10.12"
+ run_cmd "ip netns exec $vm_11 ping -c 1 -W 1 10.0.10.12"
log_test $? 0 "VM connectivity over $1 (ipv4 default rdst)"
- run_cmd "ip netns exec vm-21 ping -c 1 -W 1 10.0.10.22"
+ run_cmd "ip netns exec $vm_21 ping -c 1 -W 1 10.0.10.22"
log_test $? 0 "VM connectivity over $1 (ipv6 default rdst)"
}
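Review bot: check_hv_connectivity throws away the result of its first ping and returns only the status of the second, and `$1`, `$2` and `$hv_1` are all expanded unquoted. If the first ping is a deliberate warm-up before the `sleep 1`, say so in the function: `# first ping only primes neighbour resolution; its status is ignored on purpose`. If both addresses are meant to be verified, the first line needs `|| return 1`.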
@@ -167,26 +165,23 @@ cleanup() {
ip link del veth-hv-1 2>/dev/null || true
ip link del vethhv-11 vethhv-12 vethhv-21 vethhv-22 2>/dev/null || true
- for ns in hv-1 hv-2 vm-11 vm-21 vm-12 vm-22 vm-31 vm-32; do
- ip netns del $ns 2>/dev/null || true
- done
+ cleanup_ns $hv_1 $hv_2 $vm_11 $vm_21 $vm_12 $vm_22 $vm_31 $vm_32
}
trap cleanup EXIT
setup-hv-networking() {
- hv=$1
+ id=$1
local1=$2
mask1=$3
local2=$4
mask2=$5
- ip netns add hv-$hv
- ip link set veth-hv-$hv netns hv-$hv
- ip -netns hv-$hv link set veth-hv-$hv name veth0
- ip -netns hv-$hv addr add $local1/$mask1 dev veth0
- ip -netns hv-$hv addr add $local2/$mask2 dev veth0
- ip -netns hv-$hv link set veth0 up
+ ip link set veth-hv-$id netns ${hv[$id]}
+ ip -netns ${hv[$id]} link set veth-hv-$id name veth0
+ ip -netns ${hv[$id]} addr add $local1/$mask1 dev veth0
+ ip -netns ${hv[$id]} addr add $local2/$mask2 dev veth0
+ ip -netns ${hv[$id]} link set veth0 up
}
# Setups a "VM" simulated by a netns an a veth pair
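Review bot: setup-hv-networking no longer creates its namespace and now depends on `hv[$id]` being filled in before the call. Nothing in the function states or checks that, and where the array is populated is not visible in this hunk. With an empty entry, the unquoted `${hv[$id]}` disappears from `ip link set veth-hv-$id netns ${hv[$id]}` and the command fails with a misleading error. A guard at the top would catch it, `[ -n "${hv[$id]}" ] || { echo "hv[$id] not set" >&2; return 1; }`, with the expansions quoted as `"${hv[$id]}"`. `id`, `local1` and the other arguments are assigned as globals; declaring them `local` would stop them leaking into setup-vm, which uses similar names.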
@@ -208,21 +203,20 @@ setup-vm() {
lastvxlandev=""
# create bridge
- ip -netns hv-$hvid link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \
+ ip -netns ${hv[$hvid]} link add br$brid type bridge vlan_filtering 1 vlan_default_pvid 0 \
mcast_snooping 0
- ip -netns hv-$hvid link set br$brid up
+ ip -netns ${hv[$hvid]} link set br$brid up
# create vm namespace and interfaces and connect to hypervisor
# namespace
- ip netns add vm-$vmid
hvvethif="vethhv-$vmid"
vmvethif="veth-$vmid"
ip link add $hvvethif type veth peer name $vmvethif
- ip link set $hvvethif netns hv-$hvid
- ip link set $vmvethif netns vm-$vmid
- ip -netns hv-$hvid link set $hvvethif up
- ip -netns vm-$vmid link set $vmvethif up
- ip -netns hv-$hvid link set $hvvethif master br$brid
+ ip link set $hvvethif netns ${hv[$hvid]}
+ ip link set $vmvethif netns ${vm[$vmid]}
+ ip -netns ${hv[$hvid]} link set $hvvethif up
+ ip -netns ${vm[$vmid]} link set $vmvethif up
+ ip -netns ${hv[$hvid]} link set $hvvethif master br$brid
# configure VM vlan/vni filtering on hypervisor
for vmap in $(echo $vattrs | cut -d "," -f1- --output-delimiter=' ')
@@ -234,9 +228,9 @@ setup-vm() {
local vtype=$(echo $vmap | awk -F'-' '{print ($5)}')
local port=$(echo $vmap | awk -F'-' '{print ($6)}')
- ip -netns vm-$vmid link add name $vmvethif.$vid link $vmvethif type vlan id $vid
- ip -netns vm-$vmid addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid
- ip -netns vm-$vmid link set $vmvethif.$vid up
+ ip -netns ${vm[$vmid]} link add name $vmvethif.$vid link $vmvethif type vlan id $vid
+ ip -netns ${vm[$vmid]} addr add 10.0.$vid.$vmid/24 dev $vmvethif.$vid
+ ip -netns ${vm[$vmid]} link set $vmvethif.$vid up
tid=$vid
vxlandev="vxlan$brid"
@@ -268,35 +262,35 @@ setup-vm() {
# create vxlan device
if [ "$vxlandev" != "$lastvxlandev" ]; then
- ip -netns hv-$hvid link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null
- ip -netns hv-$hvid link set $vxlandev master br$brid
- ip -netns hv-$hvid link set $vxlandev up
+ ip -netns ${hv[$hvid]} link add $vxlandev type vxlan local $localip $vxlandevflags dev veth0 2>/dev/null
+ ip -netns ${hv[$hvid]} link set $vxlandev master br$brid
+ ip -netns ${hv[$hvid]} link set $vxlandev up
lastvxlandev=$vxlandev
fi
# add vlan
- bridge -netns hv-$hvid vlan add vid $vid dev $hvvethif
- bridge -netns hv-$hvid vlan add vid $vid pvid dev $vxlandev
+ bridge -netns ${hv[$hvid]} vlan add vid $vid dev $hvvethif
+ bridge -netns ${hv[$hvid]} vlan add vid $vid pvid dev $vxlandev
# Add bridge vni filter for tx
if [[ -n $vtype && $vtype == "metadata" || $vtype == "vnifilter" || $vtype == "vnifilterg" ]]; then
- bridge -netns hv-$hvid link set dev $vxlandev vlan_tunnel on
- bridge -netns hv-$hvid vlan add dev $vxlandev vid $vid tunnel_info id $tid
+ bridge -netns ${hv[$hvid]} link set dev $vxlandev vlan_tunnel on
+ bridge -netns ${hv[$hvid]} vlan add dev $vxlandev vid $vid tunnel_info id $tid
fi
if [[ -n $vtype && $vtype == "metadata" ]]; then
- bridge -netns hv-$hvid fdb add 00:00:00:00:00:00 dev $vxlandev \
+ bridge -netns ${hv[$hvid]} fdb add 00:00:00:00:00:00 dev $vxlandev \
src_vni $tid vni $tid dst $group self
elif [[ -n $vtype && $vtype == "vnifilter" ]]; then
# Add per vni rx filter with 'bridge vni' api
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid
+ bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid
elif [[ -n $vtype && $vtype == "vnifilterg" ]]; then
# Add per vni group config with 'bridge vni' api
if [ -n "$group" ]; then
if [ $mcast -eq 1 ]; then
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid group $group
+ bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid group $group
else
- bridge -netns hv-$hvid vni add dev $vxlandev vni $tid remote $group
+ bridge -netns ${hv[$hvid]} vni add dev $vxlandev vni $tid remote $group
fi
fi
fi
@@ -306,14 +300,14 @@ setup-vm() {
setup_vnifilter_api()
{
ip link add veth-host type veth peer name veth-testns
- ip netns add testns
- ip link set veth-testns netns testns
+ setup_ns testns
+ ip link set veth-testns netns $testns
}
cleanup_vnifilter_api()
{
ip link del veth-host 2>/dev/null || true
- ip netns del testns 2>/dev/null || true
+ ip netns del $testns 2>/dev/null || true
}
# tests vxlan filtering api
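Review bot: cleanup_vnifilter_api still removes the namespace by hand with `ip netns del $testns 2>/dev/null || true`, while setup_vnifilter_api now creates it through setup_ns. If the helper library keeps its own record of the namespaces it created (not visible here), the paired helper, as in the `cleanup_ns $h1 $h2 $r1 $r2` call that traceroute.sh gains in this same change, would keep that record consistent. `$testns` is also unquoted in both functions, so an empty value leaves `ip link set veth-testns netns $testns` with a missing argument; writing `"$testns"` makes that failure obvious.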
@@ -331,52 +325,52 @@ vxlan_vnifilter_api()
# Duplicate vni test
# create non-vnifiltering traditional vni device
- run_cmd "ip -netns testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789"
+ run_cmd "ip -netns $testns link add vxlan100 type vxlan id 100 local $localip dev veth-testns dstport 4789"
log_test $? 0 "Create traditional vxlan device"
# create vni filtering device
- run_cmd "ip -netns testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789"
+ run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan vnifilter local $localip dev veth-testns dstport 4789"
log_test $? 1 "Cannot create vnifilter device without external flag"
- run_cmd "ip -netns testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+ run_cmd "ip -netns $testns link add vxlan-ext1 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
log_test $? 0 "Creating external vxlan device with vnifilter flag"
- run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 100"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 100"
log_test $? 0 "Cannot set in-use vni id on vnifiltering device"
- run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200"
log_test $? 0 "Set new vni id on vnifiltering device"
- run_cmd "ip -netns testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
+ run_cmd "ip -netns $testns link add vxlan-ext2 type vxlan external vnifilter local $localip dev veth-testns dstport 4789"
log_test $? 0 "Create second external vxlan device with vnifilter flag"
- run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 200"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 200"
log_test $? 255 "Cannot set in-use vni id on vnifiltering device"
- run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300"
log_test $? 0 "Set new vni id on vnifiltering device"
# check in bridge vni show
- run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300"
log_test $? 0 "Update vni id on vnifiltering device"
- run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 400"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 400"
log_test $? 0 "Add new vni id on vnifiltering device"
# add multicast group per vni
- run_cmd "bridge -netns testns vni add dev vxlan-ext1 vni 200 group $group"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext1 vni 200 group $group"
log_test $? 0 "Set multicast group on existing vni"
# add multicast group per vni
- run_cmd "bridge -netns testns vni add dev vxlan-ext2 vni 300 group $group"
+ run_cmd "bridge -netns $testns vni add dev vxlan-ext2 vni 300 group $group"
log_test $? 0 "Set multicast group on existing vni"
# set vnifilter on an existing external vxlan device
- run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external vnifilter"
+ run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external vnifilter"
log_test $? 2 "Cannot set vnifilter flag on a device"
# change vxlan vnifilter flag
- run_cmd "ip -netns testns link set dev vxlan-ext1 type vxlan external novnifilter"
+ run_cmd "ip -netns $testns link set dev vxlan-ext1 type vxlan external novnifilter"
log_test $? 2 "Cannot unset vnifilter flag on a device"
}
@@ -390,12 +384,20 @@ vxlan_vnifilter_datapath()
hv1addr2="2002:fee1::1"
hv2addr2="2002:fee1::2"
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64 $hv2addr1 $hv2addr2
setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64 $hv1addr1 $hv1addr2
check_hv_connectivity hv2addr1 hv2addr2
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
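Review bot: The `setup_ns hv_1 hv_2` / `hv[1]=$hv_1` / `vm[11]=$vm_11` sequence added here is repeated verbatim in vxlan_vnifilter_datapath_pervni, vxlan_vnifilter_datapath_mgroup and vxlan_vnifilter_datapath_mgroup_pervni, and with two extra entries in vxlan_vnifilter_metadata_and_traditional_mix, so five copies have to be kept in step by hand. A small helper that calls setup_ns and fills the `hv` and `vm` arrays would remove that. setup-vm also expands `${hv[$hvid]}` unquoted, so a missed entry leaves `ip -netns` taking `link` as the namespace name; `"${hv[$hvid]:?}"` at the point of use would fail loudly instead. The function bodies continue outside the hunk.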
@@ -415,12 +417,20 @@ vxlan_vnifilter_datapath_pervni()
hv1addr2="2002:fee1::1"
hv2addr2="2002:fee1::2"
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
check_hv_connectivity hv2addr1 hv2addr2
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilterg,20-v4-$hv1addr1-$hv2addr1-vnifilterg 0
setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilterg,20-v6-$hv1addr2-$hv2addr2-vnifilterg 0
@@ -440,12 +450,20 @@ vxlan_vnifilter_datapath_mgroup()
group="239.1.1.100"
group6="ff07::1"
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
check_hv_connectivity hv2addr1 hv2addr2
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilter,20-v4-$hv1addr1-$group-vnifilter 1
setup-vm 1 21 2 "10-v6-$hv1addr2-$group6-vnifilter,20-v6-$hv1addr2-$group6-vnifilter" 1
@@ -464,12 +482,20 @@ vxlan_vnifilter_datapath_mgroup_pervni()
group="239.1.1.100"
group6="ff07::1"
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
check_hv_connectivity hv2addr1 hv2addr2
+ setup_ns vm_11 vm_21 vm_12 vm_22
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[12]=$vm_12
+ vm[22]=$vm_22
setup-vm 1 11 1 10-v4-$hv1addr1-$group-vnifilterg,20-v4-$hv1addr1-$group-vnifilterg 1
setup-vm 1 21 2 10-v6-$hv1addr2-$group6-vnifilterg,20-v6-$hv1addr2-$group6-vnifilterg 1
@@ -486,12 +512,22 @@ vxlan_vnifilter_metadata_and_traditional_mix()
hv1addr2="2002:fee1::1"
hv2addr2="2002:fee1::2"
+ setup_ns hv_1 hv_2
+ hv[1]=$hv_1
+ hv[2]=$hv_2
ip link add veth-hv-1 type veth peer name veth-hv-2
setup-hv-networking 1 $hv1addr1 24 $hv1addr2 64
setup-hv-networking 2 $hv2addr1 24 $hv2addr2 64
check_hv_connectivity hv2addr1 hv2addr2
+ setup_ns vm_11 vm_21 vm_31 vm_12 vm_22 vm_32
+ vm[11]=$vm_11
+ vm[21]=$vm_21
+ vm[31]=$vm_31
+ vm[12]=$vm_12
+ vm[22]=$vm_22
+ vm[32]=$vm_32
setup-vm 1 11 1 10-v4-$hv1addr1-$hv2addr1-vnifilter,20-v4-$hv1addr1-$hv2addr1-vnifilter 0
setup-vm 1 21 2 10-v6-$hv1addr2-$hv2addr2-vnifilter,20-v6-$hv1addr2-$hv2addr2-vnifilter 0
setup-vm 1 31 3 30-v4-$hv1addr1-$hv2addr1-default-4790,40-v6-$hv1addr2-$hv2addr2-default-4790,50-v4-$hv1addr1-$hv2addr1-metadata-4791 0
@@ -504,13 +540,13 @@ vxlan_vnifilter_metadata_and_traditional_mix()
check_vm_connectivity "vnifiltering vxlan pervni remote mix"
# check VM connectivity over traditional/non-vxlan filtering vxlan devices
- run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.30.32"
+ run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.30.32"
log_test $? 0 "VM connectivity over traditional vxlan (ipv4 default rdst)"
- run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.40.32"
+ run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.40.32"
log_test $? 0 "VM connectivity over traditional vxlan (ipv6 default rdst)"
- run_cmd "ip netns exec vm-31 ping -c 1 -W 1 10.0.50.32"
+ run_cmd "ip netns exec $vm_31 ping -c 1 -W 1 10.0.50.32"
log_test $? 0 "VM connectivity over metadata nonfiltering vxlan (ipv4 default rdst)"
}
diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 464853a7f982..f27a12d2a2c9 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -707,6 +707,20 @@ TEST_F(tls, splice_from_pipe)
EXPECT_EQ(memcmp(mem_send, mem_recv, send_len), 0);
}
+TEST_F(tls, splice_more)
+{
+ unsigned int f = SPLICE_F_NONBLOCK | SPLICE_F_MORE | SPLICE_F_GIFT;
+ int send_len = TLS_PAYLOAD_MAX_LEN;
+ char mem_send[TLS_PAYLOAD_MAX_LEN];
+ int i, send_pipe = 1;
+ int p[2];
+
+ ASSERT_GE(pipe(p), 0);
+ EXPECT_GE(write(p[1], mem_send, send_len), 0);
+ for (i = 0; i < 32; i++)
+ EXPECT_EQ(splice(p[0], NULL, self->fd, NULL, send_pipe, f), 1);
+}
+
TEST_F(tls, splice_from_pipe2)
{
int send_len = 16000;
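Review bot: In splice_more the pipe is filled from `mem_send`, which is never initialised, and the write is checked only with `EXPECT_GE(write(...), 0)`, so a short write would pass while leaving fewer bytes than the splice loop assumes. I would add `memset(mem_send, 0, sizeof(mem_send));` and use `EXPECT_EQ(write(p[1], mem_send, send_len), send_len);`. The two pipe descriptors are never closed; `close(p[0]); close(p[1]);` at the end keeps the test from leaking them. A one-line comment saying what the 32 one-byte `SPLICE_F_MORE` splices are meant to exercise would help, since nothing is read back on the receiving side.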
@@ -988,12 +1002,12 @@ TEST_F(tls, recv_partial)
memset(recv_mem, 0, sizeof(recv_mem));
EXPECT_EQ(send(self->fd, test_str, send_len, 0), send_len);
- EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_first),
- MSG_WAITALL), -1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_first),
+ MSG_WAITALL), strlen(test_str_first));
EXPECT_EQ(memcmp(test_str_first, recv_mem, strlen(test_str_first)), 0);
memset(recv_mem, 0, sizeof(recv_mem));
- EXPECT_NE(recv(self->cfd, recv_mem, strlen(test_str_second),
- MSG_WAITALL), -1);
+ EXPECT_EQ(recv(self->cfd, recv_mem, strlen(test_str_second),
+ MSG_WAITALL), strlen(test_str_second));
EXPECT_EQ(memcmp(test_str_second, recv_mem, strlen(test_str_second)),
0);
}
@@ -1471,6 +1485,51 @@ TEST_F(tls, control_msg)
EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
}
+TEST_F(tls, control_msg_nomerge)
+{
+ char *rec1 = "1111";
+ char *rec2 = "2222";
+ int send_len = 5;
+ char buf[15];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec1, send_len, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+ EXPECT_EQ(memcmp(buf, rec1, send_len), 0);
+
+ EXPECT_EQ(tls_recv_cmsg(_metadata, self->cfd, 100, buf, sizeof(buf), 0), send_len);
+ EXPECT_EQ(memcmp(buf, rec2, send_len), 0);
+}
+
+TEST_F(tls, data_control_data)
+{
+ char *rec1 = "1111";
+ char *rec2 = "2222";
+ char *rec3 = "3333";
+ int send_len = 5;
+ char buf[15];
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, send_len, 0), send_len);
+ EXPECT_EQ(tls_send_cmsg(self->fd, 100, rec2, send_len, 0), send_len);
+ EXPECT_EQ(send(self->fd, rec3, send_len, 0), send_len);
+
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+ EXPECT_EQ(recv(self->cfd, buf, sizeof(buf), MSG_PEEK), send_len);
+}
+
TEST_F(tls, shutdown)
{
char const *test_str = "test_read";
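Review bot: data_control_data checks only the length returned by its two `MSG_PEEK` calls, so a peek that returned five bytes of the wrong record would still pass. Adding `EXPECT_EQ(memcmp(buf, rec1, send_len), 0);` after each recv(), as control_msg_nomerge does in the same hunk, pins the content as well. Neither new test carries a comment saying what it guards against; if the intent is that a peek must stop at the boundary between record types, a short comment stating that would make the expected lengths easier to read.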
@@ -1556,6 +1615,40 @@ TEST_F(tls, getsockopt)
EXPECT_EQ(errno, EINVAL);
}
+TEST_F(tls, recv_efault)
+{
+ char *rec1 = "1111111111";
+ char *rec2 = "2222222222";
+ struct msghdr hdr = {};
+ struct iovec iov[2];
+ char recv_mem[12];
+ int ret;
+
+ if (self->notls)
+ SKIP(return, "no TLS support");
+
+ EXPECT_EQ(send(self->fd, rec1, 10, 0), 10);
+ EXPECT_EQ(send(self->fd, rec2, 10, 0), 10);
+
+ iov[0].iov_base = recv_mem;
+ iov[0].iov_len = sizeof(recv_mem);
+ iov[1].iov_base = NULL; /* broken iov to make process_rx_list fail */
+ iov[1].iov_len = 1;
+
+ hdr.msg_iovlen = 2;
+ hdr.msg_iov = iov;
+
+ EXPECT_EQ(recv(self->cfd, recv_mem, 1, 0), 1);
+ EXPECT_EQ(recv_mem[0], rec1[0]);
+
+ ret = recvmsg(self->cfd, &hdr, 0);
+ EXPECT_LE(ret, sizeof(recv_mem));
+ EXPECT_GE(ret, 9);
+ EXPECT_EQ(memcmp(rec1, recv_mem, 9), 0);
+ if (ret > 9)
+ EXPECT_EQ(memcmp(rec2, recv_mem + 9, ret - 9), 0);
+}
+
FIXTURE(tls_err)
{
int fd, cfd;
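Review bot: `EXPECT_LE(ret, sizeof(recv_mem))` in recv_efault is non-fatal, and the iovec describes 13 bytes, so if recvmsg() ever returned more than 12 the following `memcmp(rec2, recv_mem + 9, ret - 9)` would read past the 12-byte `recv_mem`. Making the bound fatal, `ASSERT_LE(ret, (int)sizeof(recv_mem));`, stops the test before it touches memory outside the buffer when the kernel misbehaves. The cast also avoids comparing a possibly negative `int` with a `size_t`.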
@@ -1860,15 +1953,15 @@ TEST_F(tls_err, poll_partial_rec_async)
/* Child should sleep in poll(), never get a wake */
pfd.fd = self->cfd2;
pfd.events = POLLIN;
- EXPECT_EQ(poll(&pfd, 1, 5), 0);
+ EXPECT_EQ(poll(&pfd, 1, 20), 0);
EXPECT_EQ(write(p[1], &token, 1), 1); /* Barrier #1 */
pfd.fd = self->cfd2;
pfd.events = POLLIN;
- EXPECT_EQ(poll(&pfd, 1, 5), 1);
+ EXPECT_EQ(poll(&pfd, 1, 20), 1);
- exit(!_metadata->passed);
+ exit(!__test_passed(_metadata));
}
}
diff --git a/tools/testing/selftests/net/toeplitz.sh b/tools/testing/selftests/net/toeplitz.sh
index da5bfd834eff..8ff172f7bb1b 100755
--- a/tools/testing/selftests/net/toeplitz.sh
+++ b/tools/testing/selftests/net/toeplitz.sh
@@ -147,14 +147,14 @@ setup() {
setup_loopback_environment "${DEV}"
# Set up server_ns namespace and client_ns namespace
- setup_macvlan_ns "${DEV}" server_ns server \
+ setup_macvlan_ns "${DEV}" $server_ns server \
"${SERVER_MAC}" "${SERVER_IP}"
- setup_macvlan_ns "${DEV}" client_ns client \
+ setup_macvlan_ns "${DEV}" $client_ns client \
"${CLIENT_MAC}" "${CLIENT_IP}"
}
cleanup() {
- cleanup_macvlan_ns server_ns server client_ns client
+ cleanup_macvlan_ns $server_ns server $client_ns client
cleanup_loopback "${DEV}"
}
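Review bot: `$server_ns` and `$client_ns` are passed unquoted to setup_macvlan_ns and cleanup_macvlan_ns, unlike every other argument on these lines. Their assignment is not in this hunk; if either is empty the remaining arguments shift by one position, and in the next hunk `ip netns exec $server_ns ./toeplitz` would treat `./toeplitz` as the namespace name. I would write `"${server_ns}"` and `"${client_ns}"` here and in the `ip netns exec` lines of the following hunk.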
@@ -170,22 +170,22 @@ if [[ "${TEST_RSS}" = true ]]; then
# RPS/RFS must be disabled because they move packets between cpus,
# which breaks the PACKET_FANOUT_CPU identification of RSS decisions.
eval "$(get_disable_rfs_cmd) $(get_disable_rps_cmd)" \
- ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
-C "$(get_rx_irq_cpus)" -s -v &
elif [[ ! -z "${RPS_MAP}" ]]; then
eval "$(get_disable_rfs_cmd) $(get_set_rps_bitmaps_cmd ${RPS_MAP})" \
- ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 \
-r "0x${RPS_MAP}" -s -v &
else
- ip netns exec server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
+ ip netns exec $server_ns ./toeplitz "${IP_FLAG}" "${PROTO_FLAG}" \
-d "${PORT}" -i "${DEV}" -k "${KEY}" -T 1000 -s -v &
fi
server_pid=$!
-ip netns exec client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
+ip netns exec $client_ns ./toeplitz_client.sh "${PROTO_FLAG}" \
"${IP_FLAG}" "${SERVER_IP%%/*}" "${PORT}" &
client_pid=$!
diff --git a/tools/testing/selftests/net/traceroute.sh b/tools/testing/selftests/net/traceroute.sh
index de9ca97abc30..282f14760940 100755
--- a/tools/testing/selftests/net/traceroute.sh
+++ b/tools/testing/selftests/net/traceroute.sh
@@ -4,6 +4,7 @@
# Run traceroute/traceroute6 tests
#
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
@@ -69,9 +70,6 @@ create_ns()
[ -z "${addr}" ] && addr="-"
[ -z "${addr6}" ] && addr6="-"
- ip netns add ${ns}
-
- ip netns exec ${ns} ip link set lo up
if [ "${addr}" != "-" ]; then
ip netns exec ${ns} ip addr add dev lo ${addr}
fi
@@ -160,12 +158,7 @@ connect_ns()
cleanup_traceroute6()
{
- local ns
-
- for ns in host-1 host-2 router-1 router-2
- do
- ip netns del ${ns} 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $r1 $r2
}
setup_traceroute6()
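Review bot: cleanup_traceroute6 is called at the top of setup_traceroute6, before `setup_ns h1 h2 r1 r2` has run, so on the first pass `$h1 $h2 $r1 $r2` expand to nothing and cleanup_ns is called with no arguments. Whether that is a harmless no-op depends on lib.sh, which is not visible here, and the removed loop hid any error with `2>/dev/null`. The same applies to cleanup_traceroute with `$h1 $h2 $router`. A comment stating that cleanup_ns tolerates an empty list, or a guard such as `[ -n "$h1" ] || return 0`, would make the ordering safe to read.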
@@ -176,33 +169,34 @@ setup_traceroute6()
cleanup_traceroute6
set -e
- create_ns host-1
- create_ns host-2
- create_ns router-1
- create_ns router-2
+ setup_ns h1 h2 r1 r2
+ create_ns $h1
+ create_ns $h2
+ create_ns $r1
+ create_ns $r2
# Setup N3
- connect_ns router-2 eth3 - 2000:103::2/64 host-2 eth3 - 2000:103::4/64
- ip netns exec host-2 ip route add default via 2000:103::2
+ connect_ns $r2 eth3 - 2000:103::2/64 $h2 eth3 - 2000:103::4/64
+ ip netns exec $h2 ip route add default via 2000:103::2
# Setup N2
- connect_ns router-1 eth2 - 2000:102::1/64 router-2 eth2 - 2000:102::2/64
- ip netns exec router-1 ip route add default via 2000:102::2
+ connect_ns $r1 eth2 - 2000:102::1/64 $r2 eth2 - 2000:102::2/64
+ ip netns exec $r1 ip route add default via 2000:102::2
# Setup N1. host-1 and router-2 connect to a bridge in router-1.
- ip netns exec router-1 ip link add name ${brdev} type bridge
- ip netns exec router-1 ip link set ${brdev} up
- ip netns exec router-1 ip addr add 2000:101::1/64 dev ${brdev}
+ ip netns exec $r1 ip link add name ${brdev} type bridge
+ ip netns exec $r1 ip link set ${brdev} up
+ ip netns exec $r1 ip addr add 2000:101::1/64 dev ${brdev}
- connect_ns host-1 eth0 - 2000:101::3/64 router-1 eth0 - -
- ip netns exec router-1 ip link set dev eth0 master ${brdev}
- ip netns exec host-1 ip route add default via 2000:101::1
+ connect_ns $h1 eth0 - 2000:101::3/64 $r1 eth0 - -
+ ip netns exec $r1 ip link set dev eth0 master ${brdev}
+ ip netns exec $h1 ip route add default via 2000:101::1
- connect_ns router-2 eth1 - 2000:101::2/64 router-1 eth1 - -
- ip netns exec router-1 ip link set dev eth1 master ${brdev}
+ connect_ns $r2 eth1 - 2000:101::2/64 $r1 eth1 - -
+ ip netns exec $r1 ip link set dev eth1 master ${brdev}
# Prime the network
- ip netns exec host-1 ping6 -c5 2000:103::4 >/dev/null 2>&1
+ ip netns exec $h1 ping6 -c5 2000:103::4 >/dev/null 2>&1
set +e
}
@@ -217,7 +211,7 @@ run_traceroute6()
setup_traceroute6
# traceroute6 host-2 from host-1 (expects 2000:102::2)
- run_cmd host-1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
+ run_cmd $h1 "traceroute6 2000:103::4 | grep -q 2000:102::2"
log_test $? 0 "IPV6 traceroute"
cleanup_traceroute6
@@ -240,12 +234,7 @@ run_traceroute6()
cleanup_traceroute()
{
- local ns
-
- for ns in host-1 host-2 router
- do
- ip netns del ${ns} 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $router
}
setup_traceroute()
@@ -254,24 +243,25 @@ setup_traceroute()
cleanup_traceroute
set -e
- create_ns host-1
- create_ns host-2
- create_ns router
+ setup_ns h1 h2 router
+ create_ns $h1
+ create_ns $h2
+ create_ns $router
- connect_ns host-1 eth0 1.0.1.3/24 - \
- router eth1 1.0.3.1/24 -
- ip netns exec host-1 ip route add default via 1.0.1.1
+ connect_ns $h1 eth0 1.0.1.3/24 - \
+ $router eth1 1.0.3.1/24 -
+ ip netns exec $h1 ip route add default via 1.0.1.1
- ip netns exec router ip addr add 1.0.1.1/24 dev eth1
- ip netns exec router sysctl -qw \
+ ip netns exec $router ip addr add 1.0.1.1/24 dev eth1
+ ip netns exec $router sysctl -qw \
net.ipv4.icmp_errors_use_inbound_ifaddr=1
- connect_ns host-2 eth0 1.0.2.4/24 - \
- router eth2 1.0.2.1/24 -
- ip netns exec host-2 ip route add default via 1.0.2.1
+ connect_ns $h2 eth0 1.0.2.4/24 - \
+ $router eth2 1.0.2.1/24 -
+ ip netns exec $h2 ip route add default via 1.0.2.1
# Prime the network
- ip netns exec host-1 ping -c5 1.0.2.4 >/dev/null 2>&1
+ ip netns exec $h1 ping -c5 1.0.2.4 >/dev/null 2>&1
set +e
}
@@ -286,7 +276,7 @@ run_traceroute()
setup_traceroute
# traceroute host-2 from host-1 (expects 1.0.1.1). Takes a while.
- run_cmd host-1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
+ run_cmd $h1 "traceroute 1.0.2.4 | grep -q 1.0.1.1"
log_test $? 0 "IPV4 traceroute"
cleanup_traceroute
diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c
index 10f2fde3686b..ec60a16c9307 100644
--- a/tools/testing/selftests/net/txtimestamp.c
+++ b/tools/testing/selftests/net/txtimestamp.c
@@ -163,7 +163,8 @@ static void validate_timestamp(struct timespec *cur, int min_delay)
if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) {
fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n",
cur64 - start64, min_delay, max_delay);
- test_failed = true;
+ if (!getenv("KSFT_MACHINE_SLOW"))
+ test_failed = true;
}
}
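Review bot: `getenv("KSFT_MACHINE_SLOW")` is tested only for being non-NULL, so `KSFT_MACHINE_SLOW=no` or an empty value also turns every out-of-range timestamp into a pass. The `ERROR:` line is still printed, which leaves a log that reports an error next to an exit status that reports success. If the variable is meant as a yes/no switch, compare its value, and print that the failure was ignored, for example `fprintf(stderr, "ignoring timing failure on slow machine\n");`. validate_timestamp starts outside this hunk.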
diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh
index 31637769f59f..25baca4b148e 100755
--- a/tools/testing/selftests/net/txtimestamp.sh
+++ b/tools/testing/selftests/net/txtimestamp.sh
@@ -8,13 +8,13 @@ set -e
setup() {
# set 1ms delay on lo egress
- tc qdisc add dev lo root netem delay 1ms
+ tc qdisc add dev lo root netem delay 10ms
# set 2ms delay on ifb0 egress
modprobe ifb
ip link add ifb_netem0 type ifb
ip link set dev ifb_netem0 up
- tc qdisc add dev ifb_netem0 root netem delay 2ms
+ tc qdisc add dev ifb_netem0 root netem delay 20ms
# redirect lo ingress through ifb0 egress
tc qdisc add dev lo handle ffff: ingress
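Review bot: The two comments above the changed `tc` lines still say `# set 1ms delay on lo egress` and `# set 2ms delay on ifb0 egress`, while the commands now configure 10ms and 20ms. They should read `# set 10ms delay on lo egress` and `# set 20ms delay on ifb0 egress`, in line with the comments updated in run_test_v4v6 in the next hunk. setup() continues outside this hunk.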
@@ -24,9 +24,11 @@ setup() {
}
run_test_v4v6() {
- # SND will be delayed 1000us
- # ACK will be delayed 6000us: 1 + 2 ms round-trip
- local -r args="$@ -v 1000 -V 6000"
+ # SND will be delayed 10ms
+ # ACK will be delayed 60ms: 10 + 20 ms round-trip
+ # allow +/- tolerance of 8ms
+ # wait for ACK to be queued
+ local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000"
./txtimestamp ${args} -4 -L 127.0.0.1
./txtimestamp ${args} -6 -L ::1
diff --git a/tools/testing/selftests/net/udpgro.sh b/tools/testing/selftests/net/udpgro.sh
index 0c743752669a..11a1ebda564f 100755
--- a/tools/testing/selftests/net/udpgro.sh
+++ b/tools/testing/selftests/net/udpgro.sh
@@ -3,9 +3,11 @@
#
# Run a series of udpgro functional tests.
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.bpf.o"
# set global exit status, but never reset nonzero one.
check_err()
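Review bot: `source net_helper.sh` is looked up through PATH and then the current directory, and its result is not checked. If the file is not found the script carries on, each later `wait_local_port_listen` fails as an unknown command, and the sender starts with no wait at all, which is worse than the `sleep` it replaced. `source net_helper.sh || exit 1` would stop the run with a clear cause. The same line is added to udpgro_bench.sh, udpgro_frglist.sh and udpgro_fwd.sh.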
@@ -51,8 +53,7 @@ run_one() {
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen ${PEER_NS} 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
wait $(jobs -p)
@@ -97,7 +98,7 @@ run_one_nat() {
echo "ok" || \
echo "failed"&
- sleep 0.1
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
kill -INT $pid
@@ -118,11 +119,9 @@ run_one_2sock() {
echo "ok" || \
echo "failed" &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 12345 udp
./udpgso_bench_tx ${tx_args} -p 12345
- sleep 0.1
- # first UDP GSO socket should be closed at this point
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
ret=$?
wait $(jobs -p)
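Review bot: The second `wait_local_port_listen "${PEER_NS}" 8000 udp` replaces a sleep whose removed comment said the first UDP GSO socket should be closed by that point. Waiting for a listener on port 8000 does not establish that; if the port-8000 receiver is started together with the 12345 one above this hunk (not visible), the call returns at once and the old ordering is no longer enforced. Either add a comment explaining why the close no longer matters, or wait on the condition the old code relied on.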
@@ -198,7 +197,7 @@ run_all() {
}
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_bench.sh b/tools/testing/selftests/net/udpgro_bench.sh
index 894972877e8b..c51ea90a1395 100755
--- a/tools/testing/selftests/net/udpgro_bench.sh
+++ b/tools/testing/selftests/net/udpgro_bench.sh
@@ -3,9 +3,11 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -40,8 +42,7 @@ run_one() {
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
ip netns exec "${PEER_NS}" ./udpgso_bench_rx -t ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
@@ -83,7 +84,7 @@ run_all() {
}
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_frglist.sh b/tools/testing/selftests/net/udpgro_frglist.sh
index 0a6359bed0b9..17404f49cdb6 100755
--- a/tools/testing/selftests/net/udpgro_frglist.sh
+++ b/tools/testing/selftests/net/udpgro_frglist.sh
@@ -3,9 +3,11 @@
#
# Run a series of udpgro benchmarks
+source net_helper.sh
+
readonly PEER_NS="ns-peer-$(mktemp -u XXXXXX)"
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.bpf.o"
cleanup() {
local -r jobs="$(jobs -p)"
@@ -40,13 +42,12 @@ run_one() {
ip -n "${PEER_NS}" link set veth1 xdp object ${BPF_FILE} section xdp
tc -n "${PEER_NS}" qdisc add dev veth1 clsact
- tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.o section schedcls/ingress6/nat_6 direct-action
- tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.o section schedcls/egress4/snat4 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 ingress prio 4 protocol ipv6 bpf object-file nat6to4.bpf.o section schedcls/ingress6/nat_6 direct-action
+ tc -n "${PEER_NS}" filter add dev veth1 egress prio 4 protocol ip bpf object-file nat6to4.bpf.o section schedcls/egress4/snat4 direct-action
echo ${rx_args}
ip netns exec "${PEER_NS}" ./udpgso_bench_rx ${rx_args} -r &
- # Hack: let bg programs complete the startup
- sleep 0.2
+ wait_local_port_listen "${PEER_NS}" 8000 udp
./udpgso_bench_tx ${tx_args}
}
@@ -84,12 +85,12 @@ run_all() {
}
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit -1
fi
-if [ ! -f nat6to4.o ]; then
- echo "Missing nat6to4 helper. Build bpf nat6to4.o selftest first"
+if [ ! -f nat6to4.bpf.o ]; then
+ echo "Missing nat6to4 helper. Run 'make' first"
exit -1
fi
diff --git a/tools/testing/selftests/net/udpgro_fwd.sh b/tools/testing/selftests/net/udpgro_fwd.sh
index c079565add39..550d8eb3e224 100755
--- a/tools/testing/selftests/net/udpgro_fwd.sh
+++ b/tools/testing/selftests/net/udpgro_fwd.sh
@@ -1,7 +1,9 @@
#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+source net_helper.sh
+
+BPF_FILE="xdp_dummy.bpf.o"
readonly BASE="ns-$(mktemp -u XXXXXX)"
readonly SRC=2
readonly DST=1
@@ -37,6 +39,10 @@ create_ns() {
for ns in $NS_SRC $NS_DST; do
ip netns add $ns
ip -n $ns link set dev lo up
+
+ # disable route solicitations to decrease 'noise' traffic
+ ip netns exec $ns sysctl -qw net.ipv6.conf.default.router_solicitations=0
+ ip netns exec $ns sysctl -qw net.ipv6.conf.all.router_solicitations=0
done
ip link add name veth$SRC type veth peer name veth$DST
@@ -78,6 +84,12 @@ create_vxlan_pair() {
create_vxlan_endpoint $BASE$ns veth$ns $BM_NET_V6$((3 - $ns)) vxlan6$ns 6
ip -n $BASE$ns addr add dev vxlan6$ns $OL_NET_V6$ns/24 nodad
done
+
+ # preload neighbur cache, do avoid some noisy traffic
+ local addr_dst=$(ip -j -n $BASE$DST link show dev vxlan6$DST |jq -r '.[]["address"]')
+ local addr_src=$(ip -j -n $BASE$SRC link show dev vxlan6$SRC |jq -r '.[]["address"]')
+ ip -n $BASE$DST neigh add dev vxlan6$DST lladdr $addr_src $OL_NET_V6$SRC
+ ip -n $BASE$SRC neigh add dev vxlan6$SRC lladdr $addr_dst $OL_NET_V6$DST
}
is_ipv6() {
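Review bot: The neighbour preload reads the MAC addresses with `ip -j ... | jq`, which makes jq a new requirement of the script, and `local addr_dst=$(...)` hides the pipeline's exit status. Without jq both variables are empty and the two `ip neigh add ... lladdr` commands fail with a message that does not point at the cause. I would declare first with `local addr_dst addr_src` and assign with `addr_dst=$(...) || return 1`, or test `command -v jq` once at start-up. The comment should read `# preload neighbour cache, to avoid some noisy traffic`.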
@@ -117,9 +129,9 @@ run_test() {
# not enable GRO
ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 4789
ip netns exec $NS_DST $ipt -A INPUT -p udp --dport 8000
- ip netns exec $NS_DST ./udpgso_bench_rx -C 1000 -R 10 -n 10 -l 1300 $rx_args &
+ ip netns exec $NS_DST ./udpgso_bench_rx -C 2000 -R 100 -n 10 -l 1300 $rx_args &
local spid=$!
- sleep 0.1
+ wait_local_port_listen "$NS_DST" 8000 udp
ip netns exec $NS_SRC ./udpgso_bench_tx $family -M 1 -s 13000 -S 1300 -D $dst
local retc=$?
wait $spid
@@ -166,9 +178,9 @@ run_bench() {
# bind the sender and the receiver to different CPUs to try
# get reproducible results
ip netns exec $NS_DST bash -c "echo 2 > /sys/class/net/veth$DST/queues/rx-0/rps_cpus"
- ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 1000 -R 10 &
+ ip netns exec $NS_DST taskset 0x2 ./udpgso_bench_rx -C 2000 -R 100 &
local spid=$!
- sleep 0.1
+ wait_local_port_listen "$NS_DST" 8000 udp
ip netns exec $NS_SRC taskset 0x1 ./udpgso_bench_tx $family -l 3 -S 1300 -D $dst
local retc=$?
wait $spid
@@ -205,6 +217,7 @@ for family in 4 6; do
cleanup
create_ns
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
run_test "GRO frag list" $BM_NET$DST 1 0
cleanup
@@ -215,6 +228,7 @@ for family in 4 6; do
# use NAT to circumvent GRO FWD check
create_ns
ip -n $NS_DST addr add dev veth$DST $BM_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $BM_NET$DST_NAT \
-j DNAT --to-destination $BM_NET$DST
@@ -228,13 +242,15 @@ for family in 4 6; do
cleanup
create_vxlan_pair
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-gro-list on
- run_test "GRO frag list over UDP tunnel" $OL_NET$DST 1 1
+ run_test "GRO frag list over UDP tunnel" $OL_NET$DST 10 10
cleanup
# use NAT to circumvent GRO FWD check
create_vxlan_pair
ip -n $NS_DST addr add dev $VXDEV$DST $OL_NET$DST_NAT/$SUFFIX
+ ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
ip netns exec $NS_DST $IPT -t nat -I PREROUTING -d $OL_NET$DST_NAT \
-j DNAT --to-destination $OL_NET$DST
@@ -242,13 +258,7 @@ for family in 4 6; do
# load arp cache before running the test to reduce the amount of
# stray traffic on top of the UDP tunnel
ip netns exec $NS_SRC $PING -q -c 1 $OL_NET$DST_NAT >/dev/null
- run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 1 1 $OL_NET$DST
- cleanup
-
- create_vxlan_pair
- run_bench "UDP tunnel fwd perf" $OL_NET$DST
- ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
- run_bench "UDP tunnel GRO fwd perf" $OL_NET$DST
+ run_test "GRO fwd over UDP tunnel" $OL_NET$DST_NAT 10 10 $OL_NET$DST
cleanup
done
diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c
index 7badaf215de2..85b3baa3f7f3 100644
--- a/tools/testing/selftests/net/udpgso.c
+++ b/tools/testing/selftests/net/udpgso.c
@@ -34,7 +34,7 @@
#endif
#ifndef UDP_MAX_SEGMENTS
-#define UDP_MAX_SEGMENTS (1 << 6UL)
+#define UDP_MAX_SEGMENTS (1 << 7UL)
#endif
#define CONST_MTU_TEST 1500
@@ -56,7 +56,6 @@ static bool cfg_do_msgmore;
static bool cfg_do_setsockopt;
static int cfg_specific_test_id = -1;
-static const char cfg_ifname[] = "lo";
static unsigned short cfg_port = 9000;
static char buf[ETH_MAX_MTU];
@@ -69,8 +68,13 @@ struct testcase {
int r_len_last; /* recv(): size of last non-mss dgram, if any */
};
-const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT;
-const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) };
+const struct in6_addr addr6 = {
+ { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */
+};
+
+const struct in_addr addr4 = {
+ __constant_htonl(0x0a000001), /* 10.0.0.1 */
+};
struct testcase testcases_v4[] = {
{
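Review bot: The test addresses are now fixed at `fd00::1` and `10.0.0.1`, which exist on `lo` only after `setup_loopback` in udpgso.sh has run, and nothing in the C file says so. A comment above the two definitions such as `/* must match setup_loopback() in udpgso.sh */` would keep the two files in step. The later run_test hunk likewise drops the in-binary MTU setup, so running `./udpgso` outside the wrapper will presumably fail or exercise a different MTU with no hint why. `addr4` also lost its designator; `{ .s_addr = __constant_htonl(0x0a000001) }` reads better than a positional initializer.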
@@ -274,48 +278,6 @@ struct testcase testcases_v6[] = {
}
};
-static unsigned int get_device_mtu(int fd, const char *ifname)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCGIFMTU, &ifr))
- error(1, errno, "ioctl get mtu");
-
- return ifr.ifr_mtu;
-}
-
-static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu)
-{
- struct ifreq ifr;
-
- memset(&ifr, 0, sizeof(ifr));
-
- ifr.ifr_mtu = mtu;
- strcpy(ifr.ifr_name, ifname);
-
- if (ioctl(fd, SIOCSIFMTU, &ifr))
- error(1, errno, "ioctl set mtu");
-}
-
-static void set_device_mtu(int fd, int mtu)
-{
- int val;
-
- val = get_device_mtu(fd, cfg_ifname);
- fprintf(stderr, "device mtu (orig): %u\n", val);
-
- __set_device_mtu(fd, cfg_ifname, mtu);
- val = get_device_mtu(fd, cfg_ifname);
- if (val != mtu)
- error(1, 0, "unable to set device mtu to %u\n", val);
-
- fprintf(stderr, "device mtu (test): %u\n", val);
-}
-
static void set_pmtu_discover(int fd, bool is_ipv4)
{
int level, name, val;
@@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4)
return mtu;
}
-/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */
-static void set_route_mtu(int mtu, bool is_ipv4)
-{
- struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK };
- struct nlmsghdr *nh;
- struct rtattr *rta;
- struct rtmsg *rt;
- char data[NLMSG_ALIGN(sizeof(*nh)) +
- NLMSG_ALIGN(sizeof(*rt)) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) +
- NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) +
- NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))];
- int fd, ret, alen, off = 0;
-
- alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6);
-
- fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE);
- if (fd == -1)
- error(1, errno, "socket netlink");
-
- memset(data, 0, sizeof(data));
-
- nh = (void *)data;
- nh->nlmsg_type = RTM_NEWROUTE;
- nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE;
- off += NLMSG_ALIGN(sizeof(*nh));
-
- rt = (void *)(data + off);
- rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6;
- rt->rtm_table = RT_TABLE_MAIN;
- rt->rtm_dst_len = alen << 3;
- rt->rtm_protocol = RTPROT_BOOT;
- rt->rtm_scope = RT_SCOPE_UNIVERSE;
- rt->rtm_type = RTN_UNICAST;
- off += NLMSG_ALIGN(sizeof(*rt));
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_DST;
- rta->rta_len = RTA_LENGTH(alen);
- if (is_ipv4)
- memcpy(RTA_DATA(rta), &addr4, alen);
- else
- memcpy(RTA_DATA(rta), &addr6, alen);
- off += NLMSG_ALIGN(rta->rta_len);
-
- rta = (void *)(data + off);
- rta->rta_type = RTA_OIF;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo");
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* MTU is a subtype in a metrics type */
- rta = (void *)(data + off);
- rta->rta_type = RTA_METRICS;
- rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int));
- off += NLMSG_ALIGN(rta->rta_len);
-
- /* now fill MTU subtype. Note that it fits within above rta_len */
- rta = (void *)(((char *) rta) + RTA_LENGTH(0));
- rta->rta_type = RTAX_MTU;
- rta->rta_len = RTA_LENGTH(sizeof(int));
- *((int *)(RTA_DATA(rta))) = mtu;
-
- nh->nlmsg_len = off;
-
- ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr));
- if (ret != off)
- error(1, errno, "send netlink: %uB != %uB\n", ret, off);
-
- if (close(fd))
- error(1, errno, "close netlink");
-
- fprintf(stderr, "route mtu (test): %u\n", mtu);
-}
-
static bool __send_one(int fd, struct msghdr *msg, int flags)
{
int ret;
@@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen)
/* Do not fragment these datagrams: only succeed if GSO works */
set_pmtu_discover(fdt, addr->sa_family == AF_INET);
- if (cfg_do_connectionless) {
- set_device_mtu(fdt, CONST_MTU_TEST);
+ if (cfg_do_connectionless)
run_all(fdt, fdr, addr, alen);
- }
if (cfg_do_connected) {
- set_device_mtu(fdt, CONST_MTU_TEST + 100);
- set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET);
-
if (connect(fdt, addr, alen))
error(1, errno, "connect");
diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh
index fec24f584fe9..6c63178086b0 100755
--- a/tools/testing/selftests/net/udpgso.sh
+++ b/tools/testing/selftests/net/udpgso.sh
@@ -3,27 +3,56 @@
#
# Run a series of udpgso regression tests
+set -o errexit
+set -o nounset
+
+setup_loopback() {
+ ip addr add dev lo 10.0.0.1/32
+ ip addr add dev lo fd00::1/128 nodad noprefixroute
+}
+
+test_dev_mtu() {
+ setup_loopback
+ # Reduce loopback MTU
+ ip link set dev lo mtu 1500
+}
+
+test_route_mtu() {
+ setup_loopback
+ # Remove default local routes
+ ip route del local 10.0.0.1/32 table local dev lo
+ ip route del local fd00::1/128 table local dev lo
+ # Install local routes with reduced MTU
+ ip route add local 10.0.0.1/32 table local dev lo mtu 1500
+ ip route add local fd00::1/128 table local dev lo mtu 1500
+}
+
+if [ "$#" -gt 0 ]; then
+ "$1"
+ shift 2 # pop "test_*" arg and "--" delimiter
+ exec "$@"
+fi
+
echo "ipv4 cmsg"
-./in_netns.sh ./udpgso -4 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C
echo "ipv4 setsockopt"
-./in_netns.sh ./udpgso -4 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s
echo "ipv6 cmsg"
-./in_netns.sh ./udpgso -6 -C
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C
echo "ipv6 setsockopt"
-./in_netns.sh ./udpgso -6 -C -s
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s
echo "ipv4 connected"
-./in_netns.sh ./udpgso -4 -c
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c
-# blocked on 2nd loopback address
-# echo "ipv6 connected"
-# ./in_netns.sh ./udpgso -6 -c
+echo "ipv6 connected"
+./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c
echo "ipv4 msg_more"
-./in_netns.sh ./udpgso -4 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m
echo "ipv6 msg_more"
-./in_netns.sh ./udpgso -6 -C -m
+./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m
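Review bot: The dispatch block at the top runs whatever its first argument names (`"$1"`) and then `exec`s the remainder, so any invocation with arguments executes an arbitrary command, not just the two setup functions. These selftests normally run as root, so an allow-list is cheap insurance: `case "$1" in test_dev_mtu|test_route_mtu) "$1" ;; *) echo "unknown setup: $1" >&2; exit 1 ;; esac`. `shift 2` also discards the second argument without checking that it really is the `--` delimiter. A short comment above the block explaining that the script re-invokes itself inside the namespace via `in_netns.sh` would help the next reader.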
diff --git a/tools/testing/selftests/net/udpgso_bench_rx.c b/tools/testing/selftests/net/udpgso_bench_rx.c
index f35a924d4a30..1cbadd267c96 100644
--- a/tools/testing/selftests/net/udpgso_bench_rx.c
+++ b/tools/testing/selftests/net/udpgso_bench_rx.c
@@ -375,7 +375,7 @@ static void do_recv(void)
do_flush_udp(fd);
tnow = gettimeofday_ms();
- if (tnow > treport) {
+ if (!cfg_expected_pkt_nr && tnow > treport) {
if (packets)
fprintf(stderr,
"%s rx: %6lu MB/s %8lu calls/s\n",
diff --git a/tools/testing/selftests/net/unicast_extensions.sh b/tools/testing/selftests/net/unicast_extensions.sh
index 2d10ccac898a..f52aa5f7da52 100755
--- a/tools/testing/selftests/net/unicast_extensions.sh
+++ b/tools/testing/selftests/net/unicast_extensions.sh
@@ -1,4 +1,4 @@
-#!/bin/sh
+#!/bin/bash
# SPDX-License-Identifier: GPL-2.0
#
# By Seth Schoen (c) 2021, for the IPv4 Unicast Extensions Project
@@ -28,8 +28,7 @@
# These tests provide an easy way to flip the expected result of any
# of these behaviors for testing kernel patches that change them.
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
# nettest can be run from PATH or from same directory as this selftest
if ! which nettest >/dev/null; then
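Review bot: `source lib.sh` has no path component, so bash looks the name up via `PATH` and then the current working directory, not the script's own directory. Run from anywhere else, the test either fails or sources an unrelated `lib.sh`, and this suite usually runs as root. Anchoring it to the script avoids both: `source "$(dirname "$0")/lib.sh"`. The same line is added in the vrf-xfrm-tests.sh and vrf_route_leaking.sh hunks further down.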
@@ -61,20 +60,20 @@ _do_segmenttest(){
# foo --- bar
# Arguments: ip_a ip_b prefix_length test_description
#
- # Caller must set up foo-ns and bar-ns namespaces
+ # Caller must set up $foo_ns and $bar_ns namespaces
# containing linked veth devices foo and bar,
# respectively.
- ip -n foo-ns address add $1/$3 dev foo || return 1
- ip -n foo-ns link set foo up || return 1
- ip -n bar-ns address add $2/$3 dev bar || return 1
- ip -n bar-ns link set bar up || return 1
+ ip -n $foo_ns address add $1/$3 dev foo || return 1
+ ip -n $foo_ns link set foo up || return 1
+ ip -n $bar_ns address add $2/$3 dev bar || return 1
+ ip -n $bar_ns link set bar up || return 1
- ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1
- ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1
+ ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
+ ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
- nettest -B -N bar-ns -O foo-ns -r $1 || return 1
- nettest -B -N foo-ns -O bar-ns -r $2 || return 1
+ nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
+ nettest -B -N $foo_ns -O $bar_ns -r $2 || return 1
return 0
}
@@ -88,31 +87,31 @@ _do_route_test(){
# Arguments: foo_ip foo1_ip bar1_ip bar_ip prefix_len test_description
# Displays test result and returns success or failure.
- # Caller must set up foo-ns, bar-ns, and router-ns
+ # Caller must set up $foo_ns, $bar_ns, and $router_ns
# containing linked veth devices foo-foo1, bar1-bar
- # (foo in foo-ns, foo1 and bar1 in router-ns, and
- # bar in bar-ns).
-
- ip -n foo-ns address add $1/$5 dev foo || return 1
- ip -n foo-ns link set foo up || return 1
- ip -n foo-ns route add default via $2 || return 1
- ip -n bar-ns address add $4/$5 dev bar || return 1
- ip -n bar-ns link set bar up || return 1
- ip -n bar-ns route add default via $3 || return 1
- ip -n router-ns address add $2/$5 dev foo1 || return 1
- ip -n router-ns link set foo1 up || return 1
- ip -n router-ns address add $3/$5 dev bar1 || return 1
- ip -n router-ns link set bar1 up || return 1
-
- echo 1 | ip netns exec router-ns tee /proc/sys/net/ipv4/ip_forward
-
- ip netns exec foo-ns timeout 2 ping -c 1 $2 || return 1
- ip netns exec foo-ns timeout 2 ping -c 1 $4 || return 1
- ip netns exec bar-ns timeout 2 ping -c 1 $3 || return 1
- ip netns exec bar-ns timeout 2 ping -c 1 $1 || return 1
-
- nettest -B -N bar-ns -O foo-ns -r $1 || return 1
- nettest -B -N foo-ns -O bar-ns -r $4 || return 1
+ # (foo in $foo_ns, foo1 and bar1 in $router_ns, and
+ # bar in $bar_ns).
+
+ ip -n $foo_ns address add $1/$5 dev foo || return 1
+ ip -n $foo_ns link set foo up || return 1
+ ip -n $foo_ns route add default via $2 || return 1
+ ip -n $bar_ns address add $4/$5 dev bar || return 1
+ ip -n $bar_ns link set bar up || return 1
+ ip -n $bar_ns route add default via $3 || return 1
+ ip -n $router_ns address add $2/$5 dev foo1 || return 1
+ ip -n $router_ns link set foo1 up || return 1
+ ip -n $router_ns address add $3/$5 dev bar1 || return 1
+ ip -n $router_ns link set bar1 up || return 1
+
+ echo 1 | ip netns exec $router_ns tee /proc/sys/net/ipv4/ip_forward
+
+ ip netns exec $foo_ns timeout 2 ping -c 1 $2 || return 1
+ ip netns exec $foo_ns timeout 2 ping -c 1 $4 || return 1
+ ip netns exec $bar_ns timeout 2 ping -c 1 $3 || return 1
+ ip netns exec $bar_ns timeout 2 ping -c 1 $1 || return 1
+
+ nettest -B -N $bar_ns -O $foo_ns -r $1 || return 1
+ nettest -B -N $foo_ns -O $bar_ns -r $4 || return 1
return 0
}
@@ -121,17 +120,15 @@ segmenttest(){
# Sets up veth link and tries to connect over it.
# Arguments: ip_a ip_b prefix_len test_description
hide_output
- ip netns add foo-ns
- ip netns add bar-ns
- ip link add foo netns foo-ns type veth peer name bar netns bar-ns
+ setup_ns foo_ns bar_ns
+ ip link add foo netns $foo_ns type veth peer name bar netns $bar_ns
test_result=0
_do_segmenttest "$@" || test_result=1
- ip netns pids foo-ns | xargs -r kill -9
- ip netns pids bar-ns | xargs -r kill -9
- ip netns del foo-ns
- ip netns del bar-ns
+ ip netns pids $foo_ns | xargs -r kill -9
+ ip netns pids $bar_ns | xargs -r kill -9
+ cleanup_ns $foo_ns $bar_ns
show_output
# inverted tests will expect failure instead of success
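Review bot: The result of `setup_ns foo_ns bar_ns` is not checked, so a failure to create the namespaces falls through to `_do_segmenttest` and becomes `test_result=1`. According to the comment on the last line of this hunk, inverted tests expect failure, so a broken setup would be reported as a pass. Bailing out early separates the two cases, e.g. `setup_ns foo_ns bar_ns || exit $ksft_skip` (assuming lib.sh supplies `ksft_skip`, since this commit removes the local definition). The expansions `$foo_ns`/`$bar_ns` would also be safer quoted. route_test has the same pattern, and segmenttest's body continues past this hunk.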
@@ -147,21 +144,17 @@ route_test(){
# Returns success or failure.
hide_output
- ip netns add foo-ns
- ip netns add bar-ns
- ip netns add router-ns
- ip link add foo netns foo-ns type veth peer name foo1 netns router-ns
- ip link add bar netns bar-ns type veth peer name bar1 netns router-ns
+ setup_ns foo_ns bar_ns router_ns
+ ip link add foo netns $foo_ns type veth peer name foo1 netns $router_ns
+ ip link add bar netns $bar_ns type veth peer name bar1 netns $router_ns
test_result=0
_do_route_test "$@" || test_result=1
- ip netns pids foo-ns | xargs -r kill -9
- ip netns pids bar-ns | xargs -r kill -9
- ip netns pids router-ns | xargs -r kill -9
- ip netns del foo-ns
- ip netns del bar-ns
- ip netns del router-ns
+ ip netns pids $foo_ns | xargs -r kill -9
+ ip netns pids $bar_ns | xargs -r kill -9
+ ip netns pids $router_ns | xargs -r kill -9
+ cleanup_ns $foo_ns $bar_ns $router_ns
show_output
diff --git a/tools/testing/selftests/net/veth.sh b/tools/testing/selftests/net/veth.sh
index 2d073595c620..4f1edbafb946 100755
--- a/tools/testing/selftests/net/veth.sh
+++ b/tools/testing/selftests/net/veth.sh
@@ -1,7 +1,7 @@
#!/bin/sh
# SPDX-License-Identifier: GPL-2.0
-BPF_FILE="../bpf/xdp_dummy.bpf.o"
+BPF_FILE="xdp_dummy.bpf.o"
readonly STATS="$(mktemp -p /tmp ns-XXXXXX)"
readonly BASE=`basename $STATS`
readonly SRC=2
@@ -218,7 +218,7 @@ while getopts "hs:" option; do
done
if [ ! -f ${BPF_FILE} ]; then
- echo "Missing ${BPF_FILE}. Build bpf selftest first"
+ echo "Missing ${BPF_FILE}. Run 'make' first"
exit 1
fi
@@ -247,6 +247,35 @@ chk_gro " - aggregation with TSO off" 1
cleanup
create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+chk_gro_flag "gro vs xdp while down - gro flag off" $DST off
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag " - after down" $DST off
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after xdp off" $DST off
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag " - after up" $DST off
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag " - after peer xdp" $DST off
+cleanup
+
+create_ns
+ip -n $NS_DST link set dev veth$DST up
+ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp
+ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+chk_gro_flag "gro vs xdp while down - gro flag on" $DST on
+ip -n $NS_DST link set dev veth$DST down
+chk_gro_flag " - after down" $DST on
+ip -n $NS_DST link set dev veth$DST xdp off
+chk_gro_flag " - after xdp off" $DST on
+ip -n $NS_DST link set dev veth$DST up
+chk_gro_flag " - after up" $DST on
+ip -n $NS_SRC link set dev veth$SRC xdp object ${BPF_FILE} section xdp
+chk_gro_flag " - after peer xdp" $DST on
+cleanup
+
+create_ns
chk_channels "default channels" $DST 1 1
ip -n $NS_DST link set dev veth$DST down
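Review bot: Neither new block checks that `ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp` succeeded. In the first block every `chk_gro_flag` expects `off`, which is presumably also the state of a veth with no program attached, so a failed attach would let the block pass without exercising anything. Failing fast would make that visible, for example by appending `|| { echo "failed to attach xdp"; exit 1; }` to the attach lines. The two blocks differ only in the `ethtool -K` line and the expected value, so a small helper taking `on`/`off` would remove the duplication.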
@@ -313,11 +342,14 @@ if [ $CPUS -gt 2 ]; then
fi
ip -n $NS_DST link set dev veth$DST xdp object ${BPF_FILE} section xdp 2>/dev/null
-chk_gro_flag "with xdp attached - gro flag" $DST on
+chk_gro_flag "with xdp attached - gro flag" $DST off
chk_gro_flag " - peer gro flag" $SRC off
chk_tso_flag " - tso flag" $SRC off
chk_tso_flag " - peer tso flag" $DST on
ip netns exec $NS_DST ethtool -K veth$DST rx-udp-gro-forwarding on
+chk_gro " - no aggregation" 10
+ip netns exec $NS_DST ethtool -K veth$DST generic-receive-offload on
+chk_gro_flag " - gro flag with GRO on" $DST on
chk_gro " - aggregation" 1
diff --git a/tools/testing/selftests/net/vlan_hw_filter.sh b/tools/testing/selftests/net/vlan_hw_filter.sh
new file mode 100755
index 000000000000..7bc804ffaf7c
--- /dev/null
+++ b/tools/testing/selftests/net/vlan_hw_filter.sh
@@ -0,0 +1,29 @@
+#!/bin/sh
+# SPDX-License-Identifier: GPL-2.0
+
+readonly NETNS="ns-$(mktemp -u XXXXXX)"
+
+ret=0
+
+cleanup() {
+ ip netns del $NETNS
+}
+
+trap cleanup EXIT
+
+fail() {
+ echo "ERROR: ${1:-unexpected return code} (ret: $_)" >&2
+ ret=1
+}
+
+ip netns add ${NETNS}
+ip netns exec ${NETNS} ip link add bond0 type bond mode 0
+ip netns exec ${NETNS} ip link add bond_slave_1 type veth peer veth2
+ip netns exec ${NETNS} ip link set bond_slave_1 master bond0
+ip netns exec ${NETNS} ethtool -K bond0 rx-vlan-filter off
+ip netns exec ${NETNS} ip link add link bond_slave_1 name bond_slave_1.0 type vlan id 0
+ip netns exec ${NETNS} ip link add link bond0 name bond0.0 type vlan id 0
+ip netns exec ${NETNS} ip link set bond_slave_1 nomaster
+ip netns exec ${NETNS} ip link del veth2 || fail "Please check vlan HW filter function"
+
+exit $ret
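Review bot: `fail()` prints `$_` as the return code, but in bash `$_` is the last argument of the previous command, and POSIX does not define it for a `#!/bin/sh` script, so the message never shows the exit status. Save the status on entry instead, with `rc=$?` as the first line of `fail()` and `(ret: $rc)` in the message. Only the final `ip link del veth2` is checked; the earlier setup commands can fail silently, and the EXIT trap runs `ip netns del $NETNS` even if the namespace was never created. A one-line header comment stating which regression this reproduces would also help, since the sequence of bond/vlan operations is not self-explanatory.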
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
index 452638ae8aed..b64dd891699d 100755
--- a/tools/testing/selftests/net/vrf-xfrm-tests.sh
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -3,9 +3,7 @@
#
# Various combinations of VRF with xfrms and qdisc.
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
+source lib.sh
PAUSE_ON_FAIL=no
VERBOSE=0
ret=0
@@ -67,7 +65,7 @@ run_cmd_host1()
printf " COMMAND: $cmd\n"
fi
- out=$(eval ip netns exec host1 $cmd 2>&1)
+ out=$(eval ip netns exec $host1 $cmd 2>&1)
rc=$?
if [ "$VERBOSE" = "1" ]; then
if [ -n "$out" ]; then
@@ -116,9 +114,6 @@ create_ns()
[ -z "${addr}" ] && addr="-"
[ -z "${addr6}" ] && addr6="-"
- ip netns add ${ns}
-
- ip -netns ${ns} link set lo up
if [ "${addr}" != "-" ]; then
ip -netns ${ns} addr add dev lo ${addr}
fi
@@ -177,25 +172,25 @@ connect_ns()
cleanup()
{
- ip netns del host1
- ip netns del host2
+ cleanup_ns $host1 $host2
}
setup()
{
- create_ns "host1"
- create_ns "host2"
+ setup_ns host1 host2
+ create_ns "$host1"
+ create_ns "$host2"
- connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
- "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+ connect_ns "$host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+ "$host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
- create_vrf "host1" ${VRF} ${TABLE}
- ip -netns host1 link set dev eth0 master ${VRF}
+ create_vrf "$host1" ${VRF} ${TABLE}
+ ip -netns $host1 link set dev eth0 master ${VRF}
}
cleanup_xfrm()
{
- for ns in host1 host2
+ for ns in $host1 $host2
do
for x in state policy
do
@@ -218,57 +213,57 @@ setup_xfrm()
#
# host1 - IPv4 out
- ip -netns host1 xfrm policy add \
+ ip -netns $host1 xfrm policy add \
src ${h1_4} dst ${h2_4} ${devarg} dir out \
tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
# host2 - IPv4 in
- ip -netns host2 xfrm policy add \
+ ip -netns $host2 xfrm policy add \
src ${h1_4} dst ${h2_4} dir in \
tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
# host1 - IPv4 in
- ip -netns host1 xfrm policy add \
+ ip -netns $host1 xfrm policy add \
src ${h2_4} dst ${h1_4} ${devarg} dir in \
tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
# host2 - IPv4 out
- ip -netns host2 xfrm policy add \
+ ip -netns $host2 xfrm policy add \
src ${h2_4} dst ${h1_4} dir out \
tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
# host1 - IPv6 out
- ip -6 -netns host1 xfrm policy add \
+ ip -6 -netns $host1 xfrm policy add \
src ${h1_6} dst ${h2_6} ${devarg} dir out \
tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
# host2 - IPv6 in
- ip -6 -netns host2 xfrm policy add \
+ ip -6 -netns $host2 xfrm policy add \
src ${h1_6} dst ${h2_6} dir in \
tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
# host1 - IPv6 in
- ip -6 -netns host1 xfrm policy add \
+ ip -6 -netns $host1 xfrm policy add \
src ${h2_6} dst ${h1_6} ${devarg} dir in \
tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
# host2 - IPv6 out
- ip -6 -netns host2 xfrm policy add \
+ ip -6 -netns $host2 xfrm policy add \
src ${h2_6} dst ${h1_6} dir out \
tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
#
# state
#
- ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ ip -netns $host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_4} dst ${h2_4} ${devarg}
- ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+ ip -netns $host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
@@ -276,14 +271,14 @@ setup_xfrm()
sel src ${h1_4} dst ${h2_4}
- ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ ip -netns $host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_4} dst ${h1_4} ${devarg}
- ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+ ip -netns $host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
@@ -291,14 +286,14 @@ setup_xfrm()
sel src ${h2_4} dst ${h1_4}
- ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ ip -6 -netns $host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
enc 'cbc(aes)' ${ENC_1} \
sel src ${h1_6} dst ${h2_6} ${devarg}
- ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+ ip -6 -netns $host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
proto esp spi ${SPI_1} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_1} 96 \
@@ -306,14 +301,14 @@ setup_xfrm()
sel src ${h1_6} dst ${h2_6}
- ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ ip -6 -netns $host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
enc 'cbc(aes)' ${ENC_2} \
sel src ${h2_6} dst ${h1_6} ${devarg}
- ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+ ip -6 -netns $host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
proto esp spi ${SPI_2} reqid 0 mode tunnel \
replay-window 4 replay-oseq 0x4 \
auth-trunc 'hmac(sha1)' ${AUTH_2} 96 \
@@ -323,22 +318,22 @@ setup_xfrm()
cleanup_xfrm_dev()
{
- ip -netns host1 li del xfrm0
- ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
- ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+ ip -netns $host1 li del xfrm0
+ ip -netns $host2 addr del ${XFRM2_4}/24 dev eth0
+ ip -netns $host2 addr del ${XFRM2_6}/64 dev eth0
}
setup_xfrm_dev()
{
local vrfarg="vrf ${VRF}"
- ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
- ip -netns host1 li set xfrm0 ${vrfarg} up
- ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
- ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+ ip -netns $host1 li add type xfrm dev eth0 if_id ${IF_ID}
+ ip -netns $host1 li set xfrm0 ${vrfarg} up
+ ip -netns $host1 addr add ${XFRM1_4}/24 dev xfrm0
+ ip -netns $host1 addr add ${XFRM1_6}/64 dev xfrm0
- ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
- ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+ ip -netns $host2 addr add ${XFRM2_4}/24 dev eth0
+ ip -netns $host2 addr add ${XFRM2_6}/64 dev eth0
setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
}
diff --git a/tools/testing/selftests/net/vrf_route_leaking.sh b/tools/testing/selftests/net/vrf_route_leaking.sh
index dedc52562b4f..2da32f4c479b 100755
--- a/tools/testing/selftests/net/vrf_route_leaking.sh
+++ b/tools/testing/selftests/net/vrf_route_leaking.sh
@@ -58,6 +58,7 @@
# to send an ICMP error back to the source when the ttl of a packet reaches 1
# while it is forwarded between different vrfs.
+source lib.sh
VERBOSE=0
PAUSE_ON_FAIL=no
DEFAULT_TTYPE=sym
@@ -171,11 +172,7 @@ run_cmd_grep()
cleanup()
{
- local ns
-
- for ns in h1 h2 r1 r2; do
- ip netns del $ns 2>/dev/null
- done
+ cleanup_ns $h1 $h2 $r1 $r2
}
setup_vrf()
@@ -212,72 +209,69 @@ setup_sym()
#
# create nodes as namespaces
- #
- for ns in h1 h2 r1; do
- ip netns add $ns
- ip -netns $ns link set lo up
-
- case "${ns}" in
- h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
- ;;
- r1) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
- esac
+ setup_ns h1 h2 r1
+ for ns in $h1 $h2 $r1; do
+ if echo $ns | grep -q h[12]-; then
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ else
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ fi
done
#
# create interconnects
#
- ip -netns h1 link add eth0 type veth peer name r1h1
- ip -netns h1 link set r1h1 netns r1 name eth0 up
+ ip -netns $h1 link add eth0 type veth peer name r1h1
+ ip -netns $h1 link set r1h1 netns $r1 name eth0 up
- ip -netns h2 link add eth0 type veth peer name r1h2
- ip -netns h2 link set r1h2 netns r1 name eth1 up
+ ip -netns $h2 link add eth0 type veth peer name r1h2
+ ip -netns $h2 link set r1h2 netns $r1 name eth1 up
#
# h1
#
- ip -netns h1 addr add dev eth0 ${H1_N1_IP}/24
- ip -netns h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
- ip -netns h1 link set eth0 up
+ ip -netns $h1 addr add dev eth0 ${H1_N1_IP}/24
+ ip -netns $h1 -6 addr add dev eth0 ${H1_N1_IP6}/64 nodad
+ ip -netns $h1 link set eth0 up
# h1 to h2 via r1
- ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
- ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
+ ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev eth0
+ ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev eth0
#
# h2
#
- ip -netns h2 addr add dev eth0 ${H2_N2_IP}/24
- ip -netns h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
- ip -netns h2 link set eth0 up
+ ip -netns $h2 addr add dev eth0 ${H2_N2_IP}/24
+ ip -netns $h2 -6 addr add dev eth0 ${H2_N2_IP6}/64 nodad
+ ip -netns $h2 link set eth0 up
# h2 to h1 via r1
- ip -netns h2 route add default via ${R1_N2_IP} dev eth0
- ip -netns h2 -6 route add default via ${R1_N2_IP6} dev eth0
+ ip -netns $h2 route add default via ${R1_N2_IP} dev eth0
+ ip -netns $h2 -6 route add default via ${R1_N2_IP6} dev eth0
#
# r1
#
- setup_vrf r1
- create_vrf r1 blue 1101
- create_vrf r1 red 1102
- ip -netns r1 link set mtu 1400 dev eth1
- ip -netns r1 link set eth0 vrf blue up
- ip -netns r1 link set eth1 vrf red up
- ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
- ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
- ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
- ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+ setup_vrf $r1
+ create_vrf $r1 blue 1101
+ create_vrf $r1 red 1102
+ ip -netns $r1 link set mtu 1400 dev eth1
+ ip -netns $r1 link set eth0 vrf blue up
+ ip -netns $r1 link set eth1 vrf red up
+ ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
# Route leak from blue to red
- ip -netns r1 route add vrf blue ${H2_N2} dev red
- ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+ ip -netns $r1 route add vrf blue ${H2_N2} dev red
+ ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red
# Route leak from red to blue
- ip -netns r1 route add vrf red ${H1_N1} dev blue
- ip -netns r1 -6 route add vrf red ${H1_N1_6} dev blue
+ ip -netns $r1 route add vrf red ${H1_N1} dev blue
+ ip -netns $r1 -6 route add vrf red ${H1_N1_6} dev blue
# Wait for ip config to settle
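Review bot: In the new loop, `grep -q h[12]-` hands an unquoted glob to the shell, so a file named `h1-` or `h2-` in the working directory changes the pattern and one host namespace would get the router sysctls. It also bakes in the assumption that setup_ns produces names of the form `<name>-<suffix>`, which is not visible here. Comparing against the variables directly avoids both: `if [ "$ns" = "$h1" ] || [ "$ns" = "$h2" ]; then`. The same construct appears in the setup_asym hunk, and setup_sym itself starts and ends outside this hunk.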
@@ -293,90 +287,87 @@ setup_asym()
#
# create nodes as namespaces
- #
- for ns in h1 h2 r1 r2; do
- ip netns add $ns
- ip -netns $ns link set lo up
-
- case "${ns}" in
- h[12]) ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
- ;;
- r[12]) ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
- ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
- esac
+ setup_ns h1 h2 r1 r2
+ for ns in $h1 $h2 $r1 $r2; do
+ if echo $ns | grep -q h[12]-; then
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=0
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.keep_addr_on_down=1
+ else
+ ip netns exec $ns sysctl -q -w net.ipv4.ip_forward=1
+ ip netns exec $ns sysctl -q -w net.ipv6.conf.all.forwarding=1
+ fi
done
#
# create interconnects
#
- ip -netns h1 link add eth0 type veth peer name r1h1
- ip -netns h1 link set r1h1 netns r1 name eth0 up
+ ip -netns $h1 link add eth0 type veth peer name r1h1
+ ip -netns $h1 link set r1h1 netns $r1 name eth0 up
- ip -netns h1 link add eth1 type veth peer name r2h1
- ip -netns h1 link set r2h1 netns r2 name eth0 up
+ ip -netns $h1 link add eth1 type veth peer name r2h1
+ ip -netns $h1 link set r2h1 netns $r2 name eth0 up
- ip -netns h2 link add eth0 type veth peer name r1h2
- ip -netns h2 link set r1h2 netns r1 name eth1 up
+ ip -netns $h2 link add eth0 type veth peer name r1h2
+ ip -netns $h2 link set r1h2 netns $r1 name eth1 up
- ip -netns h2 link add eth1 type veth peer name r2h2
- ip -netns h2 link set r2h2 netns r2 name eth1 up
+ ip -netns $h2 link add eth1 type veth peer name r2h2
+ ip -netns $h2 link set r2h2 netns $r2 name eth1 up
#
# h1
#
- ip -netns h1 link add br0 type bridge
- ip -netns h1 link set br0 up
- ip -netns h1 addr add dev br0 ${H1_N1_IP}/24
- ip -netns h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
- ip -netns h1 link set eth0 master br0 up
- ip -netns h1 link set eth1 master br0 up
+ ip -netns $h1 link add br0 type bridge
+ ip -netns $h1 link set br0 up
+ ip -netns $h1 addr add dev br0 ${H1_N1_IP}/24
+ ip -netns $h1 -6 addr add dev br0 ${H1_N1_IP6}/64 nodad
+ ip -netns $h1 link set eth0 master br0 up
+ ip -netns $h1 link set eth1 master br0 up
# h1 to h2 via r1
- ip -netns h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
- ip -netns h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
+ ip -netns $h1 route add ${H2_N2} via ${R1_N1_IP} dev br0
+ ip -netns $h1 -6 route add ${H2_N2_6} via "${R1_N1_IP6}" dev br0
#
# h2
#
- ip -netns h2 link add br0 type bridge
- ip -netns h2 link set br0 up
- ip -netns h2 addr add dev br0 ${H2_N2_IP}/24
- ip -netns h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
- ip -netns h2 link set eth0 master br0 up
- ip -netns h2 link set eth1 master br0 up
+ ip -netns $h2 link add br0 type bridge
+ ip -netns $h2 link set br0 up
+ ip -netns $h2 addr add dev br0 ${H2_N2_IP}/24
+ ip -netns $h2 -6 addr add dev br0 ${H2_N2_IP6}/64 nodad
+ ip -netns $h2 link set eth0 master br0 up
+ ip -netns $h2 link set eth1 master br0 up
# h2 to h1 via r2
- ip -netns h2 route add default via ${R2_N2_IP} dev br0
- ip -netns h2 -6 route add default via ${R2_N2_IP6} dev br0
+ ip -netns $h2 route add default via ${R2_N2_IP} dev br0
+ ip -netns $h2 -6 route add default via ${R2_N2_IP6} dev br0
#
# r1
#
- setup_vrf r1
- create_vrf r1 blue 1101
- create_vrf r1 red 1102
- ip -netns r1 link set mtu 1400 dev eth1
- ip -netns r1 link set eth0 vrf blue up
- ip -netns r1 link set eth1 vrf red up
- ip -netns r1 addr add dev eth0 ${R1_N1_IP}/24
- ip -netns r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
- ip -netns r1 addr add dev eth1 ${R1_N2_IP}/24
- ip -netns r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
+ setup_vrf $r1
+ create_vrf $r1 blue 1101
+ create_vrf $r1 red 1102
+ ip -netns $r1 link set mtu 1400 dev eth1
+ ip -netns $r1 link set eth0 vrf blue up
+ ip -netns $r1 link set eth1 vrf red up
+ ip -netns $r1 addr add dev eth0 ${R1_N1_IP}/24
+ ip -netns $r1 -6 addr add dev eth0 ${R1_N1_IP6}/64 nodad
+ ip -netns $r1 addr add dev eth1 ${R1_N2_IP}/24
+ ip -netns $r1 -6 addr add dev eth1 ${R1_N2_IP6}/64 nodad
# Route leak from blue to red
- ip -netns r1 route add vrf blue ${H2_N2} dev red
- ip -netns r1 -6 route add vrf blue ${H2_N2_6} dev red
+ ip -netns $r1 route add vrf blue ${H2_N2} dev red
+ ip -netns $r1 -6 route add vrf blue ${H2_N2_6} dev red
# No route leak from red to blue
#
# r2
#
- ip -netns r2 addr add dev eth0 ${R2_N1_IP}/24
- ip -netns r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
- ip -netns r2 addr add dev eth1 ${R2_N2_IP}/24
- ip -netns r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth0 ${R2_N1_IP}/24
+ ip -netns $r2 -6 addr add dev eth0 ${R2_N1_IP6}/64 nodad
+ ip -netns $r2 addr add dev eth1 ${R2_N2_IP}/24
+ ip -netns $r2 -6 addr add dev eth1 ${R2_N2_IP6}/64 nodad
# Wait for ip config to settle
sleep 2
@@ -384,14 +375,14 @@ setup_asym()
check_connectivity()
{
- ip netns exec h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
+ ip netns exec $h1 ping -c1 -w1 ${H2_N2_IP} >/dev/null 2>&1
log_test $? 0 "Basic IPv4 connectivity"
return $?
}
check_connectivity6()
{
- ip netns exec h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
+ ip netns exec $h1 "${ping6}" -c1 -w1 ${H2_N2_IP6} >/dev/null 2>&1
log_test $? 0 "Basic IPv6 connectivity"
return $?
}
@@ -426,7 +417,7 @@ ipv4_traceroute()
check_connectivity || return
- run_cmd_grep "${R1_N1_IP}" ip netns exec h1 traceroute ${H2_N2_IP}
+ run_cmd_grep "${R1_N1_IP}" ip netns exec $h1 traceroute ${H2_N2_IP}
log_test $? 0 "Traceroute reports a hop on r1"
}
@@ -449,7 +440,7 @@ ipv6_traceroute()
check_connectivity6 || return
- run_cmd_grep "${R1_N1_IP6}" ip netns exec h1 traceroute6 ${H2_N2_IP6}
+ run_cmd_grep "${R1_N1_IP6}" ip netns exec $h1 traceroute6 ${H2_N2_IP6}
log_test $? 0 "Traceroute6 reports a hop on r1"
}
@@ -470,7 +461,7 @@ ipv4_ping_ttl()
check_connectivity || return
- run_cmd_grep "Time to live exceeded" ip netns exec h1 ping -t1 -c1 -W2 ${H2_N2_IP}
+ run_cmd_grep "Time to live exceeded" ip netns exec $h1 ping -t1 -c1 -W2 ${H2_N2_IP}
log_test $? 0 "Ping received ICMP ttl exceeded"
}
@@ -491,7 +482,7 @@ ipv4_ping_frag()
check_connectivity || return
- run_cmd_grep "Frag needed" ip netns exec h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
+ run_cmd_grep "Frag needed" ip netns exec $h1 ping -s 1450 -Mdo -c1 -W2 ${H2_N2_IP}
log_test $? 0 "Ping received ICMP Frag needed"
}
@@ -512,7 +503,7 @@ ipv6_ping_ttl()
check_connectivity6 || return
- run_cmd_grep "Time exceeded: Hop limit" ip netns exec h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
+ run_cmd_grep "Time exceeded: Hop limit" ip netns exec $h1 "${ping6}" -t1 -c1 -W2 ${H2_N2_IP6}
log_test $? 0 "Ping received ICMP Hop limit"
}
@@ -533,7 +524,7 @@ ipv6_ping_frag()
check_connectivity6 || return
- run_cmd_grep "Packet too big" ip netns exec h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
+ run_cmd_grep "Packet too big" ip netns exec $h1 "${ping6}" -s 1450 -Mdo -c1 -W2 ${H2_N2_IP6}
log_test $? 0 "Ping received ICMP Packet too big"
}
diff --git a/tools/testing/selftests/net/vrf_strict_mode_test.sh b/tools/testing/selftests/net/vrf_strict_mode_test.sh
index 417d214264f3..01552b542544 100755
--- a/tools/testing/selftests/net/vrf_strict_mode_test.sh
+++ b/tools/testing/selftests/net/vrf_strict_mode_test.sh
@@ -3,9 +3,7 @@
# This test is designed for testing the new VRF strict_mode functionality.
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
-
+source lib.sh
ret=0
# identifies the "init" network namespace which is often called root network
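Review bot: `source lib.sh` names the file without a slash, so bash searches `PATH` before falling back to the current directory, and a test that normally runs as root can pick up an unrelated `lib.sh`. Anchoring the path avoids that and also works when the script is started from another directory: `source "$(dirname "$0")/lib.sh"`. The hunk also drops the local `ksft_skip=4`, so the skip code presumably now comes only from lib.sh. The identical line added to xfrm_policy.sh further down has the same issue, and there a failed source would leave `exit $ksft_skip` expanding to a bare `exit`.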
@@ -247,13 +245,12 @@ setup()
{
modprobe vrf
- ip netns add testns
- ip netns exec testns ip link set lo up
+ setup_ns testns
}
cleanup()
{
- ip netns del testns 2>/dev/null
+ ip netns del $testns 2>/dev/null
ip link del vrf100 2>/dev/null
ip link del vrf101 2>/dev/null
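Review bot: cleanup() removes the namespace with a bare `ip netns del $testns 2>/dev/null`, while xfrm_policy.sh in this same diff uses `cleanup_ns` for namespaces created via `setup_ns`; `cleanup_ns $testns` here would keep the pairing consistent. The result of `setup_ns testns` in setup() is also not checked, so if it fails `$testns` is presumably empty and the later checks run with an empty namespace argument. Something like `setup_ns testns || exit $ksft_skip` would turn that into a clean skip, assuming setup_ns reports failure through its exit status (lib.sh is not part of this view). cleanup() continues past the end of the hunk.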
@@ -298,28 +295,28 @@ vrf_strict_mode_tests_testns()
{
log_section "VRF strict_mode test on testns network namespace"
- vrf_strict_mode_check_support testns
+ vrf_strict_mode_check_support $testns
- strict_mode_check_default testns
+ strict_mode_check_default $testns
- enable_strict_mode_and_check testns
+ enable_strict_mode_and_check $testns
- add_vrf_and_check testns vrf100 100
- config_vrf_and_check testns 10.0.100.1/24 vrf100
+ add_vrf_and_check $testns vrf100 100
+ config_vrf_and_check $testns 10.0.100.1/24 vrf100
- add_vrf_and_check_fail testns vrf101 100
+ add_vrf_and_check_fail $testns vrf101 100
- add_vrf_and_check_fail testns vrf102 100
+ add_vrf_and_check_fail $testns vrf102 100
- add_vrf_and_check testns vrf200 200
+ add_vrf_and_check $testns vrf200 200
- disable_strict_mode_and_check testns
+ disable_strict_mode_and_check $testns
- add_vrf_and_check testns vrf101 100
+ add_vrf_and_check $testns vrf101 100
- add_vrf_and_check testns vrf102 100
+ add_vrf_and_check $testns vrf102 100
- #the strict_mode is disabled in the testns
+ #the strict_mode is disabled in the $testns
}
vrf_strict_mode_tests_mix()
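Review bot: The comment at the end of vrf_strict_mode_tests_testns was caught by the mechanical rename and now reads `#the strict_mode is disabled in the $testns`. A variable reference does nothing in a comment and reads oddly. Suggest `# strict_mode is now disabled in the test namespace`. The function header lies before this hunk.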
@@ -328,25 +325,25 @@ vrf_strict_mode_tests_mix()
read_strict_mode_compare_and_check init 1
- read_strict_mode_compare_and_check testns 0
+ read_strict_mode_compare_and_check $testns 0
- del_vrf_and_check testns vrf101
+ del_vrf_and_check $testns vrf101
- del_vrf_and_check testns vrf102
+ del_vrf_and_check $testns vrf102
disable_strict_mode_and_check init
- enable_strict_mode_and_check testns
+ enable_strict_mode_and_check $testns
enable_strict_mode_and_check init
enable_strict_mode_and_check init
- disable_strict_mode_and_check testns
- disable_strict_mode_and_check testns
+ disable_strict_mode_and_check $testns
+ disable_strict_mode_and_check $testns
read_strict_mode_compare_and_check init 1
- read_strict_mode_compare_and_check testns 0
+ read_strict_mode_compare_and_check $testns 0
}
################################################################################
diff --git a/tools/testing/selftests/net/xdp_dummy.bpf.c b/tools/testing/selftests/net/xdp_dummy.bpf.c
new file mode 100644
index 000000000000..d988b2e0cee8
--- /dev/null
+++ b/tools/testing/selftests/net/xdp_dummy.bpf.c
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#define KBUILD_MODNAME "xdp_dummy"
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+ return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
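Review bot: The new file has no comment saying what the program is for, so a reader meeting `xdp_dummy_prog` in a test has to open the file to learn that it only returns `XDP_PASS`. A one-line header above the `SEC("xdp")` line would do, e.g. `/* Minimal XDP program: passes every packet unchanged, for tests that only need a program attached. */`. The second half of that wording is my reading of the name, so adjust it to the actual users.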
diff --git a/tools/testing/selftests/net/xfrm_policy.sh b/tools/testing/selftests/net/xfrm_policy.sh
index bdf450eaf60c..457789530645 100755
--- a/tools/testing/selftests/net/xfrm_policy.sh
+++ b/tools/testing/selftests/net/xfrm_policy.sh
@@ -18,8 +18,7 @@
# ns1: ping 10.0.2.254: does NOT pass via ipsec tunnel (exception)
# ns2: ping 10.0.1.254: does NOT pass via ipsec tunnel (exception)
-# Kselftest framework requirement - SKIP code is 4.
-ksft_skip=4
+source lib.sh
ret=0
policy_checks_ok=1
@@ -204,24 +203,24 @@ check_xfrm() {
ip=$2
local lret=0
- ip netns exec ns1 ping -q -c 1 10.0.2.$ip > /dev/null
+ ip netns exec ${ns[1]} ping -q -c 1 10.0.2.$ip > /dev/null
- check_ipt_policy_count ns3
+ check_ipt_policy_count ${ns[3]}
if [ $? -ne $rval ] ; then
lret=1
fi
- check_ipt_policy_count ns4
+ check_ipt_policy_count ${ns[4]}
if [ $? -ne $rval ] ; then
lret=1
fi
- ip netns exec ns2 ping -q -c 1 10.0.1.$ip > /dev/null
+ ip netns exec ${ns[2]} ping -q -c 1 10.0.1.$ip > /dev/null
- check_ipt_policy_count ns3
+ check_ipt_policy_count ${ns[3]}
if [ $? -ne $rval ] ; then
lret=1
fi
- check_ipt_policy_count ns4
+ check_ipt_policy_count ${ns[4]}
if [ $? -ne $rval ] ; then
lret=1
fi
@@ -270,11 +269,11 @@ check_hthresh_repeat()
i=0
for i in $(seq 1 10);do
- ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
- ip -net ns1 xfrm policy set hthresh6 0 28 || break
+ ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::0014:0000:0001 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+ ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break
- ip -net ns1 xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
- ip -net ns1 xfrm policy set hthresh6 0 28 || break
+ ip -net ${ns[1]} xfrm policy update src e000:0001::0000 dst ff01::01 dir in tmpl src :: dst :: proto esp mode tunnel priority 100 action allow || break
+ ip -net ${ns[1]} xfrm policy set hthresh6 0 28 || break
done
if [ $i -ne 10 ] ;then
@@ -347,79 +346,80 @@ if [ $? -ne 0 ];then
exit $ksft_skip
fi
-for i in 1 2 3 4; do
- ip netns add ns$i
- ip -net ns$i link set lo up
-done
+setup_ns ns1 ns2 ns3 ns4
+ns[1]=$ns1
+ns[2]=$ns2
+ns[3]=$ns3
+ns[4]=$ns4
DEV=veth0
-ip link add $DEV netns ns1 type veth peer name eth1 netns ns3
-ip link add $DEV netns ns2 type veth peer name eth1 netns ns4
+ip link add $DEV netns ${ns[1]} type veth peer name eth1 netns ${ns[3]}
+ip link add $DEV netns ${ns[2]} type veth peer name eth1 netns ${ns[4]}
-ip link add $DEV netns ns3 type veth peer name veth0 netns ns4
+ip link add $DEV netns ${ns[3]} type veth peer name veth0 netns ${ns[4]}
DEV=veth0
for i in 1 2; do
- ip -net ns$i link set $DEV up
- ip -net ns$i addr add 10.0.$i.2/24 dev $DEV
- ip -net ns$i addr add dead:$i::2/64 dev $DEV
-
- ip -net ns$i addr add 10.0.$i.253 dev $DEV
- ip -net ns$i addr add 10.0.$i.254 dev $DEV
- ip -net ns$i addr add dead:$i::fd dev $DEV
- ip -net ns$i addr add dead:$i::fe dev $DEV
+ ip -net ${ns[$i]} link set $DEV up
+ ip -net ${ns[$i]} addr add 10.0.$i.2/24 dev $DEV
+ ip -net ${ns[$i]} addr add dead:$i::2/64 dev $DEV
+
+ ip -net ${ns[$i]} addr add 10.0.$i.253 dev $DEV
+ ip -net ${ns[$i]} addr add 10.0.$i.254 dev $DEV
+ ip -net ${ns[$i]} addr add dead:$i::fd dev $DEV
+ ip -net ${ns[$i]} addr add dead:$i::fe dev $DEV
done
for i in 3 4; do
-ip -net ns$i link set eth1 up
-ip -net ns$i link set veth0 up
+ ip -net ${ns[$i]} link set eth1 up
+ ip -net ${ns[$i]} link set veth0 up
done
-ip -net ns1 route add default via 10.0.1.1
-ip -net ns2 route add default via 10.0.2.1
+ip -net ${ns[1]} route add default via 10.0.1.1
+ip -net ${ns[2]} route add default via 10.0.2.1
-ip -net ns3 addr add 10.0.1.1/24 dev eth1
-ip -net ns3 addr add 10.0.3.1/24 dev veth0
-ip -net ns3 addr add 2001:1::1/64 dev eth1
-ip -net ns3 addr add 2001:3::1/64 dev veth0
+ip -net ${ns[3]} addr add 10.0.1.1/24 dev eth1
+ip -net ${ns[3]} addr add 10.0.3.1/24 dev veth0
+ip -net ${ns[3]} addr add 2001:1::1/64 dev eth1
+ip -net ${ns[3]} addr add 2001:3::1/64 dev veth0
-ip -net ns3 route add default via 10.0.3.10
+ip -net ${ns[3]} route add default via 10.0.3.10
-ip -net ns4 addr add 10.0.2.1/24 dev eth1
-ip -net ns4 addr add 10.0.3.10/24 dev veth0
-ip -net ns4 addr add 2001:2::1/64 dev eth1
-ip -net ns4 addr add 2001:3::10/64 dev veth0
-ip -net ns4 route add default via 10.0.3.1
+ip -net ${ns[4]} addr add 10.0.2.1/24 dev eth1
+ip -net ${ns[4]} addr add 10.0.3.10/24 dev veth0
+ip -net ${ns[4]} addr add 2001:2::1/64 dev eth1
+ip -net ${ns[4]} addr add 2001:3::10/64 dev veth0
+ip -net ${ns[4]} route add default via 10.0.3.1
for j in 4 6; do
for i in 3 4;do
- ip netns exec ns$i sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
- ip netns exec ns$i sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
+ ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.eth1.forwarding=1 > /dev/null
+ ip netns exec ${ns[$i]} sysctl net.ipv$j.conf.veth0.forwarding=1 > /dev/null
done
done
# abuse iptables rule counter to check if ping matches a policy
-ip netns exec ns3 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
-ip netns exec ns4 iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ${ns[3]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
+ip netns exec ${ns[4]} iptables -p icmp -A FORWARD -m policy --dir out --pol ipsec
if [ $? -ne 0 ];then
echo "SKIP: Could not insert iptables rule"
- for i in 1 2 3 4;do ip netns del ns$i;done
+ cleanup_ns $ns1 $ns2 $ns3 $ns4
exit $ksft_skip
fi
# localip remoteip localnet remotenet
-do_esp ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
-do_esp ns3 dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
-do_esp ns4 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
-do_esp ns4 dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
+do_esp ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24 $SPI1 $SPI2
+do_esp ${ns[3]} dead:3::1 dead:3::10 dead:1::/64 dead:2::/64 $SPI1 $SPI2
+do_esp ${ns[4]} 10.0.3.10 10.0.3.1 10.0.2.0/24 10.0.1.0/24 $SPI2 $SPI1
+do_esp ${ns[4]} dead:3::10 dead:3::1 dead:2::/64 dead:1::/64 $SPI2 $SPI1
-do_dummies4 ns3
-do_dummies6 ns4
+do_dummies4 ${ns[3]}
+do_dummies6 ${ns[4]}
-do_esp_policy_get_check ns3 10.0.1.0/24 10.0.2.0/24
-do_esp_policy_get_check ns4 10.0.2.0/24 10.0.1.0/24
-do_esp_policy_get_check ns3 dead:1::/64 dead:2::/64
-do_esp_policy_get_check ns4 dead:2::/64 dead:1::/64
+do_esp_policy_get_check ${ns[3]} 10.0.1.0/24 10.0.2.0/24
+do_esp_policy_get_check ${ns[4]} 10.0.2.0/24 10.0.1.0/24
+do_esp_policy_get_check ${ns[3]} dead:1::/64 dead:2::/64
+do_esp_policy_get_check ${ns[4]} dead:2::/64 dead:1::/64
# ping to .254 should use ipsec, exception is not installed.
check_xfrm 1 254
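Review bot: The `if [ $? -ne 0 ]` after the two `iptables -A FORWARD` lines only sees the status of the `${ns[4]}` insert, so a failed insert in `${ns[3]}` goes unnoticed and the run continues instead of skipping. Joining the two commands with `&&` (end the first line with `&& \`) makes the existing test cover both. Separately, the status of `setup_ns ns1 ns2 ns3 ns4` is not checked before `ns[1]`..`ns[4]` are filled in. If lib.sh's setup_ns signals failure through its return value (not visible here), `setup_ns ns1 ns2 ns3 ns4 || exit $ksft_skip` would stop before the `ip link add` lines run with empty namespace names.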
@@ -432,11 +432,11 @@ fi
# installs exceptions
# localip remoteip encryptdst plaindst
-do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
-do_exception ns4 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
+do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_exception ${ns[4]} 10.0.3.10 10.0.3.1 10.0.1.253 10.0.1.240/28
-do_exception ns3 dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
-do_exception ns4 dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
+do_exception ${ns[3]} dead:3::1 dead:3::10 dead:2::fd dead:2:f0::/96
+do_exception ${ns[4]} dead:3::10 dead:3::1 dead:1::fd dead:1:f0::/96
check_exceptions "exceptions"
if [ $? -ne 0 ]; then
@@ -444,14 +444,14 @@ if [ $? -ne 0 ]; then
fi
# insert block policies with adjacent/overlapping netmasks
-do_overlap ns3
+do_overlap ${ns[3]}
check_exceptions "exceptions and block policies"
if [ $? -ne 0 ]; then
ret=1
fi
-for n in ns3 ns4;do
+for n in ${ns[3]} ${ns[4]};do
ip -net $n xfrm policy set hthresh4 28 24 hthresh6 126 125
sleep $((RANDOM%5))
done
@@ -459,19 +459,19 @@ done
check_exceptions "exceptions and block policies after hresh changes"
# full flush of policy db, check everything gets freed incl. internal meta data
-ip -net ns3 xfrm policy flush
+ip -net ${ns[3]} xfrm policy flush
-do_esp_policy ns3 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
-do_exception ns3 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
+do_esp_policy ${ns[3]} 10.0.3.1 10.0.3.10 10.0.1.0/24 10.0.2.0/24
+do_exception ${ns[3]} 10.0.3.1 10.0.3.10 10.0.2.253 10.0.2.240/28
# move inexact policies to hash table
-ip -net ns3 xfrm policy set hthresh4 16 16
+ip -net ${ns[3]} xfrm policy set hthresh4 16 16
sleep $((RANDOM%5))
check_exceptions "exceptions and block policies after hthresh change in ns3"
# restore original hthresh settings -- move policies back to tables
-for n in ns3 ns4;do
+for n in ${ns[3]} ${ns[4]};do
ip -net $n xfrm policy set hthresh4 32 32 hthresh6 128 128
sleep $((RANDOM%5))
done
@@ -479,8 +479,8 @@ check_exceptions "exceptions and block policies after htresh change to normal"
check_hthresh_repeat "policies with repeated htresh change"
-check_random_order ns3 "policies inserted in random order"
+check_random_order ${ns[3]} "policies inserted in random order"
-for i in 1 2 3 4;do ip netns del ns$i;done
+cleanup_ns $ns1 $ns2 $ns3 $ns4
exit $ret