From 12b6c3a0380a220edb03691689167be13faa4f45 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2024 07:44:21 -0700 Subject: net: page_pool: support error injection Because of caching / recycling using the general page allocation failures to induce errors in page pool allocation is very hard. Add direct error injection support to page_pool_alloc_pages(). Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240429144426.743476-2-kuba@kernel.org Signed-off-by: Jakub Kicinski --- net/core/page_pool.c | 2 ++ 1 file changed, 2 insertions(+) diff --git a/net/core/page_pool.c b/net/core/page_pool.c index 273c24429bce..8bcc7014a61a 100644 --- a/net/core/page_pool.c +++ b/net/core/page_pool.c @@ -5,6 +5,7 @@ * Copyright (C) 2016 Red Hat, Inc. */ +#include #include #include #include @@ -550,6 +551,7 @@ struct page *page_pool_alloc_pages(struct page_pool *pool, gfp_t gfp) return page; } EXPORT_SYMBOL(page_pool_alloc_pages); +ALLOW_ERROR_INJECTION(page_pool_alloc_pages, NULL); /* Calculate distance between two u32 values, valid if distance is below 2^(31) * https://en.wikipedia.org/wiki/Serial_number_arithmetic#General_Solution -- cgit From ff4b2bfa63bd07cca35f6e704dc5035650595950 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2024 07:44:22 -0700 Subject: selftests: drv-net-hw: support using Python from net hw tests We created a separate directory for HW-only tests, recently. Glue in the Python test library there, Python is a bit annoying when it comes to using library code located "lower" in the directory structure. Reuse the Env class, but let tests require non-nsim setup. Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240429144426.743476-3-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/Makefile | 2 +- tools/testing/selftests/drivers/net/hw/Makefile | 1 + .../testing/selftests/drivers/net/hw/lib/py/__init__.py | 16 ++++++++++++++++ tools/testing/selftests/drivers/net/lib/py/env.py | 10 ++++++++-- 4 files changed, 26 insertions(+), 3 deletions(-) create mode 100644 tools/testing/selftests/drivers/net/hw/lib/py/__init__.py diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 2c940e9c4ced..9039f3709aff 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -119,7 +119,7 @@ TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug # Networking tests want the net/lib target, include it automatically -ifneq ($(filter net drivers/net,$(TARGETS)),) +ifneq ($(filter net drivers/net drivers/net/hw,$(TARGETS)),) ifeq ($(filter net/lib,$(TARGETS)),) INSTALL_DEP_TARGETS := net/lib endif diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 2259a39a70ed..95f32158b095 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -16,6 +16,7 @@ TEST_FILES := \ # TEST_INCLUDES := \ + $(wildcard lib/py/*.py ../lib/py/*.py) \ ../../../net/lib.sh \ ../../../net/forwarding/lib.sh \ ../../../net/forwarding/ipip_lib.sh \ diff --git a/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py new file mode 100644 index 000000000000..b582885786f5 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/lib/py/__init__.py @@ -0,0 +1,16 @@ +# SPDX-License-Identifier: GPL-2.0 + +import sys +from pathlib import Path + +KSFT_DIR = (Path(__file__).parent / "../../../../..").resolve() + +try: + sys.path.append(KSFT_DIR.as_posix()) + from net.lib.py import * + from drivers.net.lib.py import * +except ModuleNotFoundError as e: + ksft_pr("Failed importing `net` library from kernel sources") + ksft_pr(str(e)) + ktap_result(True, comment="SKIP") + sys.exit(4) diff --git a/tools/testing/selftests/drivers/net/lib/py/env.py b/tools/testing/selftests/drivers/net/lib/py/env.py index e2ab637e56dc..5c8f695b2536 100644 --- a/tools/testing/selftests/drivers/net/lib/py/env.py +++ b/tools/testing/selftests/drivers/net/lib/py/env.py @@ -2,7 +2,7 @@ import os from pathlib import Path -from lib.py import KsftSkipEx +from lib.py import KsftSkipEx, KsftXfailEx from lib.py import cmd, ip from lib.py import NetNS, NetdevSimDev from .remote import Remote @@ -76,7 +76,7 @@ class NetDrvEpEnv: nsim_v4_pfx = "192.0.2." nsim_v6_pfx = "2001:db8::" - def __init__(self, src_path): + def __init__(self, src_path, nsim_test=None): self.env = _load_env_file(src_path) @@ -88,7 +88,10 @@ class NetDrvEpEnv: self._ns_peer = None if "NETIF" in self.env: + if nsim_test is True: + raise KsftXfailEx("Test only works on netdevsim") self._check_env() + self.dev = ip("link show dev " + self.env['NETIF'], json=True)[0] self.v4 = self.env.get("LOCAL_V4") @@ -98,6 +101,9 @@ class NetDrvEpEnv: kind = self.env["REMOTE_TYPE"] args = self.env["REMOTE_ARGS"] else: + if nsim_test is False: + raise KsftXfailEx("Test does not work on netdevsim") + self.create_local() self.dev = self._ns.nsims[0].dev -- cgit From 32a4ca1361d7a51e5003d4af4dfbf570f1b5fd00 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2024 07:44:23 -0700 Subject: selftests: net: py: extract tool logic The main use of the ip() wrapper over cmd() is that it can parse JSON. cmd("ip -j link show") will return stdout as a string, and test has to call json.loads(). With ip("link show", json=True) the return value will be already parsed. More tools (ethtool, bpftool etc.) support the --json switch. To avoid having to wrap all of them individually create a tool() helper. Switch from -j to --json (for ethtool). While at it consume the netns attribute at the ip() level. Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240429144426.743476-4-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index d3715e6c21f2..4930a90a64ea 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -56,10 +56,10 @@ class bkg(cmd): return self.process(terminate=self.terminate) -def ip(args, json=None, ns=None, host=None): - cmd_str = "ip " +def tool(name, args, json=None, ns=None, host=None): + cmd_str = name + ' ' if json: - cmd_str += '-j ' + cmd_str += '--json ' cmd_str += args cmd_obj = cmd(cmd_str, ns=ns, host=host) if json: @@ -67,6 +67,12 @@ def ip(args, json=None, ns=None, host=None): return cmd_obj +def ip(args, json=None, ns=None, host=None): + if ns: + args = f'-netns {ns} ' + args + return tool('ip', args, json=json, host=host) + + def rand_port(): """ Get unprivileged port, for now just random, one day we may decide to check if used. -- cgit From ee2512d6bf4168998f3c218fb33ed50544e69e3c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2024 07:44:24 -0700 Subject: selftests: net: py: avoid all ports < 10k When picking TCP ports to use, avoid all below 10k. This should lower the chance of collision or running afoul whatever random policies may be on the host. Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240429144426.743476-5-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/net/lib/py/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/testing/selftests/net/lib/py/utils.py b/tools/testing/selftests/net/lib/py/utils.py index 4930a90a64ea..b57d467afd0f 100644 --- a/tools/testing/selftests/net/lib/py/utils.py +++ b/tools/testing/selftests/net/lib/py/utils.py @@ -77,7 +77,7 @@ def rand_port(): """ Get unprivileged port, for now just random, one day we may decide to check if used. """ - return random.randint(1024, 65535) + return random.randint(10000, 65535) def wait_port_listen(port, proto="tcp", ns=None, host=None, sleep=0.005, deadline=5): -- cgit From 0f0cdf312ecc06e63fbba95caf2844e2c405b076 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2024 07:44:25 -0700 Subject: selftests: drv-net: support generating iperf3 load While we are not very interested in testing performance it's useful to be able to generate a lot of traffic. iperf is the simplest way of getting relatively high PPS. Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240429144426.743476-6-kuba@kernel.org Signed-off-by: Jakub Kicinski --- .../selftests/drivers/net/lib/py/__init__.py | 1 + tools/testing/selftests/drivers/net/lib/py/load.py | 41 ++++++++++++++++++++++ 2 files changed, 42 insertions(+) create mode 100644 tools/testing/selftests/drivers/net/lib/py/load.py diff --git a/tools/testing/selftests/drivers/net/lib/py/__init__.py b/tools/testing/selftests/drivers/net/lib/py/__init__.py index 4789c1a4282d..401e70f7f136 100644 --- a/tools/testing/selftests/drivers/net/lib/py/__init__.py +++ b/tools/testing/selftests/drivers/net/lib/py/__init__.py @@ -15,4 +15,5 @@ except ModuleNotFoundError as e: sys.exit(4) from .env import * +from .load import * from .remote import Remote diff --git a/tools/testing/selftests/drivers/net/lib/py/load.py b/tools/testing/selftests/drivers/net/lib/py/load.py new file mode 100644 index 000000000000..abdb677bdb1c --- /dev/null +++ b/tools/testing/selftests/drivers/net/lib/py/load.py @@ -0,0 +1,41 @@ +# SPDX-License-Identifier: GPL-2.0 + +import time + +from lib.py import ksft_pr, cmd, ip, rand_port, wait_port_listen + +class GenerateTraffic: + def __init__(self, env): + env.require_cmd("iperf3", remote=True) + + self.env = env + + port = rand_port() + self._iperf_server = cmd(f"iperf3 -s -p {port}", background=True) + wait_port_listen(port) + time.sleep(0.1) + self._iperf_client = cmd(f"iperf3 -c {env.addr} -P 16 -p {port} -t 86400", + background=True, host=env.remote) + + # Wait for traffic to ramp up + pkt = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + for _ in range(50): + time.sleep(0.1) + now = ip("-s link show dev " + env.ifname, json=True)[0]["stats64"]["rx"]["packets"] + if now - pkt > 1000: + return + pkt = now + self.stop(verbose=True) + raise Exception("iperf3 traffic did not ramp up") + + def stop(self, verbose=None): + self._iperf_client.process(terminate=True) + if verbose: + ksft_pr(">> Client:") + ksft_pr(self._iperf_client.stdout) + ksft_pr(self._iperf_client.stderr) + self._iperf_server.process(terminate=True) + if verbose: + ksft_pr(">> Server:") + ksft_pr(self._iperf_server.stdout) + ksft_pr(self._iperf_server.stderr) -- cgit From 9da271f825e42156058a2eb09360bc993853bbba Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 29 Apr 2024 07:44:26 -0700 Subject: selftests: drv-net-hw: add test for memory allocation failures with page pool Bugs in memory allocation failure paths are quite common. Add a test exercising those paths based on qstat and page pool failure hook. Running on bnxt: # ./drivers/net/hw/pp_alloc_fail.py KTAP version 1 1..1 # ethtool -G change retval: success ok 1 pp_alloc_fail.test_pp_alloc # Totals: pass:1 fail:0 xfail:0 xpass:0 skip:0 error:0 I initially wrote this test to validate commit be43b7489a3c ("net/mlx5e: RX, Fix page_pool allocation failure recovery for striding rq") but mlx5 still doesn't have qstat. So I run it on bnxt, and while bnxt survives I found the problem fixed in commit 730117730709 ("eth: bnxt: fix counting packets discarded due to OOM and netpoll"). Reviewed-by: Willem de Bruijn Link: https://lore.kernel.org/r/20240429144426.743476-7-kuba@kernel.org Signed-off-by: Jakub Kicinski --- tools/testing/selftests/drivers/net/hw/Makefile | 1 + .../selftests/drivers/net/hw/pp_alloc_fail.py | 129 +++++++++++++++++++++ tools/testing/selftests/net/lib/py/ksft.py | 4 + 3 files changed, 134 insertions(+) create mode 100755 tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py diff --git a/tools/testing/selftests/drivers/net/hw/Makefile b/tools/testing/selftests/drivers/net/hw/Makefile index 95f32158b095..1dd732855d76 100644 --- a/tools/testing/selftests/drivers/net/hw/Makefile +++ b/tools/testing/selftests/drivers/net/hw/Makefile @@ -9,6 +9,7 @@ TEST_PROGS = \ hw_stats_l3.sh \ hw_stats_l3_gre.sh \ loopback.sh \ + pp_alloc_fail.py \ # TEST_FILES := \ diff --git a/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py new file mode 100755 index 000000000000..026d98976c35 --- /dev/null +++ b/tools/testing/selftests/drivers/net/hw/pp_alloc_fail.py @@ -0,0 +1,129 @@ +#!/usr/bin/env python3 +# SPDX-License-Identifier: GPL-2.0 + +import time +import os +from lib.py import ksft_run, ksft_exit, ksft_pr +from lib.py import KsftSkipEx, KsftFailEx +from lib.py import NetdevFamily, NlError +from lib.py import NetDrvEpEnv +from lib.py import cmd, tool, GenerateTraffic + + +def _write_fail_config(config): + for key, value in config.items(): + with open("/sys/kernel/debug/fail_function/" + key, "w") as fp: + fp.write(str(value) + "\n") + + +def _enable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + raise KsftSkipEx("Kernel built without function error injection (or DebugFS)") + + if not os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + with open("/sys/kernel/debug/fail_function/inject", "w") as fp: + fp.write("page_pool_alloc_pages\n") + + _write_fail_config({ + "verbose": 0, + "interval": 511, + "probability": 100, + "times": -1, + }) + + +def _disable_pp_allocation_fail(): + if not os.path.exists("/sys/kernel/debug/fail_function"): + return + + if os.path.exists("/sys/kernel/debug/fail_function/page_pool_alloc_pages"): + with open("/sys/kernel/debug/fail_function/inject", "w") as fp: + fp.write("\n") + + _write_fail_config({ + "probability": 0, + "times": 0, + }) + + +def test_pp_alloc(cfg, netdevnl): + def get_stats(): + return netdevnl.qstats_get({"ifindex": cfg.ifindex}, dump=True)[0] + + def check_traffic_flowing(): + stat1 = get_stats() + time.sleep(1) + stat2 = get_stats() + if stat2['rx-packets'] - stat1['rx-packets'] < 15000: + raise KsftFailEx("Traffic seems low:", stat2['rx-packets'] - stat1['rx-packets']) + + + try: + stats = get_stats() + except NlError as e: + if e.nl_msg.error == -95: + stats = {} + else: + raise + if 'rx-alloc-fail' not in stats: + raise KsftSkipEx("Driver does not report 'rx-alloc-fail' via qstats") + + set_g = False + traffic = None + try: + traffic = GenerateTraffic(cfg) + + check_traffic_flowing() + + _enable_pp_allocation_fail() + + s1 = get_stats() + time.sleep(3) + s2 = get_stats() + + if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 1: + raise KsftSkipEx("Allocation failures not increasing") + if s2['rx-alloc-fail'] - s1['rx-alloc-fail'] < 100: + raise KsftSkipEx("Allocation increasing too slowly", s2['rx-alloc-fail'] - s1['rx-alloc-fail'], + "packets:", s2['rx-packets'] - s1['rx-packets']) + + # Basic failures are fine, try to wobble some settings to catch extra failures + check_traffic_flowing() + g = tool("ethtool", "-g " + cfg.ifname, json=True)[0] + if 'rx' in g and g["rx"] * 2 <= g["rx-max"]: + new_g = g['rx'] * 2 + elif 'rx' in g: + new_g = g['rx'] // 2 + else: + new_g = None + + if new_g: + set_g = cmd(f"ethtool -G {cfg.ifname} rx {new_g}", fail=False).ret == 0 + if set_g: + ksft_pr("ethtool -G change retval: success") + else: + ksft_pr("ethtool -G change retval: did not succeed", new_g) + else: + ksft_pr("ethtool -G change retval: did not try") + + time.sleep(0.1) + check_traffic_flowing() + finally: + _disable_pp_allocation_fail() + if traffic: + traffic.stop() + time.sleep(0.1) + if set_g: + cmd(f"ethtool -G {cfg.ifname} rx {g['rx']}") + + +def main() -> None: + netdevnl = NetdevFamily() + with NetDrvEpEnv(__file__, nsim_test=False) as cfg: + + ksft_run([test_pp_alloc], args=(cfg, netdevnl, )) + ksft_exit() + + +if __name__ == "__main__": + main() diff --git a/tools/testing/selftests/net/lib/py/ksft.py b/tools/testing/selftests/net/lib/py/ksft.py index f84e9fdd0032..4769b4eb1ea1 100644 --- a/tools/testing/selftests/net/lib/py/ksft.py +++ b/tools/testing/selftests/net/lib/py/ksft.py @@ -11,6 +11,10 @@ KSFT_RESULT = None KSFT_RESULT_ALL = True +class KsftFailEx(Exception): + pass + + class KsftSkipEx(Exception): pass -- cgit