From 8f0ec8c681755f523cf842bfe350ea40609b83a9 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:02 -0700 Subject: bpf: xfrm: Add bpf_xdp_get_xfrm_state() kfunc This commit adds an unstable kfunc helper to access internal xfrm_state associated with an SA. This is intended to be used for the upcoming IPsec pcpu work to assign special pcpu SAs to a particular CPU. In other words: for custom software RSS. That being said, the function that this kfunc wraps is fairly generic and used for a lot of xfrm tasks. I'm sure people will find uses elsewhere over time. This commit also adds a corresponding bpf_xdp_xfrm_state_release() kfunc to release the refcnt acquired by bpf_xdp_get_xfrm_state(). The verifier will require that all acquired xfrm_state's are released. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Acked-by: Steffen Klassert Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/a29699c42f5fad456b875c98dd11c6afc3ffb707.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- include/net/xfrm.h | 9 ++++ net/xfrm/Makefile | 1 + net/xfrm/xfrm_policy.c | 2 + net/xfrm/xfrm_state_bpf.c | 134 ++++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 146 insertions(+) create mode 100644 net/xfrm/xfrm_state_bpf.c diff --git a/include/net/xfrm.h b/include/net/xfrm.h index c9bb0f892f55..1d107241b901 100644 --- a/include/net/xfrm.h +++ b/include/net/xfrm.h @@ -2190,4 +2190,13 @@ static inline int register_xfrm_interface_bpf(void) #endif +#if IS_ENABLED(CONFIG_DEBUG_INFO_BTF) +int register_xfrm_state_bpf(void); +#else +static inline int register_xfrm_state_bpf(void) +{ + return 0; +} +#endif + #endif /* _NET_XFRM_H */ diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile index cd47f88921f5..547cec77ba03 100644 --- a/net/xfrm/Makefile +++ b/net/xfrm/Makefile @@ -21,3 +21,4 @@ obj-$(CONFIG_XFRM_USER_COMPAT) += xfrm_compat.o obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o +obj-$(CONFIG_DEBUG_INFO_BTF) += xfrm_state_bpf.o diff --git a/net/xfrm/xfrm_policy.c b/net/xfrm/xfrm_policy.c index c13dc3ef7910..1b7e75159727 100644 --- a/net/xfrm/xfrm_policy.c +++ b/net/xfrm/xfrm_policy.c @@ -4218,6 +4218,8 @@ void __init xfrm_init(void) #ifdef CONFIG_XFRM_ESPINTCP espintcp_init(); #endif + + register_xfrm_state_bpf(); } #ifdef CONFIG_AUDITSYSCALL diff --git a/net/xfrm/xfrm_state_bpf.c b/net/xfrm/xfrm_state_bpf.c new file mode 100644 index 000000000000..9e20d4a377f7 --- /dev/null +++ b/net/xfrm/xfrm_state_bpf.c @@ -0,0 +1,134 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* Unstable XFRM state BPF helpers. + * + * Note that it is allowed to break compatibility for these functions since the + * interface they are exposed through to BPF programs is explicitly unstable. + */ + +#include +#include +#include +#include +#include + +/* bpf_xfrm_state_opts - Options for XFRM state lookup helpers + * + * Members: + * @error - Out parameter, set for any errors encountered + * Values: + * -EINVAL - netns_id is less than -1 + * -EINVAL - opts__sz isn't BPF_XFRM_STATE_OPTS_SZ + * -ENONET - No network namespace found for netns_id + * -ENOENT - No xfrm_state found + * @netns_id - Specify the network namespace for lookup + * Values: + * BPF_F_CURRENT_NETNS (-1) + * Use namespace associated with ctx + * [0, S32_MAX] + * Network Namespace ID + * @mark - XFRM mark to match on + * @daddr - Destination address to match on + * @spi - Security parameter index to match on + * @proto - IP protocol to match on (eg. IPPROTO_ESP) + * @family - Protocol family to match on (AF_INET/AF_INET6) + */ +struct bpf_xfrm_state_opts { + s32 error; + s32 netns_id; + u32 mark; + xfrm_address_t daddr; + __be32 spi; + u8 proto; + u16 family; +}; + +enum { + BPF_XFRM_STATE_OPTS_SZ = sizeof(struct bpf_xfrm_state_opts), +}; + +__bpf_kfunc_start_defs(); + +/* bpf_xdp_get_xfrm_state - Get XFRM state + * + * A `struct xfrm_state *`, if found, must be released with a corresponding + * bpf_xdp_xfrm_state_release. + * + * Parameters: + * @ctx - Pointer to ctx (xdp_md) in XDP program + * Cannot be NULL + * @opts - Options for lookup (documented above) + * Cannot be NULL + * @opts__sz - Length of the bpf_xfrm_state_opts structure + * Must be BPF_XFRM_STATE_OPTS_SZ + */ +__bpf_kfunc struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, u32 opts__sz) +{ + struct xdp_buff *xdp = (struct xdp_buff *)ctx; + struct net *net = dev_net(xdp->rxq->dev); + struct xfrm_state *x; + + if (!opts || opts__sz < sizeof(opts->error)) + return NULL; + + if (opts__sz != BPF_XFRM_STATE_OPTS_SZ) { + opts->error = -EINVAL; + return NULL; + } + + if (unlikely(opts->netns_id < BPF_F_CURRENT_NETNS)) { + opts->error = -EINVAL; + return NULL; + } + + if (opts->netns_id >= 0) { + net = get_net_ns_by_id(net, opts->netns_id); + if (unlikely(!net)) { + opts->error = -ENONET; + return NULL; + } + } + + x = xfrm_state_lookup(net, opts->mark, &opts->daddr, opts->spi, + opts->proto, opts->family); + + if (opts->netns_id >= 0) + put_net(net); + if (!x) + opts->error = -ENOENT; + + return x; +} + +/* bpf_xdp_xfrm_state_release - Release acquired xfrm_state object + * + * This must be invoked for referenced PTR_TO_BTF_ID, and the verifier rejects + * the program if any references remain in the program in all of the explored + * states. + * + * Parameters: + * @x - Pointer to referenced xfrm_state object, obtained using + * bpf_xdp_get_xfrm_state. + */ +__bpf_kfunc void bpf_xdp_xfrm_state_release(struct xfrm_state *x) +{ + xfrm_state_put(x); +} + +__bpf_kfunc_end_defs(); + +BTF_SET8_START(xfrm_state_kfunc_set) +BTF_ID_FLAGS(func, bpf_xdp_get_xfrm_state, KF_RET_NULL | KF_ACQUIRE) +BTF_ID_FLAGS(func, bpf_xdp_xfrm_state_release, KF_RELEASE) +BTF_SET8_END(xfrm_state_kfunc_set) + +static const struct btf_kfunc_id_set xfrm_state_xdp_kfunc_set = { + .owner = THIS_MODULE, + .set = &xfrm_state_kfunc_set, +}; + +int __init register_xfrm_state_bpf(void) +{ + return register_btf_kfunc_id_set(BPF_PROG_TYPE_XDP, + &xfrm_state_xdp_kfunc_set); +} -- cgit From 77a7a8220f0d87c44425c0a12e0a72b14962535b Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:03 -0700 Subject: bpf: selftests: test_tunnel: Setup fresh topology for each subtest This helps with determinism b/c individual setup/teardown prevents leaking state between different subtests. Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/0fb59fa16fb58cca7def5239df606005a3e8dd0e.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- tools/testing/selftests/bpf/prog_tests/test_tunnel.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index d149ab98798d..b57d48219d0b 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -535,23 +535,20 @@ done: #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ + config_device(); \ test_ ## name(__VA_ARGS__); \ + cleanup(); \ } \ }) static void *test_tunnel_run_tests(void *arg) { - cleanup(); - config_device(); - RUN_TEST(vxlan_tunnel); RUN_TEST(ip6vxlan_tunnel); RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); - cleanup(); - return NULL; } -- cgit From 02b4e126e6a5f5552da2ccec47a028984d2d9654 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:04 -0700 Subject: bpf: selftests: test_tunnel: Use vmlinux.h declarations vmlinux.h declarations are more ergnomic, especially when working with kfuncs. The uapi headers are often incomplete for kfunc definitions. This commit also switches bitfield accesses to use CO-RE helpers. Switching to vmlinux.h definitions makes the verifier very unhappy with raw bitfield accesses. The error is: ; md.u.md2.dir = direction; 33: (69) r1 = *(u16 *)(r2 +11) misaligned stack access off (0x0; 0x0)+-64+11 size 2 Fix by using CO-RE-aware bitfield reads and writes. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/884bde1d9a351d126a3923886b945ea6b1b0776b.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/progs/bpf_tracing_net.h | 1 + .../testing/selftests/bpf/progs/test_tunnel_kern.c | 76 ++++++---------------- 2 files changed, 22 insertions(+), 55 deletions(-) diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 0b793a102791..1bdc680b0e0e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -26,6 +26,7 @@ #define IPV6_AUTOFLOWLABEL 70 #define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 #define TC_ACT_SHOT 2 #define SOL_TCP 6 diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f66af753bbbb..b320fb7bb080 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -6,62 +6,26 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include "vmlinux.h" +#include #include #include +#include "bpf_kfuncs.h" +#include "bpf_tracing_net.h" #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) -#define VXLAN_UDP_PORT 4789 +#define VXLAN_UDP_PORT 4789 +#define ETH_P_IP 0x0800 +#define PACKET_HOST 0 +#define TUNNEL_CSUM bpf_htons(0x01) +#define TUNNEL_KEY bpf_htons(0x04) /* Only IPv4 address assigned to veth1. * 172.16.1.200 */ #define ASSIGNED_ADDR_VETH1 0xac1001c8 -struct geneve_opt { - __be16 opt_class; - __u8 type; - __u8 length:5; - __u8 r3:1; - __u8 r2:1; - __u8 r1:1; - __u8 opt_data[8]; /* hard-coded to 8 byte */ -}; - -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -} __attribute__((packed)); - -struct vxlan_metadata { - __u32 gbp; -}; - -struct bpf_fou_encap { - __be16 sport; - __be16 dport; -}; - -enum bpf_fou_encap_type { - FOU_BPF_ENCAP_FOU, - FOU_BPF_ENCAP_GUE, -}; - int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, @@ -205,9 +169,9 @@ int erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 7; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -246,8 +210,9 @@ int erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -284,9 +249,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 17; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -326,8 +291,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif -- cgit From e7adc8291a9e9c232d600d82465cbbb682164ca3 Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:05 -0700 Subject: bpf: selftests: Move xfrm tunnel test to test_progs test_progs is better than a shell script b/c C is a bit easier to maintain than shell. Also it's easier to use new infra like memory mapped global variables from C via bpf skeleton. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/a350db9e08520c64544562d88ec005a039124d9b.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 143 +++++++++++++++++++++ .../testing/selftests/bpf/progs/test_tunnel_kern.c | 11 +- tools/testing/selftests/bpf/test_tunnel.sh | 92 ------------- 3 files changed, 151 insertions(+), 95 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index b57d48219d0b..2d7f8fa82ebd 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -50,6 +50,7 @@ */ #include +#include #include #include #include @@ -92,6 +93,11 @@ #define IPIP_TUNL_DEV0 "ipip00" #define IPIP_TUNL_DEV1 "ipip11" +#define XFRM_AUTH "0x1111111111111111111111111111111111111111" +#define XFRM_ENC "0x22222222222222222222222222222222" +#define XFRM_SPI_IN_TO_OUT 0x1 +#define XFRM_SPI_OUT_TO_IN 0x2 + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void) SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); } +static int add_xfrm_tunnel(void) +{ + /* at_ns0 namespace + * at_ns0 -> root + */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32", + IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s", + IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0); + + /* root namespace + * at_ns0 -> root + */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip route add %s dev veth1 via %s src %s", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_xfrm_tunnel(void) +{ + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -532,6 +624,56 @@ done: test_tunnel_kern__destroy(skel); } +static void test_xfrm_tunnel(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int tc_prog_fd; + int ifindex; + int err; + + err = add_xfrm_tunnel(); + if (!ASSERT_OK(err, "add_xfrm_tunnel")) + return; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + + /* attach tc prog to tunnel dev */ + tc_hook.ifindex = ifindex; + tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + goto done; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); + if (!ASSERT_OK(err, "test_ping")) + goto done; + + if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) + goto done; + +done: + delete_xfrm_tunnel(); + if (skel) + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ @@ -548,6 +690,7 @@ static void *test_tunnel_run_tests(void *arg) RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); + RUN_TEST(xfrm_tunnel); return NULL; } diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index b320fb7bb080..3a59eb9c34de 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -929,6 +929,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_reqid = 0; +volatile int xfrm_spi = 0; +volatile int xfrm_remote_ip = 0; + SEC("tc") int xfrm_get_state(struct __sk_buff *skb) { @@ -939,9 +943,10 @@ int xfrm_get_state(struct __sk_buff *skb) if (ret < 0) return TC_ACT_OK; - bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", - x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + xfrm_reqid = x.reqid; + xfrm_spi = bpf_ntohl(x.spi); + xfrm_remote_ip = bpf_ntohl(x.remote_ipv4); + return TC_ACT_OK; } diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2dec7dbf29a2..d9661b9988ba 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -517,90 +517,6 @@ test_ip6ip6() echo -e ${GREEN}"PASS: ip6$TYPE"${NC} } -setup_xfrm_tunnel() -{ - auth=0x$(printf '1%.0s' {1..40}) - enc=0x$(printf '2%.0s' {1..32}) - spi_in_to_out=0x1 - spi_out_to_in=0x2 - # at_ns0 namespace - # at_ns0 -> root - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip netns exec at_ns0 \ - ip addr add dev veth0 10.1.1.100/32 - ip netns exec at_ns0 \ - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ - src 10.1.1.100 - - # root namespace - # at_ns0 -> root - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip addr add dev veth1 10.1.1.200/32 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 -} - -test_xfrm_tunnel() -{ - if [[ -e /sys/kernel/tracing/trace ]]; then - TRACE=/sys/kernel/tracing/trace - else - TRACE=/sys/kernel/debug/tracing/trace - fi - config_device - > ${TRACE} - setup_xfrm_tunnel - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} - tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da object-pinned \ - ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - sleep 1 - grep "reqid 1" ${TRACE} - check_err $? - grep "spi 0x1" ${TRACE} - check_err $? - grep "remote ip 0xac100164" ${TRACE} - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: xfrm tunnel"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} -} - attach_bpf() { DEV=$1 @@ -630,10 +546,6 @@ cleanup() ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null ip link del ip6erspan11 2> /dev/null - ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null - ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null - ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null - ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null } cleanup_exit() @@ -716,10 +628,6 @@ bpf_tunnel_test() test_ip6ip6 errors=$(( $errors + $? )) - echo "Testing IPSec tunnel..." - test_xfrm_tunnel - errors=$(( $errors + $? )) - return $errors } -- cgit From 2cd07b0eb08c0ed63b1bd0bf0114146b19a4ab1f Mon Sep 17 00:00:00 2001 From: Daniel Xu Date: Thu, 14 Dec 2023 15:49:06 -0700 Subject: bpf: xfrm: Add selftest for bpf_xdp_get_xfrm_state() This commit extends test_tunnel selftest to test the new XDP xfrm state lookup kfunc. Co-developed-by: Antony Antony Signed-off-by: Antony Antony Signed-off-by: Daniel Xu Link: https://lore.kernel.org/r/e704e9a4332e3eac7b458e4bfdec8fcc6984cdb6.1702593901.git.dxu@dxuuu.xyz Signed-off-by: Alexei Starovoitov --- .../testing/selftests/bpf/prog_tests/test_tunnel.c | 16 ++++++- .../testing/selftests/bpf/progs/test_tunnel_kern.c | 51 ++++++++++++++++++++++ 2 files changed, 65 insertions(+), 2 deletions(-) diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index 2d7f8fa82ebd..2b3c6dd66259 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -278,7 +278,7 @@ static int add_xfrm_tunnel(void) SYS(fail, "ip netns exec at_ns0 " "ip xfrm state add src %s dst %s proto esp " - "spi %d reqid 1 mode tunnel " + "spi %d reqid 1 mode tunnel replay-window 42 " "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); SYS(fail, @@ -313,7 +313,7 @@ static int add_xfrm_tunnel(void) */ SYS(fail, "ip xfrm state add src %s dst %s proto esp " - "spi %d reqid 1 mode tunnel " + "spi %d reqid 1 mode tunnel replay-window 42 " "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); SYS(fail, @@ -628,8 +628,10 @@ static void test_xfrm_tunnel(void) { DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); struct test_tunnel_kern *skel = NULL; struct nstoken *nstoken; + int xdp_prog_fd; int tc_prog_fd; int ifindex; int err; @@ -654,6 +656,14 @@ static void test_xfrm_tunnel(void) if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) goto done; + /* attach xdp prog to tunnel dev */ + xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) + goto done; + err = bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts); + if (!ASSERT_OK(err, "bpf_xdp_attach")) + goto done; + /* ping from at_ns0 namespace test */ nstoken = open_netns("at_ns0"); err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); @@ -667,6 +677,8 @@ static void test_xfrm_tunnel(void) goto done; if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) goto done; + if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window")) + goto done; done: delete_xfrm_tunnel(); diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index 3a59eb9c34de..3e436e6f7312 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -30,6 +30,10 @@ int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap) __ksym; +struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, + u32 opts__sz) __ksym; +void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -950,4 +954,51 @@ int xfrm_get_state(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_replay_window = 0; + +SEC("xdp") +int xfrm_get_state_xdp(struct xdp_md *xdp) +{ + struct bpf_xfrm_state_opts opts = {}; + struct xfrm_state *x = NULL; + struct ip_esp_hdr *esph; + struct bpf_dynptr ptr; + u8 esph_buf[8] = {}; + u8 iph_buf[20] = {}; + struct iphdr *iph; + u32 off; + + if (bpf_dynptr_from_xdp(xdp, 0, &ptr)) + goto out; + + off = sizeof(struct ethhdr); + iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf)); + if (!iph || iph->protocol != IPPROTO_ESP) + goto out; + + off += sizeof(struct iphdr); + esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf)); + if (!esph) + goto out; + + opts.netns_id = BPF_F_CURRENT_NETNS; + opts.daddr.a4 = iph->daddr; + opts.spi = esph->spi; + opts.proto = IPPROTO_ESP; + opts.family = AF_INET; + + x = bpf_xdp_get_xfrm_state(xdp, &opts, sizeof(opts)); + if (!x) + goto out; + + if (!x->replay_esn) + goto out; + + xfrm_replay_window = x->replay_esn->replay_window; +out: + if (x) + bpf_xdp_xfrm_state_release(x); + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; -- cgit