diff options
Diffstat (limited to 'tools')
93 files changed, 4931 insertions, 845 deletions
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index e88746ba7d21..e0545201b55f 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. 
* @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -951,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -995,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1074,9 +1108,11 @@ enum bpf_link_type { BPF_LINK_TYPE_TCX = 11, BPF_LINK_TYPE_UPROBE_MULTI = 12, BPF_LINK_TYPE_NETKIT = 13, - MAX_BPF_LINK_TYPE, + __MAX_BPF_LINK_TYPE, }; +#define MAX_BPF_LINK_TYPE __MAX_BPF_LINK_TYPE + enum bpf_perf_event_type { BPF_PERF_EVENT_UNSPEC = 0, BPF_PERF_EVENT_UPROBE = 1, @@ -1401,6 +1437,7 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + __u32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1470,6 +1507,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + __u32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1582,6 +1620,7 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). 
*/ __u32 btf_log_true_size; + __u32 btf_token_fd; }; struct { @@ -1712,6 +1751,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF diff --git a/tools/include/uapi/linux/netdev.h b/tools/include/uapi/linux/netdev.h index 424c5e28f495..93cb411adf72 100644 --- a/tools/include/uapi/linux/netdev.h +++ b/tools/include/uapi/linux/netdev.h @@ -44,10 +44,13 @@ enum netdev_xdp_act { * timestamp via bpf_xdp_metadata_rx_timestamp(). * @NETDEV_XDP_RX_METADATA_HASH: Device is capable of exposing receive packet * hash via bpf_xdp_metadata_rx_hash(). + * @NETDEV_XDP_RX_METADATA_VLAN_TAG: Device is capable of exposing receive + * packet VLAN tag via bpf_xdp_metadata_rx_vlan_tag(). */ enum netdev_xdp_rx_metadata { NETDEV_XDP_RX_METADATA_TIMESTAMP = 1, NETDEV_XDP_RX_METADATA_HASH = 2, + NETDEV_XDP_RX_METADATA_VLAN_TAG = 4, }; /** diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588..b6619199a706 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9dc9625651dc..0ad8e532b3cf 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct 
bpf_insn insns[] = { @@ -120,6 +120,7 @@ int probe_memcg_account(void) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { @@ -146,7 +147,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -169,7 +170,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); union bpf_attr attr; int fd; @@ -181,7 +182,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -198,6 +199,8 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -232,7 +235,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -261,8 +264,9 @@ int bpf_prog_load(enum 
bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); + attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -1182,7 +1186,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1207,6 +1211,8 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); + /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading @@ -1287,3 +1293,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } + +int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, token_create); + union bpf_attr attr; + int fd; + + if (!OPTS_VALID(opts, bpf_token_create_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.token_create.bpffs_fd = bpffs_fd; + attr.token_create.flags = OPTS_GET(opts, flags, 0); + + fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index d0f53772bdc0..991b86bfe7e4 100644 --- a/tools/lib/bpf/bpf.h +++ 
b/tools/lib/bpf/bpf.h @@ -51,8 +51,11 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + + __u32 token_fd; + size_t :0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field token_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -102,9 +105,10 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field log_true_size +#define bpf_prog_load_opts__last_field token_fd LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, @@ -130,9 +134,10 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field log_true_size +#define bpf_btf_load_opts__last_field token_fd LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); @@ -640,6 +645,30 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); +struct bpf_token_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + size_t :0; +}; +#define bpf_token_create_opts__last_field flags + +/** + * @brief **bpf_token_create()** creates a new instance of BPF token derived + * from specified BPF FS mount point. + * + * BPF token created with this API can be passed to bpf() syscall for + * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. + * + * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token + * instance. 
+ * @param opts optional BPF token creation options, can be NULL + * + * @return BPF token FD > 0, on success; negative error code, otherwise (errno + * is also set to the error code) + */ +LIBBPF_API int bpf_token_create(int bpffs_fd, + struct bpf_token_create_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 1ac57bb7ac55..7325a12692a3 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -111,6 +111,38 @@ enum bpf_enum_value_kind { val; \ }) +/* + * Write to a bitfield, identified by s->field. + * This is the inverse of BPF_CORE_READ_BITFIELD(). + */ +#define BPF_CORE_WRITE_BITFIELD(s, field, new_val) ({ \ + void *p = (void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ + unsigned int byte_size = __CORE_RELO(s, field, BYTE_SIZE); \ + unsigned int lshift = __CORE_RELO(s, field, LSHIFT_U64); \ + unsigned int rshift = __CORE_RELO(s, field, RSHIFT_U64); \ + unsigned long long mask, val, nval = new_val; \ + unsigned int rpad = rshift - lshift; \ + \ + asm volatile("" : "+r"(p)); \ + \ + switch (byte_size) { \ + case 1: val = *(unsigned char *)p; break; \ + case 2: val = *(unsigned short *)p; break; \ + case 4: val = *(unsigned int *)p; break; \ + case 8: val = *(unsigned long long *)p; break; \ + } \ + \ + mask = (~0ULL << rshift) >> lshift; \ + val = (val & ~mask) | ((nval << rpad) & mask); \ + \ + switch (byte_size) { \ + case 1: *(unsigned char *)p = val; break; \ + case 2: *(unsigned short *)p = val; break; \ + case 4: *(unsigned int *)p = val; break; \ + case 8: *(unsigned long long *)p = val; break; \ + } \ +}) + #define ___bpf_field_ref1(field) (field) #define ___bpf_field_ref2(type, field) (((typeof(type) *)0)->field) #define ___bpf_field_ref(args...) 
\ diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d..63033c334320 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,7 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1397,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a5..c92e02394159 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..ce98a334be21 --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,478 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ +#include <linux/kernel.h> +#include <linux/filter.h> +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +static int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(int token_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(int token_fd) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func_global(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_datasec(int token_fd) +{ + static const char strs[] = "\0x\0.data"; + /* static int x; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 
4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_float(int token_fd) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_decl_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_type_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_array_mmap(int token_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE, + .token_fd = token_fd, + ); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + ); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a 
BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_prog_bind_map(int token_fd) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, .token_fd = token_fd); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, .token_fd = token_fd); + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. 
+ */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), &opts); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = token_fd); + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(int token_fd) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +typedef int (*feature_probe_fn)(int /* token_fd */); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + 
[FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(cache->token_fd); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index ea9b8158c20d..4b5ff9508e18 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format 
string. It may break @@ -693,6 +695,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -2192,7 +2198,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3054,9 +3060,15 @@ static bool prog_needs_vmlinux_btf(struct bpf_program *prog) return false; } +static bool map_needs_vmlinux_btf(struct bpf_map *map) +{ + return bpf_map__is_struct_ops(map); +} + static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) { struct bpf_program *prog; + struct bpf_map *map; int i; /* CO-RE relocations need kernel BTF, only when btf_custom_path @@ -3081,6 +3093,11 @@ static bool obj_needs_vmlinux_btf(const struct bpf_object *obj) return true; } + bpf_object__for_each_map(map, obj) { + if (map_needs_vmlinux_btf(map)) + return true; + } + return false; } @@ -3268,7 +3285,7 @@ skip_exception_cb: } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4591,6 +4608,63 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is already set up */ + if (obj->token_fd > 0) + return 0; + /* token is explicitly prevented */ + if (obj->token_fd < 0) { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + /* reset to zero to avoid extra checks during map_create and prog_load steps */ + obj->token_fd = 0; + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ? 
LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY | O_RDONLY); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, NULL); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4600,6 +4674,7 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, .token_fd = obj->token_fd); if (obj->gen_loader) return 0; @@ -4609,9 +4684,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ 
-4626,462 +4701,18 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). 
Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return 
probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, 
&opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. 
- */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. 
*/ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; - enum kern_feature_result res; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", 
probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. 
*/ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); - } - } + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return feat_supported(NULL, feat_id); } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -5200,6 +4831,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; if (bpf_map__is_struct_ops(map)) create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; @@ -7035,6 +6667,7 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.attach_btf_id = prog->attach_btf_id; load_attr.kern_version = kern_version; load_attr.prog_ifindex = prog->prog_ifindex; + load_attr.token_fd = obj->token_fd; /* specify func_info/line_info only if kernel supports them */ btf_fd = bpf_object__btf_fd(obj); @@ -7496,10 +7129,10 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; - int err; + int err, token_fd; char *log_buf; size_t log_size; __u32 log_level; @@ -7533,6 +7166,28 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, 
NULL); + token_fd = OPTS_GET(opts, bpf_token_fd, -1); + /* non-empty token path can't be combined with invalid token FD */ + if (token_path && token_path[0] != '\0' && token_fd < 0) + return ERR_PTR(-EINVAL); + /* empty token path can't be combined with valid token FD */ + if (token_path && token_path[0] == '\0' && token_fd > 0) + return ERR_PTR(-EINVAL); + /* if user didn't specify bpf_token_path/bpf_token_fd explicitly, + * check if LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as + * bpf_token_path option + */ + if (token_fd == 0 && !token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + /* empty token_path is equivalent to invalid token_fd */ + if (token_path && token_path[0] == '\0') { + token_path = NULL; + token_fd = -1; + } + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7541,6 +7196,19 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + obj->token_fd = token_fd <= 0 ? token_fd : dup_good_fd(token_fd); + if (token_fd > 0 && obj->token_fd < 0) { + err = -errno; + goto out; + } + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -8051,7 +7719,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? 
: bpf_object__sanitize_and_load_btf(obj); @@ -8588,6 +8257,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } @@ -10601,7 +10275,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -static int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..916904bd2a7a 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,45 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* FD of a BPF token instantiated by user through bpf_token_create() + * API. BPF object will keep dup()'ed FD internally, so passed token + * FD can be closed after BPF object/skeleton open step. + * + * Setting bpf_token_fd to negative value disables libbpf's automatic + * attempt to create BPF token from default BPF FS mount point + * (/sys/fs/bpf), in case this default behavior is undesirable. + * + * If bpf_token_path and bpf_token_fd are not specified, libbpf will + * consult LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will + * be taken as a value of bpf_token_path option and will force libbpf + * to either create BPF token from provided custom BPF FS path, or + * will disable implicit BPF token creation, if envvar value is an + * empty string. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. + */ + int bpf_token_fd; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. 
+ * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + * + * bpf_token_path and bpf_token_fd are mutually exclusive and only one + * of those options should be set. Either of them overrides + * LIBBPF_BPF_TOKEN_PATH envvar. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 91c5aef7dae7..df7657b65c47 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -401,6 +401,7 @@ LIBBPF_1.3.0 { bpf_program__attach_netkit; bpf_program__attach_tcx; bpf_program__attach_uprobe_multi; + bpf_token_create; ring__avail_data_size; ring__consume; ring__consumer_pos; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index b5d334754e5d..4cda32298c49 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -360,15 +360,32 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; + int token_fd; +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); -int 
btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); + const char *str_sec, size_t str_len, + int token_fd); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -532,6 +549,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -543,7 +571,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6..8e7437006639 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, .token_fd = token_fd); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +243,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t 
types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +273,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index 5ced96d99f8c..52a2901e8bd0 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -719,13 +719,25 @@ static int linker_sanity_check_elf(struct src_obj *obj) return -EINVAL; } - if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) + if (sec->shdr->sh_addralign && !is_pow_of_2(sec->shdr->sh_addralign)) { + pr_warn("ELF section #%zu alignment %llu is non pow-of-2 alignment in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + obj->filename); return -EINVAL; - if (sec->shdr->sh_addralign != sec->data->d_align) + } + if (sec->shdr->sh_addralign != sec->data->d_align) { + pr_warn("ELF section #%zu has inconsistent alignment addr=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_addralign, + (long long unsigned)sec->data->d_align, obj->filename); return -EINVAL; + } - if (sec->shdr->sh_size != sec->data->d_size) + if (sec->shdr->sh_size != sec->data->d_size) { + pr_warn("ELF section #%zu has inconsistent section size sh=%llu != d=%llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + (long long unsigned)sec->data->d_size, obj->filename); return -EINVAL; + } switch (sec->shdr->sh_type) { case SHT_SYMTAB: @@ -737,8 +749,12 @@ static int linker_sanity_check_elf(struct src_obj *obj) break; case SHT_PROGBITS: if (sec->shdr->sh_flags & SHF_EXECINSTR) { - if (sec->shdr->sh_size % sizeof(struct bpf_insn) != 0) + if (sec->shdr->sh_size % 
sizeof(struct bpf_insn) != 0) { + pr_warn("ELF section #%zu has unexpected size alignment %llu in %s\n", + sec->sec_idx, (long long unsigned)sec->shdr->sh_size, + obj->filename); return -EINVAL; + } } break; case SHT_NOBITS: diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index a139334d57b6..626d7ffb03d6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,5 +2,8 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H +#define STRERR_BUFSIZE 128 + char *libbpf_strerror_r(int err, char *dst, int len); + #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index 5ca68ff0b59f..b4e78c1eb37b 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -55,4 +55,14 @@ void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; +extern int bpf_get_file_xattr(struct file *file, const char *name, + struct bpf_dynptr *value_ptr) __ksym; +extern int bpf_get_fsverity_digest(struct file *file, struct bpf_dynptr *digest_ptr) __ksym; + +extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; +extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; +extern void bpf_key_put(struct bpf_key *key) __ksym; +extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, + struct bpf_dynptr *sig_ptr, + struct bpf_key *trusted_keyring) __ksym; #endif diff --git a/tools/testing/selftests/bpf/cgroup_helpers.c b/tools/testing/selftests/bpf/cgroup_helpers.c index 5aa133bf3688..19be9c63d5e8 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.c +++ b/tools/testing/selftests/bpf/cgroup_helpers.c @@ -689,3 +689,19 @@ int get_cgroup1_hierarchy_id(const char *subsys_name) fclose(file); return found ? 
id : -1; } + +/** + * open_classid() - Open a cgroupv1 net_cls classid + * + * This function expects the cgroup work dir to be already created, as we + * open it here. + * + * On success, it returns the file descriptor. On failure it returns -1. + */ +int open_classid(void) +{ + char cgroup_workdir[PATH_MAX + 1]; + + format_classid_path(cgroup_workdir); + return open(cgroup_workdir, O_RDONLY); +} diff --git a/tools/testing/selftests/bpf/cgroup_helpers.h b/tools/testing/selftests/bpf/cgroup_helpers.h index ee053641c026..502845160d88 100644 --- a/tools/testing/selftests/bpf/cgroup_helpers.h +++ b/tools/testing/selftests/bpf/cgroup_helpers.h @@ -33,6 +33,7 @@ void cleanup_cgroup_environment(void); int set_classid(void); int join_classid(void); unsigned long long get_classid_cgroup_id(void); +int open_classid(void); int setup_classid_environment(void); void cleanup_classid_environment(void); diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index 3ec5927ec3e5..c125c441abc7 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -23,6 +23,7 @@ CONFIG_FPROBE=y CONFIG_FTRACE_SYSCALLS=y CONFIG_FUNCTION_ERROR_INJECTION=y CONFIG_FUNCTION_TRACER=y +CONFIG_FS_VERITY=y CONFIG_GENEVE=y CONFIG_IKCONFIG=y CONFIG_IKCONFIG_PROC=y @@ -82,7 +83,7 @@ CONFIG_SECURITY=y CONFIG_SECURITYFS=y CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y +CONFIG_VSOCKETS=y CONFIG_VXLAN=y CONFIG_XDP_SOCKETS=y CONFIG_XFRM_INTERFACE=y -CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/prog_tests/btf.c b/tools/testing/selftests/bpf/prog_tests/btf.c index 8fb4a04fbbc0..816145bcb647 100644 --- a/tools/testing/selftests/bpf/prog_tests/btf.c +++ b/tools/testing/selftests/bpf/prog_tests/btf.c @@ -4630,11 +4630,6 @@ static int test_btf_id(unsigned int test_num) /* The map holds the last ref to BTF and its btf_id */ close(map_fd); map_fd = -1; - btf_fd[0] = bpf_btf_get_fd_by_id(map_info.btf_id); - if (CHECK(btf_fd[0] >= 0, "BTF lingers")) { - err 
= -1; - goto done; - } fprintf(stderr, "OK"); diff --git a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c index 63e776f4176e..747761572098 100644 --- a/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c +++ b/tools/testing/selftests/bpf/prog_tests/cgrp_local_storage.c @@ -19,6 +19,21 @@ struct socket_cookie { __u64 cookie_value; }; +static bool is_cgroup1; +static int target_hid; + +#define CGROUP_MODE_SET(skel) \ +{ \ + skel->bss->is_cgroup1 = is_cgroup1; \ + skel->bss->target_hid = target_hid; \ +} + +static void cgroup_mode_value_init(bool cgroup, int hid) +{ + is_cgroup1 = cgroup; + target_hid = hid; +} + static void test_tp_btf(int cgroup_fd) { struct cgrp_ls_tp_btf *skel; @@ -29,6 +44,8 @@ static void test_tp_btf(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + /* populate a value in map_b */ err = bpf_map_update_elem(bpf_map__fd(skel->maps.map_b), &cgroup_fd, &val1, BPF_ANY); if (!ASSERT_OK(err, "map_update_elem")) @@ -130,6 +147,8 @@ static void test_recursion(int cgroup_fd) if (!ASSERT_OK_PTR(skel, "skel_open_and_load")) return; + CGROUP_MODE_SET(skel); + err = cgrp_ls_recursion__attach(skel); if (!ASSERT_OK(err, "skel_attach")) goto out; @@ -165,6 +184,8 @@ static void test_cgroup_iter_sleepable(int cgroup_fd, __u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + bpf_program__set_autoload(skel->progs.cgroup_iter, true); err = cgrp_ls_sleepable__load(skel); if (!ASSERT_OK(err, "skel_load")) @@ -202,6 +223,7 @@ static void test_yes_rcu_lock(__u64 cgroup_id) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); skel->bss->target_pid = syscall(SYS_gettid); bpf_program__set_autoload(skel->progs.yes_rcu_lock, true); @@ -229,6 +251,8 @@ static void test_no_rcu_lock(void) if (!ASSERT_OK_PTR(skel, "skel_open")) return; + CGROUP_MODE_SET(skel); + 
bpf_program__set_autoload(skel->progs.no_rcu_lock, true); err = cgrp_ls_sleepable__load(skel); ASSERT_ERR(err, "skel_load"); @@ -236,7 +260,25 @@ static void test_no_rcu_lock(void) cgrp_ls_sleepable__destroy(skel); } -void test_cgrp_local_storage(void) +static void test_cgrp1_no_rcu_lock(void) +{ + struct cgrp_ls_sleepable *skel; + int err; + + skel = cgrp_ls_sleepable__open(); + if (!ASSERT_OK_PTR(skel, "skel_open")) + return; + + CGROUP_MODE_SET(skel); + + bpf_program__set_autoload(skel->progs.cgrp1_no_rcu_lock, true); + err = cgrp_ls_sleepable__load(skel); + ASSERT_OK(err, "skel_load"); + + cgrp_ls_sleepable__destroy(skel); +} + +static void cgrp2_local_storage(void) { __u64 cgroup_id; int cgroup_fd; @@ -245,6 +287,8 @@ void test_cgrp_local_storage(void) if (!ASSERT_GE(cgroup_fd, 0, "join_cgroup /cgrp_local_storage")) return; + cgroup_mode_value_init(0, -1); + cgroup_id = get_cgroup_id("/cgrp_local_storage"); if (test__start_subtest("tp_btf")) test_tp_btf(cgroup_fd); @@ -263,3 +307,55 @@ void test_cgrp_local_storage(void) close(cgroup_fd); } + +static void cgrp1_local_storage(void) +{ + int cgrp1_fd, cgrp1_hid, cgrp1_id, err; + + /* Setup cgroup1 hierarchy */ + err = setup_classid_environment(); + if (!ASSERT_OK(err, "setup_classid_environment")) + return; + + err = join_classid(); + if (!ASSERT_OK(err, "join_cgroup1")) + goto cleanup; + + cgrp1_fd = open_classid(); + if (!ASSERT_GE(cgrp1_fd, 0, "cgroup1 fd")) + goto cleanup; + + cgrp1_id = get_classid_cgroup_id(); + if (!ASSERT_GE(cgrp1_id, 0, "cgroup1 id")) + goto close_fd; + + cgrp1_hid = get_cgroup1_hierarchy_id("net_cls"); + if (!ASSERT_GE(cgrp1_hid, 0, "cgroup1 hid")) + goto close_fd; + + cgroup_mode_value_init(1, cgrp1_hid); + + if (test__start_subtest("cgrp1_tp_btf")) + test_tp_btf(cgrp1_fd); + if (test__start_subtest("cgrp1_recursion")) + test_recursion(cgrp1_fd); + if (test__start_subtest("cgrp1_negative")) + test_negative(); + if (test__start_subtest("cgrp1_iter_sleepable")) + 
test_cgroup_iter_sleepable(cgrp1_fd, cgrp1_id); + if (test__start_subtest("cgrp1_yes_rcu_lock")) + test_yes_rcu_lock(cgrp1_id); + if (test__start_subtest("cgrp1_no_rcu_lock")) + test_cgrp1_no_rcu_lock(); + +close_fd: + close(cgrp1_fd); +cleanup: + cleanup_classid_environment(); +} + +void test_cgrp_local_storage(void) +{ + cgrp2_local_storage(); + cgrp1_local_storage(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/cpumask.c b/tools/testing/selftests/bpf/prog_tests/cpumask.c index 756ea8b590b6..c2e886399e3c 100644 --- a/tools/testing/selftests/bpf/prog_tests/cpumask.c +++ b/tools/testing/selftests/bpf/prog_tests/cpumask.c @@ -18,6 +18,7 @@ static const char * const cpumask_success_testcases[] = { "test_insert_leave", "test_insert_remove_release", "test_global_mask_rcu", + "test_cpumask_weight", }; static void verify_success(const char *prog_name) diff --git a/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c new file mode 100644 index 000000000000..37056ba73847 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/fs_kfuncs.c @@ -0,0 +1,142 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include <stdlib.h> +#include <sys/types.h> +#include <sys/xattr.h> +#include <linux/fsverity.h> +#include <unistd.h> +#include <test_progs.h> +#include "test_get_xattr.skel.h" +#include "test_fsverity.skel.h" + +static const char testfile[] = "/tmp/test_progs_fs_kfuncs"; + +static void test_xattr(void) +{ + struct test_get_xattr *skel = NULL; + int fd = -1, err; + + fd = open(testfile, O_CREAT | O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + close(fd); + fd = -1; + + err = setxattr(testfile, "user.kfuncs", "hello", sizeof("hello"), 0); + if (err && errno == EOPNOTSUPP) { + printf("%s:SKIP:local fs doesn't support xattr (%d)\n" + "To run this test, make sure /tmp filesystem supports xattr.\n", + __func__, errno); + test__skip(); + goto out; + } + + if (!ASSERT_OK(err, "setxattr")) + goto out; + + skel = test_get_xattr__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_get_xattr__open_and_load")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_get_xattr__attach(skel); + + if (!ASSERT_OK(err, "test_get_xattr__attach")) + goto out; + + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file")) + goto out; + + ASSERT_EQ(skel->bss->found_xattr, 1, "found_xattr"); + +out: + close(fd); + test_get_xattr__destroy(skel); + remove(testfile); +} + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +static void test_fsverity(void) +{ + struct fsverity_enable_arg arg = {0}; + struct test_fsverity *skel = NULL; + struct fsverity_digest *d; + int fd, err; + char buffer[4096]; + + fd = open(testfile, O_CREAT | O_RDWR, 0644); + if (!ASSERT_GE(fd, 0, "create_file")) + return; + + /* Write random buffer, so the file is not empty */ + err = write(fd, buffer, 4096); + if (!ASSERT_EQ(err, 4096, "write_file")) + goto out; + close(fd); + + /* Reopen read-only, otherwise FS_IOC_ENABLE_VERITY will fail */ + fd = open(testfile, O_RDONLY, 0644); + if (!ASSERT_GE(fd, 0, "open_file1")) + return; + + /* Enable 
fsverity for the file. + * If the file system doesn't support verity, this will fail. Skip + * the test in such case. + */ + arg.version = 1; + arg.hash_algorithm = FS_VERITY_HASH_ALG_SHA256; + arg.block_size = 4096; + err = ioctl(fd, FS_IOC_ENABLE_VERITY, &arg); + if (err) { + printf("%s:SKIP:local fs doesn't support fsverity (%d)\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__, errno); + test__skip(); + goto out; + } + + skel = test_fsverity__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_fsverity__open_and_load")) + goto out; + + /* Get fsverity_digest from ioctl */ + d = (struct fsverity_digest *)skel->bss->expected_digest; + d->digest_algorithm = FS_VERITY_HASH_ALG_SHA256; + d->digest_size = SHA256_DIGEST_SIZE; + err = ioctl(fd, FS_IOC_MEASURE_VERITY, skel->bss->expected_digest); + if (!ASSERT_OK(err, "ioctl_FS_IOC_MEASURE_VERITY")) + goto out; + + skel->bss->monitored_pid = getpid(); + err = test_fsverity__attach(skel); + if (!ASSERT_OK(err, "test_fsverity__attach")) + goto out; + + /* Reopen the file to trigger the program */ + close(fd); + fd = open(testfile, O_RDONLY); + if (!ASSERT_GE(fd, 0, "open_file2")) + goto out; + + ASSERT_EQ(skel->bss->got_fsverity, 1, "got_fsverity"); + ASSERT_EQ(skel->bss->digest_matches, 1, "digest_matches"); +out: + close(fd); + test_fsverity__destroy(skel); + remove(testfile); +} + +void test_fs_kfuncs(void) +{ + if (test__start_subtest("xattr")) + test_xattr(); + + if (test__start_subtest("fsverity")) + test_fsverity(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c new file mode 100644 index 000000000000..65309894b27a --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/global_func_dead_code.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include <test_progs.h> +#include "verifier_global_subprogs.skel.h" +#include "freplace_dead_global_func.skel.h" + +void test_global_func_dead_code(void) +{ + struct verifier_global_subprogs *tgt_skel = NULL; + struct freplace_dead_global_func *skel = NULL; + char log_buf[4096]; + int err, tgt_fd; + + /* first, try to load target with good global subprog */ + tgt_skel = verifier_global_subprogs__open(); + if (!ASSERT_OK_PTR(tgt_skel, "tgt_skel_good_open")) + return; + + bpf_program__set_autoload(tgt_skel->progs.chained_global_func_calls_success, true); + + err = verifier_global_subprogs__load(tgt_skel); + if (!ASSERT_OK(err, "tgt_skel_good_load")) + goto out; + + tgt_fd = bpf_program__fd(tgt_skel->progs.chained_global_func_calls_success); + + /* Attach to good non-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_good_open")) + goto out; + + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_good"); + ASSERT_OK(err, "attach_target_good"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_OK(err, "skel_good_load")) + goto out; + + freplace_dead_global_func__destroy(skel); + + /* Try attaching to dead code-eliminated subprog */ + skel = freplace_dead_global_func__open(); + if (!ASSERT_OK_PTR(skel, "skel_dead_open")) + goto out; + + bpf_program__set_log_buf(skel->progs.freplace_prog, log_buf, sizeof(log_buf)); + err = bpf_program__set_attach_target(skel->progs.freplace_prog, tgt_fd, "global_dead"); + ASSERT_OK(err, "attach_target_dead"); + + err = freplace_dead_global_func__load(skel); + if (!ASSERT_ERR(err, "skel_dead_load")) + goto out; + + ASSERT_HAS_SUBSTR(log_buf, "Subprog global_dead doesn't exist", "dead_subprog_missing_msg"); + +out: + verifier_global_subprogs__destroy(tgt_skel); + freplace_dead_global_func__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c index 
4041cfa670eb..05000810e28e 100644 --- a/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/kprobe_multi_test.c @@ -222,6 +222,7 @@ static void test_attach_api_fails(void) "bpf_fentry_test2", }; __u64 cookies[2]; + int saved_error; addrs[0] = ksym_get_addr("bpf_fentry_test1"); addrs[1] = ksym_get_addr("bpf_fentry_test2"); @@ -238,10 +239,11 @@ static void test_attach_api_fails(void) /* fail_1 - pattern and opts NULL */ link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, NULL); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_1")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_1_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_1_error")) goto cleanup; /* fail_2 - both addrs and syms set */ @@ -252,10 +254,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, NULL, &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_2")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_2_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_2_error")) goto cleanup; /* fail_3 - pattern and addrs set */ @@ -266,10 +269,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_3")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_3_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_3_error")) goto cleanup; /* fail_4 - pattern and cnt set */ @@ -280,10 +284,11 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_4")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_4_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_4_error")) goto cleanup; /* 
fail_5 - pattern and cookies */ @@ -294,10 +299,26 @@ static void test_attach_api_fails(void) link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, "ksys_*", &opts); + saved_error = -errno; if (!ASSERT_ERR_PTR(link, "fail_5")) goto cleanup; - if (!ASSERT_EQ(libbpf_get_error(link), -EINVAL, "fail_5_error")) + if (!ASSERT_EQ(saved_error, -EINVAL, "fail_5_error")) + goto cleanup; + + /* fail_6 - abnormal cnt */ + opts.addrs = (const unsigned long *) addrs; + opts.syms = NULL; + opts.cnt = INT_MAX; + opts.cookies = NULL; + + link = bpf_program__attach_kprobe_multi_opts(skel->progs.test_kprobe_manual, + NULL, &opts); + saved_error = -errno; + if (!ASSERT_ERR_PTR(link, "fail_6")) + goto cleanup; + + if (!ASSERT_EQ(saved_error, -E2BIG, "fail_6_error")) goto cleanup; cleanup: diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 9f766ddd946a..4ed46ed58a7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; + if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) + continue; if (!test__start_subtest(prog_type_name)) continue; @@ -68,6 +70,8 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; + if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) + continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index c440ea3311ed..62ea855ec4d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -87,7 +87,7 @@ static void test_libbpf_bpf_link_type_str(void) const char *link_type_str; char buf[256]; - if (link_type == MAX_BPF_LINK_TYPE) + if (link_type == 
__MAX_BPF_LINK_TYPE) continue; link_type_name = btf__str_by_offset(btf, e->name_off); @@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; + if (map_type == __MAX_BPF_MAP_TYPE) + continue; + map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); @@ -186,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; + if (prog_type == __MAX_BPF_PROG_TYPE) + continue; + prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); diff --git a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c index e6e50a394472..827e713f6cf1 100644 --- a/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/prog_tests/local_kptr_stash.c @@ -48,6 +48,27 @@ static void test_local_kptr_stash_plain(void) local_kptr_stash__destroy(skel); } +static void test_local_kptr_stash_local_with_root(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts, + .data_in = &pkt_v4, + .data_size_in = sizeof(pkt_v4), + .repeat = 1, + ); + struct local_kptr_stash *skel; + int ret; + + skel = local_kptr_stash__open_and_load(); + if (!ASSERT_OK_PTR(skel, "local_kptr_stash__open_and_load")) + return; + + ret = bpf_prog_test_run_opts(bpf_program__fd(skel->progs.stash_local_with_root), &opts); + ASSERT_OK(ret, "local_kptr_stash_add_local_with_root run"); + ASSERT_OK(opts.retval, "local_kptr_stash_add_local_with_root retval"); + + local_kptr_stash__destroy(skel); +} + static void test_local_kptr_stash_unstash(void) { LIBBPF_OPTS(bpf_test_run_opts, opts, @@ -115,6 +136,8 @@ void test_local_kptr_stash(void) test_local_kptr_stash_simple(); if (test__start_subtest("local_kptr_stash_plain")) test_local_kptr_stash_plain(); + if 
(test__start_subtest("local_kptr_stash_local_with_root")) + test_local_kptr_stash_local_with_root(); if (test__start_subtest("local_kptr_stash_unstash")) test_local_kptr_stash_unstash(); if (test__start_subtest("refcount_acquire_without_unstash")) diff --git a/tools/testing/selftests/bpf/prog_tests/map_btf.c b/tools/testing/selftests/bpf/prog_tests/map_btf.c new file mode 100644 index 000000000000..2c4ef6037573 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_btf.c @@ -0,0 +1,98 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <test_progs.h> + +#include "normal_map_btf.skel.h" +#include "map_in_map_btf.skel.h" + +static void do_test_normal_map_btf(void) +{ + struct normal_map_btf *skel; + int i, err, new_fd = -1; + int map_fd_arr[64]; + + skel = normal_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = normal_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Use percpu_array to slow bpf_map_free_deferred() down. + * The memory allocation may fail, so doesn't check the returned fd. + */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) + map_fd_arr[i] = bpf_map_create(BPF_MAP_TYPE_PERCPU_ARRAY, NULL, 4, 4, 256, NULL); + + /* Close array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.array)); +out: + normal_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the release of the map + * btf which is held by other maps (e.g, bss). After that, array map + * holds the last reference of map btf. + */ + kern_sync_rcu(); + usleep(4000); + /* Spawn multiple kworkers to delay the invocation of + * bpf_map_free_deferred() for array map. 
+ */ + for (i = 0; i < ARRAY_SIZE(map_fd_arr); i++) { + if (map_fd_arr[i] < 0) + continue; + close(map_fd_arr[i]); + } + close(new_fd); +} + +static void do_test_map_in_map_btf(void) +{ + int err, zero = 0, new_fd = -1; + struct map_in_map_btf *skel; + + skel = map_in_map_btf__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = map_in_map_btf__attach(skel); + if (!ASSERT_OK(err, "attach")) + goto out; + + skel->bss->pid = getpid(); + usleep(1); + ASSERT_TRUE(skel->bss->done, "done"); + + /* Close inner_array fd later */ + new_fd = dup(bpf_map__fd(skel->maps.inner_array)); + /* Defer the free of inner_array */ + err = bpf_map__delete_elem(skel->maps.outer_array, &zero, sizeof(zero), 0); + ASSERT_OK(err, "delete inner map"); +out: + map_in_map_btf__destroy(skel); + if (new_fd < 0) + return; + /* Use kern_sync_rcu() to wait for the start of the free of the bpf + * program and use an assumed delay to wait for the free of the outer + * map and the release of map btf. After that, inner map holds the last + * reference of map btf. + */ + kern_sync_rcu(); + usleep(10000); + close(new_fd); +} + +void test_map_btf(void) +{ + if (test__start_subtest("array_btf")) + do_test_normal_map_btf(); + if (test__start_subtest("inner_array_btf")) + do_test_map_in_map_btf(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/map_in_map.c b/tools/testing/selftests/bpf/prog_tests/map_in_map.c new file mode 100644 index 000000000000..d2a10eb4e5b5 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/map_in_map.c @@ -0,0 +1,141 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> +#include <test_progs.h> +#include <bpf/btf.h> +#include "access_map_in_map.skel.h" + +struct thread_ctx { + pthread_barrier_t barrier; + int outer_map_fd; + int start, abort; + int loop, err; +}; + +static int wait_for_start_or_abort(struct thread_ctx *ctx) +{ + while (!ctx->start && !ctx->abort) + usleep(1); + return ctx->abort ? -1 : 0; +} + +static void *update_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop, err = 0; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + int fd, zero = 0; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, 4, 4, 1, NULL); + if (fd < 0) { + err |= 1; + pthread_barrier_wait(&ctx->barrier); + continue; + } + + /* Remove the old inner map */ + if (bpf_map_update_elem(ctx->outer_map_fd, &zero, &fd, 0) < 0) + err |= 2; + close(fd); + pthread_barrier_wait(&ctx->barrier); + } + + ctx->err = err; + + return NULL; +} + +static void *access_map_fn(void *data) +{ + struct thread_ctx *ctx = data; + int loop = ctx->loop; + + if (wait_for_start_or_abort(ctx) < 0) + return NULL; + pthread_barrier_wait(&ctx->barrier); + + while (loop-- > 0) { + /* Access the old inner map */ + syscall(SYS_getpgid); + pthread_barrier_wait(&ctx->barrier); + } + + return NULL; +} + +static void test_map_in_map_access(const char *prog_name, const char *map_name) +{ + struct access_map_in_map *skel; + struct bpf_map *outer_map; + struct bpf_program *prog; + struct thread_ctx ctx; + pthread_t tid[2]; + int err; + + skel = access_map_in_map__open(); + if (!ASSERT_OK_PTR(skel, "access_map_in_map open")) + return; + + prog = bpf_object__find_program_by_name(skel->obj, prog_name); + if (!ASSERT_OK_PTR(prog, "find program")) + goto out; + bpf_program__set_autoload(prog, true); + + outer_map = bpf_object__find_map_by_name(skel->obj, map_name); + if (!ASSERT_OK_PTR(outer_map, 
"find map")) + goto out; + + err = access_map_in_map__load(skel); + if (!ASSERT_OK(err, "access_map_in_map load")) + goto out; + + err = access_map_in_map__attach(skel); + if (!ASSERT_OK(err, "access_map_in_map attach")) + goto out; + + skel->bss->tgid = getpid(); + + memset(&ctx, 0, sizeof(ctx)); + pthread_barrier_init(&ctx.barrier, NULL, 2); + ctx.outer_map_fd = bpf_map__fd(outer_map); + ctx.loop = 4; + + err = pthread_create(&tid[0], NULL, update_map_fn, &ctx); + if (!ASSERT_OK(err, "close_thread")) + goto out; + + err = pthread_create(&tid[1], NULL, access_map_fn, &ctx); + if (!ASSERT_OK(err, "read_thread")) { + ctx.abort = 1; + pthread_join(tid[0], NULL); + goto out; + } + + ctx.start = 1; + pthread_join(tid[0], NULL); + pthread_join(tid[1], NULL); + + ASSERT_OK(ctx.err, "err"); +out: + access_map_in_map__destroy(skel); +} + +void test_map_in_map(void) +{ + if (test__start_subtest("acc_map_in_array")) + test_map_in_map_access("access_map_in_array", "outer_array_map"); + if (test__start_subtest("sleepable_acc_map_in_array")) + test_map_in_map_access("sleepable_access_map_in_array", "outer_array_map"); + if (test__start_subtest("acc_map_in_htab")) + test_map_in_map_access("access_map_in_htab", "outer_htab_map"); + if (test__start_subtest("sleepable_acc_map_in_htab")) + test_map_in_map_access("sleepable_access_map_in_htab", "outer_htab_map"); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/syscall.c b/tools/testing/selftests/bpf/prog_tests/syscall.c index f4d40001155a..0be8301c0ffd 100644 --- a/tools/testing/selftests/bpf/prog_tests/syscall.c +++ b/tools/testing/selftests/bpf/prog_tests/syscall.c @@ -12,7 +12,7 @@ struct args { int btf_fd; }; -void test_syscall(void) +static void test_syscall_load_prog(void) { static char verifier_log[8192]; struct args ctx = { @@ -32,7 +32,7 @@ void test_syscall(void) if (!ASSERT_OK_PTR(skel, "skel_load")) goto cleanup; - prog_fd = bpf_program__fd(skel->progs.bpf_prog); + prog_fd = 
bpf_program__fd(skel->progs.load_prog); err = bpf_prog_test_run_opts(prog_fd, &tattr); ASSERT_EQ(err, 0, "err"); ASSERT_EQ(tattr.retval, 1, "retval"); @@ -53,3 +53,29 @@ cleanup: if (ctx.btf_fd > 0) close(ctx.btf_fd); } + +static void test_syscall_update_outer_map(void) +{ + LIBBPF_OPTS(bpf_test_run_opts, opts); + struct syscall *skel; + int err, prog_fd; + + skel = syscall__open_and_load(); + if (!ASSERT_OK_PTR(skel, "skel_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.update_outer_map); + err = bpf_prog_test_run_opts(prog_fd, &opts); + ASSERT_EQ(err, 0, "err"); + ASSERT_EQ(opts.retval, 1, "retval"); +cleanup: + syscall__destroy(skel); +} + +void test_syscall(void) +{ + if (test__start_subtest("load_prog")) + test_syscall_load_prog(); + if (test__start_subtest("update_outer_map")) + test_syscall_update_outer_map(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c index d149ab98798d..2b3c6dd66259 100644 --- a/tools/testing/selftests/bpf/prog_tests/test_tunnel.c +++ b/tools/testing/selftests/bpf/prog_tests/test_tunnel.c @@ -50,6 +50,7 @@ */ #include <arpa/inet.h> +#include <linux/if_link.h> #include <linux/if_tun.h> #include <linux/limits.h> #include <linux/sysctl.h> @@ -92,6 +93,11 @@ #define IPIP_TUNL_DEV0 "ipip00" #define IPIP_TUNL_DEV1 "ipip11" +#define XFRM_AUTH "0x1111111111111111111111111111111111111111" +#define XFRM_ENC "0x22222222222222222222222222222222" +#define XFRM_SPI_IN_TO_OUT 0x1 +#define XFRM_SPI_OUT_TO_IN 0x2 + #define PING_ARGS "-i 0.01 -c 3 -w 10 -q" static int config_device(void) @@ -264,6 +270,92 @@ static void delete_ipip_tunnel(void) SYS_NOFAIL("ip fou del port 5555 2> /dev/null"); } +static int add_xfrm_tunnel(void) +{ + /* at_ns0 namespace + * at_ns0 -> root + */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel replay-window 42 " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", 
+ IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip netns exec at_ns0 " + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ + SYS(fail, "ip netns exec at_ns0 ip addr add dev veth0 %s/32", + IP4_ADDR_TUNL_DEV0); + SYS(fail, "ip netns exec at_ns0 ip route add %s dev veth0 via %s src %s", + IP4_ADDR_TUNL_DEV1, IP4_ADDR1_VETH1, IP4_ADDR_TUNL_DEV0); + + /* root namespace + * at_ns0 -> root + */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 1 mode tunnel replay-window 42 " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir in " + "tmpl src %s dst %s proto esp reqid 1 " + "mode tunnel", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1, IP4_ADDR_VETH0, IP4_ADDR1_VETH1); + + /* root -> at_ns0 */ + SYS(fail, + "ip xfrm state add src %s dst %s proto esp " + "spi %d reqid 2 mode tunnel " + "auth-trunc 'hmac(sha1)' %s 96 enc 'cbc(aes)' %s", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN, XFRM_AUTH, XFRM_ENC); + SYS(fail, + "ip xfrm policy add src %s/32 dst %s/32 dir out " + "tmpl src %s dst %s proto esp reqid 2 " + "mode tunnel", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0, IP4_ADDR1_VETH1, IP4_ADDR_VETH0); + + /* address & route */ 
+ SYS(fail, "ip addr add dev veth1 %s/32", IP4_ADDR_TUNL_DEV1); + SYS(fail, "ip route add %s dev veth1 via %s src %s", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_VETH0, IP4_ADDR_TUNL_DEV1); + + return 0; +fail: + return -1; +} + +static void delete_xfrm_tunnel(void) +{ + SYS_NOFAIL("ip xfrm policy delete dir out src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV1, IP4_ADDR_TUNL_DEV0); + SYS_NOFAIL("ip xfrm policy delete dir in src %s/32 dst %s/32 2> /dev/null", + IP4_ADDR_TUNL_DEV0, IP4_ADDR_TUNL_DEV1); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR_VETH0, IP4_ADDR1_VETH1, XFRM_SPI_IN_TO_OUT); + SYS_NOFAIL("ip xfrm state delete src %s dst %s proto esp spi %d 2> /dev/null", + IP4_ADDR1_VETH1, IP4_ADDR_VETH0, XFRM_SPI_OUT_TO_IN); +} + static int test_ping(int family, const char *addr) { SYS(fail, "%s %s %s > /dev/null", ping_command(family), PING_ARGS, addr); @@ -532,25 +624,85 @@ done: test_tunnel_kern__destroy(skel); } +static void test_xfrm_tunnel(void) +{ + DECLARE_LIBBPF_OPTS(bpf_tc_hook, tc_hook, + .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_xdp_attach_opts, opts); + struct test_tunnel_kern *skel = NULL; + struct nstoken *nstoken; + int xdp_prog_fd; + int tc_prog_fd; + int ifindex; + int err; + + err = add_xfrm_tunnel(); + if (!ASSERT_OK(err, "add_xfrm_tunnel")) + return; + + skel = test_tunnel_kern__open_and_load(); + if (!ASSERT_OK_PTR(skel, "test_tunnel_kern__open_and_load")) + goto done; + + ifindex = if_nametoindex("veth1"); + if (!ASSERT_NEQ(ifindex, 0, "veth1 ifindex")) + goto done; + + /* attach tc prog to tunnel dev */ + tc_hook.ifindex = ifindex; + tc_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state); + if (!ASSERT_GE(tc_prog_fd, 0, "bpf_program__fd")) + goto done; + if (attach_tc_prog(&tc_hook, tc_prog_fd, -1)) + goto done; + + /* attach xdp prog to tunnel dev */ + xdp_prog_fd = bpf_program__fd(skel->progs.xfrm_get_state_xdp); + if (!ASSERT_GE(xdp_prog_fd, 0, "bpf_program__fd")) + goto done; + err = 
bpf_xdp_attach(ifindex, xdp_prog_fd, XDP_FLAGS_REPLACE, &opts); + if (!ASSERT_OK(err, "bpf_xdp_attach")) + goto done; + + /* ping from at_ns0 namespace test */ + nstoken = open_netns("at_ns0"); + err = test_ping(AF_INET, IP4_ADDR_TUNL_DEV1); + close_netns(nstoken); + if (!ASSERT_OK(err, "test_ping")) + goto done; + + if (!ASSERT_EQ(skel->bss->xfrm_reqid, 1, "req_id")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_spi, XFRM_SPI_IN_TO_OUT, "spi")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_remote_ip, 0xac100164, "remote_ip")) + goto done; + if (!ASSERT_EQ(skel->bss->xfrm_replay_window, 42, "replay_window")) + goto done; + +done: + delete_xfrm_tunnel(); + if (skel) + test_tunnel_kern__destroy(skel); +} + #define RUN_TEST(name, ...) \ ({ \ if (test__start_subtest(#name)) { \ + config_device(); \ test_ ## name(__VA_ARGS__); \ + cleanup(); \ } \ }) static void *test_tunnel_run_tests(void *arg) { - cleanup(); - config_device(); - RUN_TEST(vxlan_tunnel); RUN_TEST(ip6vxlan_tunnel); RUN_TEST(ipip_tunnel, NONE); RUN_TEST(ipip_tunnel, FOU); RUN_TEST(ipip_tunnel, GUE); - - cleanup(); + RUN_TEST(xfrm_tunnel); return NULL; } diff --git a/tools/testing/selftests/bpf/prog_tests/time_tai.c b/tools/testing/selftests/bpf/prog_tests/time_tai.c index a31119823666..f45af1b0ef2c 100644 --- a/tools/testing/selftests/bpf/prog_tests/time_tai.c +++ b/tools/testing/selftests/bpf/prog_tests/time_tai.c @@ -56,7 +56,7 @@ void test_time_tai(void) ASSERT_NEQ(ts2, 0, "tai_ts2"); /* TAI is moving forward only */ - ASSERT_GT(ts2, ts1, "tai_forward"); + ASSERT_GE(ts2, ts1, "tai_forward"); /* Check for future */ ret = clock_gettime(CLOCK_TAI, &now_tai); diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c new file mode 100644 index 000000000000..b5dce630e0e1 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -0,0 +1,1031 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ +#define _GNU_SOURCE +#include <test_progs.h> +#include <bpf/btf.h> +#include "cap_helpers.h" +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <unistd.h> +#include <linux/filter.h> +#include <linux/unistd.h> +#include <linux/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/un.h> +#include "priv_map.skel.h" +#include "priv_prog.skel.h" +#include "dummy_st_ops_success.skel.h" + +static inline int sys_mount(const char *dev_name, const char *dir_name, + const char *type, unsigned long flags, + const void *data) +{ + return syscall(__NR_mount, dev_name, dir_name, type, flags, data); +} + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fspick(int dfd, const char *path, unsigned flags) +{ + return syscall(__NR_fspick, dfd, path, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); +} + +static int drop_priv_caps(__u64 *old_caps) +{ + return cap_disable_effective((1ULL << CAP_BPF) | + (1ULL << CAP_PERFMON) | + (1ULL << CAP_NET_ADMIN) | + (1ULL << CAP_SYS_ADMIN), old_caps); +} + +static int restore_priv_caps(__u64 old_caps) +{ + return cap_enable_effective(old_caps, NULL); +} + +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) +{ + char buf[32]; + int err; + + if (!mask_str) { + if (mask == ~0ULL) { + mask_str = "any"; + } else { + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + 
mask_str = buf; + } + } + + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, + mask_str, 0); + if (err < 0) + err = -errno; + return err; +} + +#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) + +struct bpffs_opts { + __u64 cmds; + __u64 maps; + __u64 progs; + __u64 attachs; + const char *cmds_str; + const char *maps_str; + const char *progs_str; + const char *attachs_str; +}; + +static int create_bpffs_fd(void) +{ + int fs_fd; + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + ASSERT_GE(fs_fd, 0, "fs_fd"); + + return fs_fd; +} + +static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) +{ + int mnt_fd, err; + + /* set up token delegation mount options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); + if (!ASSERT_OK(err, "fs_cfg_cmds")) + return err; + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); + if (!ASSERT_OK(err, "fs_cfg_maps")) + return err; + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); + if (!ASSERT_OK(err, "fs_cfg_progs")) + return err; + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); + if (!ASSERT_OK(err, "fs_cfg_attachs")) + return err; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (err < 0) + return -errno; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (err < 0) + return -errno; + + return mnt_fd; +} + +/* send FD over Unix domain (AF_UNIX) socket */ +static int sendfd(int sockfd, int fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1] = { fd }, err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + cmsg = CMSG_FIRSTHDR(&msg); + 
cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + + err = sendmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "sendmsg")) + return -EINVAL; + + return 0; +} + +/* receive FD over Unix domain (AF_UNIX) socket */ +static int recvfd(int sockfd, int *fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1], err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + err = recvmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "recvmsg")) + return -EINVAL; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || + !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || + !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || + !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) + return -EINVAL; + + memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); + *fd = fds[0]; + + return 0; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + 
+ snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +typedef int (*child_callback_fn)(int); + +static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1; + + /* setup userns with root mappings */ + err = create_and_enter_userns(); + if (!ASSERT_OK(err, "create_and_enter_userns")) + goto cleanup; + + /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "create_mountns")) + goto cleanup; + + err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (!ASSERT_OK(err, "remount_root")) + goto cleanup; + + fs_fd = create_bpffs_fd(); + if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, fs_fd); + if (!ASSERT_OK(err, "send_fs_fd")) + goto cleanup; + zclose(fs_fd); + + /* avoid mucking around with mount namespaces and mounting at + * well-known path, just get detach-mounted BPF FS fd back from parent + */ + err = recvfd(sock_fd, &mnt_fd); + if (!ASSERT_OK(err, "recv_mnt_fd")) + goto cleanup; + + 
/* try to fspick() BPF FS and try to add some delegation options */ + fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); + if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot reconfigure to set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); + if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { + err = -EINVAL; + goto cleanup; + } + + /* do custom test logic with customly set up BPF FS instance */ + err = callback(bpffs_fd); + if (!ASSERT_OK(err, "test_callback")) + goto cleanup; + + err = 0; +cleanup: + zclose(sock_fd); + zclose(mnt_fd); + zclose(fs_fd); + zclose(bpffs_fd); + + exit(-err); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) +{ + int fs_fd = -1, mnt_fd = -1, err; + + err = recvfd(sock_fd, &fs_fd); + if (!ASSERT_OK(err, "recv_bpffs_fd")) + goto cleanup; + + mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + /* pass 
BPF FS mount fd to child */ + err = sendfd(sock_fd, mnt_fd); + if (!ASSERT_OK(err, "send_mnt_fd")) + goto cleanup; + zclose(mnt_fd); + + err = wait_for_pid(child_pid); + ASSERT_OK(err, "waitpid_child"); + +cleanup: + zclose(sock_fd); + zclose(fs_fd); + zclose(mnt_fd); + + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static void subtest_userns(struct bpffs_opts *bpffs_opts, child_callback_fn cb) +{ + int sock_fds[2] = { -1, -1 }; + int child_pid = 0, err; + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); + if (!ASSERT_OK(err, "socketpair")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GE(child_pid, 0, "fork")) + goto cleanup; + + if (child_pid == 0) { + zclose(sock_fds[0]); + return child(sock_fds[1], bpffs_opts, cb); + + } else { + zclose(sock_fds[1]); + return parent(child_pid, bpffs_opts, sock_fds[0]); + } + +cleanup: + zclose(sock_fds[0]); + zclose(sock_fds[1]); + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static int userns_map_create(int mnt_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int err, token_fd = -1, map_fd = -1; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no token, no CAP_BPF -> fail */ + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* token without CAP_BPF -> fail */ + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, 
"w_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* CAP_BPF without token -> fail */ + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* finally, namespaced CAP_BPF + token -> success */ + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { + err = -EINVAL; + goto cleanup; + } + +cleanup: + zclose(token_fd); + zclose(map_fd); + return err; +} + +static int userns_btf_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); + int err, token_fd = -1, btf_fd = -1; + const void *raw_btf_data; + struct btf *btf = NULL; + __u32 raw_btf_size; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* setup a trivial BTF data to load to the kernel */ + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + goto cleanup; + + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) + goto cleanup; + + /* no token + no CAP_BPF -> failure */ + btf_opts.token_fd = 
0; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) + goto cleanup; + + /* token + no CAP_BPF -> failure */ + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) + goto cleanup; + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* token + CAP_BPF -> success */ + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) + goto cleanup; + + err = 0; +cleanup: + btf__free(btf); + zclose(btf_fd); + zclose(token_fd); + return err; +} + +static int userns_prog_load(int mnt_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + int err, token_fd = -1, prog_fd = -1; + struct bpf_insn insns[] = { + /* bpf_jiffies64() requires CAP_BPF */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + /* bpf_get_current_task() requires CAP_PERFMON */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), + /* r0 = 0; exit; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* validate we can successfully load BPF program with token; this + * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) + * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have + * BPF token wired properly in a bunch of places in the kernel + */ + prog_opts.token_fd = token_fd; + prog_opts.expected_attach_type = BPF_XDP; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_GT(prog_fd, 0, 
"prog_fd")) { + err = -EPERM; + goto cleanup; + } + + /* no token + caps -> failure */ + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no caps + token -> failure */ + prog_opts.token_fd = token_fd; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + /* no caps + no token -> definitely a failure */ + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = 0; +cleanup: + zclose(prog_fd); + zclose(token_fd); + return err; +} + +static int userns_obj_priv_map(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_map *skel; + int err, token_fd; + + skel = priv_map__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_map__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + /* create token and pass it through bpf_token_fd */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "create_token")) + return -EINVAL; + + opts.bpf_token_path = NULL; + opts.bpf_token_fd = token_fd; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_fd_open")) + return -EINVAL; + + /* 
we can close our token FD, bpf_object owns dup()'ed FD now */ + close(token_fd); + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_fd_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_prog(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_prog *skel; + int err; + + skel = priv_prog__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_prog__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, + * which should cause struct_ops application to fail, as BTF won't be uploaded + * into the kernel, even if STRUCT_OPS programs themselves are allowed + */ +static int validate_struct_ops_load(int mnt_fd, bool expect_success) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct dummy_st_ops_success *skel; + int err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (expect_success) { + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + } else /* expect failure */ { + if (!ASSERT_ERR(err, "obj_token_path_load")) + return -EINVAL; + } + + return 0; +} + +static int userns_obj_priv_btf_fail(int mnt_fd) +{ + return validate_struct_ops_load(mnt_fd, false /* should fail */); +} + +static int userns_obj_priv_btf_success(int mnt_fd) +{ + 
return validate_struct_ops_load(mnt_fd, true /* should succeed */); +} + +#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" +#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" + +static int userns_obj_priv_implicit_token(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF + * token automatically and implicitly + */ + err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + return -EINVAL; + + /* disable implicit BPF token creation by setting + * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail + */ + err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + return -EINVAL; + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { + unsetenv(TOKEN_ENVVAR); + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + unsetenv(TOKEN_ENVVAR); + + /* now the same struct_ops skeleton should succeed thanks to libbpf + * creating BPF token from /sys/fs/bpf mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + return -EINVAL; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, should fail */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + return -EINVAL; + + /* now 
disable implicit token through negative bpf_token_fd, should fail */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_implicit_token_envvar(int mnt_fd) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over custom location, so libbpf can't create + * BPF token implicitly, unless pointed to it through + * LIBBPF_BPF_TOKEN_PATH envvar + */ + rmdir(TOKEN_BPFFS_CUSTOM); + if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + goto err_out; + err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + goto err_out; + + /* even though we have BPF FS with delegation, it's not at default + * /sys/fs/bpf location, so we still fail to load until envvar is set up + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { + dummy_st_ops_success__destroy(skel); + goto err_out; + } + + err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + goto err_out; + + /* now the same struct_ops skeleton should succeed thanks to libbpf + * creating BPF token from custom mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + goto err_out; + + dummy_st_ops_success__destroy(skel); + + /* 
now disable implicit token through empty bpf_token_path, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + goto err_out; + + /* now disable implicit token through negative bpf_token_fd, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = NULL; + opts.bpf_token_fd = -1; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_neg_token_fd_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_neg_token_fd_load")) + goto err_out; + + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return 0; +err_out: + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return -EINVAL; +} + +#define bit(n) (1ULL << (n)) + +void test_token(void) +{ + if (test__start_subtest("map_token")) { + struct bpffs_opts opts = { + .cmds_str = "map_create", + .maps_str = "stack", + }; + + subtest_userns(&opts, userns_map_create); + } + if (test__start_subtest("btf_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_BTF_LOAD, + }; + + subtest_userns(&opts, userns_btf_load); + } + if (test__start_subtest("prog_token")) { + struct bpffs_opts opts = { + .cmds_str = "PROG_LOAD", + .progs_str = "XDP", + .attachs_str = "xdp", + }; + + subtest_userns(&opts, userns_prog_load); + } + if (test__start_subtest("obj_priv_map")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .maps = bit(BPF_MAP_TYPE_QUEUE), + }; + + subtest_userns(&opts, userns_obj_priv_map); + } + if (test__start_subtest("obj_priv_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_KPROBE), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_prog); + 
} + if (test__start_subtest("obj_priv_btf_fail")) { + struct bpffs_opts opts = { + /* disallow BTF loading */ + .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_fail); + } + if (test__start_subtest("obj_priv_btf_success")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_success); + } + if (test__start_subtest("obj_priv_implicit_token")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token); + } + if (test__start_subtest("obj_priv_implicit_token_envvar")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c index ece260cf2c0b..8269cdee33ae 100644 --- a/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c +++ b/tools/testing/selftests/bpf/prog_tests/uprobe_multi_test.c @@ -234,6 +234,177 @@ static void test_attach_api_syms(void) test_attach_api("/proc/self/exe", NULL, &opts); } +static void test_attach_api_fails(void) +{ + LIBBPF_OPTS(bpf_link_create_opts, opts); + const char *path = "/proc/self/exe"; + struct uprobe_multi *skel = NULL; + int prog_fd, link_fd = 
-1; + unsigned long offset = 0; + + skel = uprobe_multi__open_and_load(); + if (!ASSERT_OK_PTR(skel, "uprobe_multi__open_and_load")) + goto cleanup; + + prog_fd = bpf_program__fd(skel->progs.uprobe_extra); + + /* abnormal cnt */ + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = &offset; + opts.uprobe_multi.cnt = INT_MAX; + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -E2BIG, "big cnt")) + goto cleanup; + + /* cnt is 0 */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "cnt_is_zero")) + goto cleanup; + + /* negative offset */ + offset = -1; + opts.uprobe_multi.path = path; + opts.uprobe_multi.offsets = (unsigned long *) &offset; + opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offset_is_negative")) + goto cleanup; + + /* offsets is NULL */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "offsets_is_null")) + goto cleanup; + + /* wrong offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "offsets_is_wrong")) + goto cleanup; + + /* path is NULL */ + offset = 1; + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.offsets = (unsigned long *) 
&offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "path_is_null")) + goto cleanup; + + /* wrong path pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = (const char *) 1, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "path_is_wrong")) + goto cleanup; + + /* wrong path type */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = "/", + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EBADF, "path_is_wrong_type")) + goto cleanup; + + /* wrong cookies pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) 1ULL, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "cookies_is_wrong")) + goto cleanup; + + /* wrong ref_ctr_offsets pointer */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cookies = (__u64 *) &offset, + .uprobe_multi.ref_ctr_offsets = (unsigned long *) 1, + .uprobe_multi.cnt = 1, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EFAULT, "ref_ctr_offsets_is_wrong")) + goto cleanup; + + /* wrong flags */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.flags = 1 << 31, + ); + + link_fd = 
bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + if (!ASSERT_EQ(link_fd, -EINVAL, "wrong_flags")) + goto cleanup; + + /* wrong pid */ + LIBBPF_OPTS_RESET(opts, + .uprobe_multi.path = path, + .uprobe_multi.offsets = (unsigned long *) &offset, + .uprobe_multi.cnt = 1, + .uprobe_multi.pid = -2, + ); + + link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_UPROBE_MULTI, &opts); + if (!ASSERT_ERR(link_fd, "link_fd")) + goto cleanup; + ASSERT_EQ(link_fd, -ESRCH, "pid_is_wrong"); + +cleanup: + if (link_fd >= 0) + close(link_fd); + uprobe_multi__destroy(skel); +} + static void __test_link_api(struct child *child) { int prog_fd, link1_fd = -1, link2_fd = -1, link3_fd = -1, link4_fd = -1; @@ -311,7 +482,7 @@ cleanup: free(offsets); } -void test_link_api(void) +static void test_link_api(void) { struct child *child; @@ -412,4 +583,6 @@ void test_uprobe_multi_test(void) test_bench_attach_uprobe(); if (test__start_subtest("bench_usdt")) test_bench_attach_usdt(); + if (test__start_subtest("attach_api_fails")) + test_attach_api_fails(); } diff --git a/tools/testing/selftests/bpf/prog_tests/verifier.c b/tools/testing/selftests/bpf/prog_tests/verifier.c index 8d746642cbd7..ac49ec25211d 100644 --- a/tools/testing/selftests/bpf/prog_tests/verifier.c +++ b/tools/testing/selftests/bpf/prog_tests/verifier.c @@ -6,6 +6,7 @@ #include "verifier_and.skel.h" #include "verifier_array_access.skel.h" #include "verifier_basic_stack.skel.h" +#include "verifier_bitfield_write.skel.h" #include "verifier_bounds.skel.h" #include "verifier_bounds_deduction.skel.h" #include "verifier_bounds_deduction_non_const.skel.h" @@ -116,6 +117,7 @@ static void run_tests_aux(const char *skel_name, void test_verifier_and(void) { RUN(verifier_and); } void test_verifier_basic_stack(void) { RUN(verifier_basic_stack); } +void test_verifier_bitfield_write(void) { RUN(verifier_bitfield_write); } void test_verifier_bounds(void) { RUN(verifier_bounds); } void 
test_verifier_bounds_deduction(void) { RUN(verifier_bounds_deduction); } void test_verifier_bounds_deduction_non_const(void) { RUN(verifier_bounds_deduction_non_const); } diff --git a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c index dd7f2bc70048..ab0f02faa80c 100644 --- a/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/prog_tests/verify_pkcs7_sig.c @@ -16,9 +16,12 @@ #include <sys/wait.h> #include <sys/mman.h> #include <linux/keyctl.h> +#include <sys/xattr.h> +#include <linux/fsverity.h> #include <test_progs.h> #include "test_verify_pkcs7_sig.skel.h" +#include "test_sig_in_xattr.skel.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 @@ -26,6 +29,10 @@ #define VERIFY_USE_SECONDARY_KEYRING (1UL) #define VERIFY_USE_PLATFORM_KEYRING (2UL) +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + /* In stripped ARM and x86-64 modules, ~ is surprisingly rare. 
*/ #define MODULE_SIG_STRING "~Module signature appended~\n" @@ -254,7 +261,7 @@ out: return ret; } -void test_verify_pkcs7_sig(void) +static void test_verify_pkcs7_sig_from_map(void) { libbpf_print_fn_t old_print_cb; char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; @@ -400,3 +407,159 @@ close_prog: skel->bss->monitored_pid = 0; test_verify_pkcs7_sig__destroy(skel); } + +static int get_signature_size(const char *sig_path) +{ + struct stat st; + + if (stat(sig_path, &st) == -1) + return -1; + + return st.st_size; +} + +static int add_signature_to_xattr(const char *data_path, const char *sig_path) +{ + char sig[MAX_SIG_SIZE] = {0}; + int fd, size, ret; + + if (sig_path) { + fd = open(sig_path, O_RDONLY); + if (fd < 0) + return -1; + + size = read(fd, sig, MAX_SIG_SIZE); + close(fd); + if (size <= 0) + return -1; + } else { + /* no sig_path, just write 32 bytes of zeros */ + size = 32; + } + ret = setxattr(data_path, "user.sig", sig, size, 0); + if (!ASSERT_OK(ret, "setxattr")) + return -1; + + return 0; +} + +static int test_open_file(struct test_sig_in_xattr *skel, char *data_path, + pid_t pid, bool should_success, char *name) +{ + int ret; + + skel->bss->monitored_pid = pid; + ret = open(data_path, O_RDONLY); + close(ret); + skel->bss->monitored_pid = 0; + + if (should_success) { + if (!ASSERT_GE(ret, 0, name)) + return -1; + } else { + if (!ASSERT_LT(ret, 0, name)) + return -1; + } + return 0; +} + +static void test_pkcs7_sig_fsverity(void) +{ + char data_path[PATH_MAX]; + char sig_path[PATH_MAX]; + char tmp_dir_template[] = "/tmp/verify_sigXXXXXX"; + char *tmp_dir; + struct test_sig_in_xattr *skel = NULL; + pid_t pid; + int ret; + + tmp_dir = mkdtemp(tmp_dir_template); + if (!ASSERT_OK_PTR(tmp_dir, "mkdtemp")) + return; + + snprintf(data_path, PATH_MAX, "%s/data-file", tmp_dir); + snprintf(sig_path, PATH_MAX, "%s/sig-file", tmp_dir); + + ret = _run_setup_process(tmp_dir, "setup"); + if (!ASSERT_OK(ret, "_run_setup_process")) + goto out; + + ret = 
_run_setup_process(tmp_dir, "fsverity-create-sign"); + + if (ret) { + printf("%s: SKIP: fsverity [sign|enable] doesn't work.\n" + "To run this test, try enable CONFIG_FS_VERITY and enable FSVerity for the filesystem.\n", + __func__); + test__skip(); + goto out; + } + + skel = test_sig_in_xattr__open(); + if (!ASSERT_OK_PTR(skel, "test_sig_in_xattr__open")) + goto out; + ret = get_signature_size(sig_path); + if (!ASSERT_GT(ret, 0, "get_signature_size")) + goto out; + skel->bss->sig_size = ret; + skel->bss->user_keyring_serial = syscall(__NR_request_key, "keyring", + "ebpf_testing_keyring", NULL, + KEY_SPEC_SESSION_KEYRING); + memcpy(skel->bss->digest, "FSVerity", 8); + + ret = test_sig_in_xattr__load(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__load")) + goto out; + + ret = test_sig_in_xattr__attach(skel); + if (!ASSERT_OK(ret, "test_sig_in_xattr__attach")) + goto out; + + pid = getpid(); + + /* Case 1: fsverity is not enabled, open should succeed */ + if (test_open_file(skel, data_path, pid, true, "open_1")) + goto out; + + /* Case 2: fsverity is enabled, xattr is missing, open should + * fail + */ + ret = _run_setup_process(tmp_dir, "fsverity-enable"); + if (!ASSERT_OK(ret, "fsverity-enable")) + goto out; + if (test_open_file(skel, data_path, pid, false, "open_2")) + goto out; + + /* Case 3: fsverity is enabled, xattr has valid signature, open + * should succeed + */ + ret = add_signature_to_xattr(data_path, sig_path); + if (!ASSERT_OK(ret, "add_signature_to_xattr_1")) + goto out; + + if (test_open_file(skel, data_path, pid, true, "open_3")) + goto out; + + /* Case 4: fsverity is enabled, xattr has invalid signature, open + * should fail + */ + ret = add_signature_to_xattr(data_path, NULL); + if (!ASSERT_OK(ret, "add_signature_to_xattr_2")) + goto out; + test_open_file(skel, data_path, pid, false, "open_4"); + +out: + _run_setup_process(tmp_dir, "cleanup"); + if (!skel) + return; + + skel->bss->monitored_pid = 0; + test_sig_in_xattr__destroy(skel); +} + +void 
test_verify_pkcs7_sig(void) +{ + if (test__start_subtest("pkcs7_sig_from_map")) + test_verify_pkcs7_sig_from_map(); + if (test__start_subtest("pkcs7_sig_fsverity")) + test_pkcs7_sig_fsverity(); +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c index ab4952b9fb1d..e6a783c7f5db 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_context_test_run.c @@ -77,8 +77,8 @@ void test_xdp_context_test_run(void) test_xdp_context_error(prog_fd, opts, 4, sizeof(__u32), sizeof(data), 0, 0, 0); - /* Meta data must be 32 bytes or smaller */ - test_xdp_context_error(prog_fd, opts, 0, 36, sizeof(data), 0, 0, 0); + /* Meta data must be 255 bytes or smaller */ + test_xdp_context_error(prog_fd, opts, 0, 256, sizeof(data), 0, 0, 0); /* Total size of data must match data_end - data_meta */ test_xdp_context_error(prog_fd, opts, 0, sizeof(__u32), diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c index 33cdf88efa6b..05edcf32f528 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c +++ b/tools/testing/selftests/bpf/prog_tests/xdp_metadata.c @@ -20,7 +20,7 @@ #define UDP_PAYLOAD_BYTES 4 -#define AF_XDP_SOURCE_PORT 1234 +#define UDP_SOURCE_PORT 1234 #define AF_XDP_CONSUMER_PORT 8080 #define UMEM_NUM 16 @@ -33,6 +33,18 @@ #define RX_ADDR "10.0.0.2" #define PREFIX_LEN "8" #define FAMILY AF_INET +#define TX_NETNS_NAME "xdp_metadata_tx" +#define RX_NETNS_NAME "xdp_metadata_rx" +#define TX_MAC "00:00:00:00:00:01" +#define RX_MAC "00:00:00:00:00:02" + +#define VLAN_ID 59 +#define VLAN_PROTO "802.1Q" +#define VLAN_PID htons(ETH_P_8021Q) +#define TX_NAME_VLAN TX_NAME "." 
TO_STR(VLAN_ID) + +#define XDP_RSS_TYPE_L4 BIT(3) +#define VLAN_VID_MASK 0xfff struct xsk { void *umem_area; @@ -181,7 +193,7 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) ASSERT_EQ(inet_pton(FAMILY, RX_ADDR, &iph->daddr), 1, "inet_pton(RX_ADDR)"); ip_csum(iph); - udph->source = htons(AF_XDP_SOURCE_PORT); + udph->source = htons(UDP_SOURCE_PORT); udph->dest = htons(dst_port); udph->len = htons(sizeof(*udph) + UDP_PAYLOAD_BYTES); udph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, @@ -204,6 +216,30 @@ static int generate_packet(struct xsk *xsk, __u16 dst_port) return 0; } +static int generate_packet_inet(void) +{ + char udp_payload[UDP_PAYLOAD_BYTES]; + struct sockaddr_in rx_addr; + int sock_fd, err = 0; + + /* Build a packet */ + memset(udp_payload, 0xAA, UDP_PAYLOAD_BYTES); + rx_addr.sin_addr.s_addr = inet_addr(RX_ADDR); + rx_addr.sin_family = AF_INET; + rx_addr.sin_port = htons(AF_XDP_CONSUMER_PORT); + + sock_fd = socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP); + if (!ASSERT_GE(sock_fd, 0, "socket(AF_INET, SOCK_DGRAM, IPPROTO_UDP)")) + return sock_fd; + + err = sendto(sock_fd, udp_payload, UDP_PAYLOAD_BYTES, MSG_DONTWAIT, + (void *)&rx_addr, sizeof(rx_addr)); + ASSERT_GE(err, 0, "sendto"); + + close(sock_fd); + return err; +} + static void complete_tx(struct xsk *xsk) { struct xsk_tx_metadata *meta; @@ -236,7 +272,7 @@ static void refill_rx(struct xsk *xsk, __u64 addr) } } -static int verify_xsk_metadata(struct xsk *xsk) +static int verify_xsk_metadata(struct xsk *xsk, bool sent_from_af_xdp) { const struct xdp_desc *rx_desc; struct pollfd fds = {}; @@ -290,17 +326,42 @@ static int verify_xsk_metadata(struct xsk *xsk) if (!ASSERT_NEQ(meta->rx_hash, 0, "rx_hash")) return -1; + if (!sent_from_af_xdp) { + if (!ASSERT_NEQ(meta->rx_hash_type & XDP_RSS_TYPE_L4, 0, "rx_hash_type")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_tci & VLAN_VID_MASK, VLAN_ID, "rx_vlan_tci")) + return -1; + + if (!ASSERT_EQ(meta->rx_vlan_proto, VLAN_PID, "rx_vlan_proto")) + 
return -1; + goto done; + } + ASSERT_EQ(meta->rx_hash_type, 0, "rx_hash_type"); /* checksum offload */ ASSERT_EQ(udph->check, htons(0x721c), "csum"); +done: xsk_ring_cons__release(&xsk->rx, 1); refill_rx(xsk, comp_addr); return 0; } +static void switch_ns_to_rx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(RX_NETNS_NAME); +} + +static void switch_ns_to_tx(struct nstoken **tok) +{ + close_netns(*tok); + *tok = open_netns(TX_NETNS_NAME); +} + void test_xdp_metadata(void) { struct xdp_metadata2 *bpf_obj2 = NULL; @@ -318,27 +379,35 @@ void test_xdp_metadata(void) int sock_fd; int ret; - /* Setup new networking namespace, with a veth pair. */ + /* Setup new networking namespaces, with a veth pair. */ + SYS(out, "ip netns add " TX_NETNS_NAME); + SYS(out, "ip netns add " RX_NETNS_NAME); - SYS(out, "ip netns add xdp_metadata"); - tok = open_netns("xdp_metadata"); + tok = open_netns(TX_NETNS_NAME); SYS(out, "ip link add numtxqueues 1 numrxqueues 1 " TX_NAME " type veth peer " RX_NAME " numtxqueues 1 numrxqueues 1"); - SYS(out, "ip link set dev " TX_NAME " address 00:00:00:00:00:01"); - SYS(out, "ip link set dev " RX_NAME " address 00:00:00:00:00:02"); + SYS(out, "ip link set " RX_NAME " netns " RX_NETNS_NAME); + + SYS(out, "ip link set dev " TX_NAME " address " TX_MAC); SYS(out, "ip link set dev " TX_NAME " up"); + + SYS(out, "ip link add link " TX_NAME " " TX_NAME_VLAN + " type vlan proto " VLAN_PROTO " id " TO_STR(VLAN_ID)); + SYS(out, "ip link set dev " TX_NAME_VLAN " up"); + SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME_VLAN); + + /* Avoid ARP calls */ + SYS(out, "ip -4 neigh add " RX_ADDR " lladdr " RX_MAC " dev " TX_NAME_VLAN); + + switch_ns_to_rx(&tok); + + SYS(out, "ip link set dev " RX_NAME " address " RX_MAC); SYS(out, "ip link set dev " RX_NAME " up"); - SYS(out, "ip addr add " TX_ADDR "/" PREFIX_LEN " dev " TX_NAME); SYS(out, "ip addr add " RX_ADDR "/" PREFIX_LEN " dev " RX_NAME); rx_ifindex = if_nametoindex(RX_NAME); - 
tx_ifindex = if_nametoindex(TX_NAME); - - /* Setup separate AF_XDP for TX and RX interfaces. */ - ret = open_xsk(tx_ifindex, &tx_xsk); - if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) - goto out; + /* Setup separate AF_XDP for RX interface. */ ret = open_xsk(rx_ifindex, &rx_xsk); if (!ASSERT_OK(ret, "open_xsk(RX_NAME)")) @@ -379,18 +448,38 @@ void test_xdp_metadata(void) if (!ASSERT_GE(ret, 0, "bpf_map_update_elem")) goto out; - /* Send packet destined to RX AF_XDP socket. */ + switch_ns_to_tx(&tok); + + /* Setup separate AF_XDP for TX interface and send packet to the RX socket. */ + tx_ifindex = if_nametoindex(TX_NAME); + ret = open_xsk(tx_ifindex, &tx_xsk); + if (!ASSERT_OK(ret, "open_xsk(TX_NAME)")) + goto out; + if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate AF_XDP_CONSUMER_PORT")) goto out; - /* Verify AF_XDP RX packet has proper metadata. */ - if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk), 0, + switch_ns_to_rx(&tok); + + /* Verify packet sent from AF_XDP has proper metadata. */ + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, true), 0, "verify_xsk_metadata")) goto out; + switch_ns_to_tx(&tok); complete_tx(&tx_xsk); + /* Now check metadata of packet, generated with network stack */ + if (!ASSERT_GE(generate_packet_inet(), 0, "generate UDP packet")) + goto out; + + switch_ns_to_rx(&tok); + + if (!ASSERT_GE(verify_xsk_metadata(&rx_xsk, false), 0, + "verify_xsk_metadata")) + goto out; + /* Make sure freplace correctly picks up original bound device * and doesn't crash. */ @@ -408,11 +497,15 @@ void test_xdp_metadata(void) if (!ASSERT_OK(xdp_metadata2__attach(bpf_obj2), "attach freplace")) goto out; + switch_ns_to_tx(&tok); + /* Send packet to trigger . 
*/ if (!ASSERT_GE(generate_packet(&tx_xsk, AF_XDP_CONSUMER_PORT), 0, "generate freplace packet")) goto out; + switch_ns_to_rx(&tok); + while (!retries--) { if (bpf_obj2->bss->called) break; @@ -427,5 +520,6 @@ out: xdp_metadata__destroy(bpf_obj); if (tok) close_netns(tok); - SYS_NOFAIL("ip netns del xdp_metadata"); + SYS_NOFAIL("ip netns del " RX_NETNS_NAME); + SYS_NOFAIL("ip netns del " TX_NETNS_NAME); } diff --git a/tools/testing/selftests/bpf/progs/access_map_in_map.c b/tools/testing/selftests/bpf/progs/access_map_in_map.c new file mode 100644 index 000000000000..1126871c2ebd --- /dev/null +++ b/tools/testing/selftests/bpf/progs/access_map_in_map.c @@ -0,0 +1,93 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <linux/bpf.h> +#include <time.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_htab_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int tgid = 0; + +static int acc_map_in_map(void *outer_map) +{ + int i, key, value = 0xdeadbeef; + void *inner_map; + + if ((bpf_get_current_pid_tgid() >> 32) != tgid) + return 0; + + /* Find nonexistent inner map */ + key = 1; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (inner_map) + return 0; + + /* Find the old inner map */ + key = 0; + inner_map = bpf_map_lookup_elem(outer_map, &key); + if (!inner_map) + return 
0; + + /* Wait for the old inner map to be replaced */ + for (i = 0; i < 2048; i++) + bpf_map_update_elem(inner_map, &key, &value, 0); + + return 0; +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_array(void *ctx) +{ + return acc_map_in_map(&outer_array_map); +} + +SEC("?kprobe/" SYS_PREFIX "sys_getpgid") +int access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int sleepable_access_map_in_htab(void *ctx) +{ + return acc_map_in_map(&outer_htab_map); +} diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 799fff4995d8..2fd59970c43a 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -71,6 +71,7 @@ #define __retval_unpriv(val) __attribute__((btf_decl_tag("comment:test_retval_unpriv="#val))) #define __auxiliary __attribute__((btf_decl_tag("comment:test_auxiliary"))) #define __auxiliary_unpriv __attribute__((btf_decl_tag("comment:test_auxiliary_unpriv"))) +#define __btf_path(path) __attribute__((btf_decl_tag("comment:test_btf_path=" path))) /* Convenience macro for use with 'asm volatile' blocks */ #define __naked __attribute__((naked)) diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index 0b793a102791..1bdc680b0e0e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -26,6 +26,7 @@ #define IPV6_AUTOFLOWLABEL 70 #define TC_ACT_UNSPEC (-1) +#define TC_ACT_OK 0 #define TC_ACT_SHOT 2 #define SOL_TCP 6 diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c index a043d8fefdac..610c2427fd93 100644 --- 
a/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_recursion.c @@ -21,50 +21,100 @@ struct { __type(value, long); } map_b SEC(".maps"); +int target_hid = 0; +bool is_cgroup1 = 0; + +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_lookup(struct cgroup *cgrp) +{ + bpf_cgrp_storage_delete(&map_a, cgrp); + bpf_cgrp_storage_delete(&map_b, cgrp); +} + SEC("fentry/bpf_local_storage_lookup") int BPF_PROG(on_lookup) { struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; - bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); - bpf_cgrp_storage_delete(&map_b, task->cgroups->dfl_cgrp); + __on_lookup(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_lookup(task->cgroups->dfl_cgrp); return 0; } -SEC("fentry/bpf_local_storage_update") -int BPF_PROG(on_update) +static void __on_update(struct cgroup *cgrp) { - struct task_struct *task = bpf_get_current_task_btf(); long *ptr; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr += 1; +} +SEC("fentry/bpf_local_storage_update") +int BPF_PROG(on_update) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_update(cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_update(task->cgroups->dfl_cgrp); return 0; } -SEC("tp_btf/sys_enter") -int BPF_PROG(on_enter, struct 
pt_regs *regs, long id) +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; - task = bpf_get_current_task_btf(); - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 200; - ptr = bpf_cgrp_storage_get(&map_b, task->cgroups->dfl_cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); + ptr = bpf_cgrp_storage_get(&map_b, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (ptr) *ptr = 100; +} + +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) +{ + struct task_struct *task = bpf_get_current_task_btf(); + struct cgroup *cgrp; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c index 4c7844e1dbfa..facedd8b8250 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_sleepable.c @@ -17,7 +17,11 @@ struct { __u32 target_pid; __u64 cgroup_id; +int target_hid; +bool is_cgroup1; +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; @@ -37,23 +41,50 @@ int cgroup_iter(struct bpf_iter__cgroup *ctx) return 0; } +static void __no_rcu_lock(struct cgroup *cgrp) +{ + long *ptr; + + /* Note that trace rcu is held in sleepable prog, so we can use + * bpf_cgrp_storage_get() in sleepable prog. 
+ */ + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, + BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; +} + SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") -int no_rcu_lock(void *ctx) +int cgrp1_no_rcu_lock(void *ctx) { struct task_struct *task; struct cgroup *cgrp; - long *ptr; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + /* bpf_task_get_cgroup1 can work in sleepable prog */ + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __no_rcu_lock(cgrp); + bpf_cgroup_release(cgrp); + return 0; +} + +SEC("?fentry.s/" SYS_PREFIX "sys_getpgid") +int no_rcu_lock(void *ctx) +{ + struct task_struct *task; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; /* task->cgroups is untrusted in sleepable prog outside of RCU CS */ - cgrp = task->cgroups->dfl_cgrp; - ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, - BPF_LOCAL_STORAGE_GET_F_CREATE); - if (ptr) - cgroup_id = cgrp->kn->id; + __no_rcu_lock(task->cgroups->dfl_cgrp); return 0; } @@ -68,6 +99,22 @@ int yes_rcu_lock(void *ctx) if (task->pid != target_pid) return 0; + if (is_cgroup1) { + bpf_rcu_read_lock(); + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) { + bpf_rcu_read_unlock(); + return 0; + } + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); + if (ptr) + cgroup_id = cgrp->kn->id; + bpf_cgroup_release(cgrp); + bpf_rcu_read_unlock(); + return 0; + } + bpf_rcu_read_lock(); cgrp = task->cgroups->dfl_cgrp; /* cgrp is trusted under RCU CS */ diff --git a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c index 9ebb8e2fe541..1c348f000f38 100644 --- a/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c +++ b/tools/testing/selftests/bpf/progs/cgrp_ls_tp_btf.c @@ -27,62 +27,100 @@ pid_t target_pid = 0; int mismatch_cnt = 0; int enter_cnt = 0; int exit_cnt = 0; +int target_hid = 0; +bool is_cgroup1 = 0; -SEC("tp_btf/sys_enter") -int 
BPF_PROG(on_enter, struct pt_regs *regs, long id) +struct cgroup *bpf_task_get_cgroup1(struct task_struct *task, int hierarchy_id) __ksym; +void bpf_cgroup_release(struct cgroup *cgrp) __ksym; + +static void __on_enter(struct pt_regs *regs, long id, struct cgroup *cgrp) { - struct task_struct *task; long *ptr; int err; - task = bpf_get_current_task_btf(); - if (task->pid != target_pid) - return 0; - /* populate value 0 */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; /* delete value 0 */ - err = bpf_cgrp_storage_delete(&map_a, task->cgroups->dfl_cgrp); + err = bpf_cgrp_storage_delete(&map_a, cgrp); if (err) - return 0; + return; /* value is not available */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, 0); + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 0); if (ptr) - return 0; + return; /* re-populate the value */ - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&enter_cnt, 1); *ptr = MAGIC_VALUE + enter_cnt; - - return 0; } -SEC("tp_btf/sys_exit") -int BPF_PROG(on_exit, struct pt_regs *regs, long id) +SEC("tp_btf/sys_enter") +int BPF_PROG(on_enter, struct pt_regs *regs, long id) { struct task_struct *task; - long *ptr; + struct cgroup *cgrp; task = bpf_get_current_task_btf(); if (task->pid != target_pid) return 0; - ptr = bpf_cgrp_storage_get(&map_a, task->cgroups->dfl_cgrp, 0, + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_enter(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_enter(regs, id, task->cgroups->dfl_cgrp); + return 0; +} + +static void __on_exit(struct pt_regs *regs, long id, struct cgroup *cgrp) +{ + long *ptr; + + ptr = bpf_cgrp_storage_get(&map_a, cgrp, 0, 
BPF_LOCAL_STORAGE_GET_F_CREATE); if (!ptr) - return 0; + return; __sync_fetch_and_add(&exit_cnt, 1); if (*ptr != MAGIC_VALUE + exit_cnt) __sync_fetch_and_add(&mismatch_cnt, 1); +} + +SEC("tp_btf/sys_exit") +int BPF_PROG(on_exit, struct pt_regs *regs, long id) +{ + struct task_struct *task; + struct cgroup *cgrp; + + task = bpf_get_current_task_btf(); + if (task->pid != target_pid) + return 0; + + if (is_cgroup1) { + cgrp = bpf_task_get_cgroup1(task, target_hid); + if (!cgrp) + return 0; + + __on_exit(regs, id, cgrp); + bpf_cgroup_release(cgrp); + return 0; + } + + __on_exit(regs, id, task->cgroups->dfl_cgrp); return 0; } diff --git a/tools/testing/selftests/bpf/progs/cpumask_common.h b/tools/testing/selftests/bpf/progs/cpumask_common.h index b15c588ace15..0cd4aebb97cf 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_common.h +++ b/tools/testing/selftests/bpf/progs/cpumask_common.h @@ -54,6 +54,7 @@ bool bpf_cpumask_full(const struct cpumask *cpumask) __ksym; void bpf_cpumask_copy(struct bpf_cpumask *dst, const struct cpumask *src) __ksym; u32 bpf_cpumask_any_distribute(const struct cpumask *src) __ksym; u32 bpf_cpumask_any_and_distribute(const struct cpumask *src1, const struct cpumask *src2) __ksym; +u32 bpf_cpumask_weight(const struct cpumask *cpumask) __ksym; void bpf_rcu_read_lock(void) __ksym; void bpf_rcu_read_unlock(void) __ksym; diff --git a/tools/testing/selftests/bpf/progs/cpumask_success.c b/tools/testing/selftests/bpf/progs/cpumask_success.c index 674a63424dee..fc3666edf456 100644 --- a/tools/testing/selftests/bpf/progs/cpumask_success.c +++ b/tools/testing/selftests/bpf/progs/cpumask_success.c @@ -461,6 +461,49 @@ int BPF_PROG(test_global_mask_rcu, struct task_struct *task, u64 clone_flags) } SEC("tp_btf/task_newtask") +int BPF_PROG(test_cpumask_weight, struct task_struct *task, u64 clone_flags) +{ + struct bpf_cpumask *local; + + if (!is_test_task()) + return 0; + + local = create_cpumask(); + if (!local) + return 0; + + if 
(bpf_cpumask_weight(cast(local)) != 0) { + err = 3; + goto out; + } + + bpf_cpumask_set_cpu(0, local); + if (bpf_cpumask_weight(cast(local)) != 1) { + err = 4; + goto out; + } + + /* + * Make sure that adding additional CPUs changes the weight. Test to + * see whether the CPU was set to account for running on UP machines. + */ + bpf_cpumask_set_cpu(1, local); + if (bpf_cpumask_test_cpu(1, cast(local)) && bpf_cpumask_weight(cast(local)) != 2) { + err = 5; + goto out; + } + + bpf_cpumask_clear(local); + if (bpf_cpumask_weight(cast(local)) != 0) { + err = 6; + goto out; + } +out: + bpf_cpumask_release(local); + return 0; +} + +SEC("tp_btf/task_newtask") __success int BPF_PROG(test_refcount_null_tracking, struct task_struct *task, u64 clone_flags) { diff --git a/tools/testing/selftests/bpf/progs/exceptions_assert.c b/tools/testing/selftests/bpf/progs/exceptions_assert.c index 49efaed143fc..0ef81040da59 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_assert.c +++ b/tools/testing/selftests/bpf/progs/exceptions_assert.c @@ -125,7 +125,7 @@ int check_assert_generic(struct __sk_buff *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0)") +__failure __msg("At program exit the register R1 has smin=64 smax=64") int check_assert_with_return(void *ctx) { bpf_assert_with(!ctx, 64); diff --git a/tools/testing/selftests/bpf/progs/exceptions_fail.c b/tools/testing/selftests/bpf/progs/exceptions_fail.c index 8c0ef2742208..9cceb6521143 100644 --- a/tools/testing/selftests/bpf/progs/exceptions_fail.c +++ b/tools/testing/selftests/bpf/progs/exceptions_fail.c @@ -308,7 +308,7 @@ int reject_set_exception_cb_bad_ret1(void *ctx) } SEC("?fentry/bpf_check") -__failure __msg("At program exit the register R0 has value (0x40; 0x0) should") +__failure __msg("At program exit the register R1 has smin=64 smax=64 should") int reject_set_exception_cb_bad_ret2(void *ctx) { bpf_throw(64); diff --git 
a/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c new file mode 100644 index 000000000000..e6a75f86cac6 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/freplace_dead_global_func.c @@ -0,0 +1,11 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> + +SEC("freplace") +int freplace_prog(void) +{ + return 0; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index b2181f850d3e..3aca3dc145b5 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -846,7 +846,7 @@ __naked int delayed_precision_mark(void) "call %[bpf_iter_num_next];" "if r0 == 0 goto 2f;" "if r6 != 42 goto 3f;" - "r7 = -32;" + "r7 = -33;" "call %[bpf_get_prandom_u32];" "r6 = r0;" "goto 1b;\n" diff --git a/tools/testing/selftests/bpf/progs/local_kptr_stash.c b/tools/testing/selftests/bpf/progs/local_kptr_stash.c index 1769fdff6aea..75043ffc5dad 100644 --- a/tools/testing/selftests/bpf/progs/local_kptr_stash.c +++ b/tools/testing/selftests/bpf/progs/local_kptr_stash.c @@ -37,11 +37,18 @@ struct plain_local { long data; }; +struct local_with_root { + long key; + struct bpf_spin_lock l; + struct bpf_rb_root r __contains(node_data, node); +}; + struct map_value { struct prog_test_ref_kfunc *not_kptr; struct prog_test_ref_kfunc __kptr *val; struct node_data __kptr *node; struct plain_local __kptr *plain; + struct local_with_root __kptr *local_root; }; /* This is necessary so that LLVM generates BTF for node_data struct @@ -65,6 +72,17 @@ struct { __uint(max_entries, 2); } some_nodes SEC(".maps"); +static bool less(struct bpf_rb_node *a, const struct bpf_rb_node *b) +{ + struct node_data *node_a; + struct node_data *node_b; + + node_a = container_of(a, struct node_data, node); + node_b = container_of(b, struct node_data, node); + + return 
node_a->key < node_b->key; +} + static int create_and_stash(int idx, int val) { struct map_value *mapval; @@ -114,6 +132,41 @@ long stash_plain(void *ctx) } SEC("tc") +long stash_local_with_root(void *ctx) +{ + struct local_with_root *res; + struct map_value *mapval; + struct node_data *n; + int idx = 0; + + mapval = bpf_map_lookup_elem(&some_nodes, &idx); + if (!mapval) + return 1; + + res = bpf_obj_new(typeof(*res)); + if (!res) + return 2; + res->key = 41; + + n = bpf_obj_new(typeof(*n)); + if (!n) { + bpf_obj_drop(res); + return 3; + } + + bpf_spin_lock(&res->l); + bpf_rbtree_add(&res->r, &n->node, less); + bpf_spin_unlock(&res->l); + + res = bpf_kptr_xchg(&mapval->local_root, res); + if (res) { + bpf_obj_drop(res); + return 4; + } + return 0; +} + +SEC("tc") long unstash_rb_node(void *ctx) { struct map_value *mapval; diff --git a/tools/testing/selftests/bpf/progs/map_in_map_btf.c b/tools/testing/selftests/bpf/progs/map_in_map_btf.c new file mode 100644 index 000000000000..7a1336d7b16a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/map_in_map_btf.c @@ -0,0 +1,73 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct inner_array_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} inner_array SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); + __array(values, struct inner_array_type); +} outer_array SEC(".maps") = { + .values = { + [0] = &inner_array, + }, +}; + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_inner_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + struct bpf_map *map; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + map = bpf_map_lookup_elem(&outer_array, &zero); + if (!map) + return 0; + + value = bpf_map_lookup_elem(map, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/normal_map_btf.c b/tools/testing/selftests/bpf/progs/normal_map_btf.c new file mode 100644 index 000000000000..66cde82aa86d --- /dev/null +++ b/tools/testing/selftests/bpf/progs/normal_map_btf.c @@ -0,0 +1,56 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. 
Huawei Technologies Co., Ltd */ +#include <vmlinux.h> +#include <bpf/bpf_tracing.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_misc.h" +#include "bpf_experimental.h" + +struct node_data { + __u64 data; + struct bpf_list_node node; +}; + +struct map_value { + struct bpf_list_head head __contains(node_data, node); + struct bpf_spin_lock lock; +}; + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct map_value); + __uint(max_entries, 1); +} array SEC(".maps"); + +char _license[] SEC("license") = "GPL"; + +int pid = 0; +bool done = false; + +SEC("fentry/" SYS_PREFIX "sys_nanosleep") +int add_to_list_in_array(void *ctx) +{ + struct map_value *value; + struct node_data *new; + int zero = 0; + + if (done || (u32)bpf_get_current_pid_tgid() != pid) + return 0; + + value = bpf_map_lookup_elem(&array, &zero); + if (!value) + return 0; + + new = bpf_obj_new(typeof(*new)); + if (!new) + return 0; + + bpf_spin_lock(&value->lock); + bpf_list_push_back(&value->head, &new->node); + bpf_spin_unlock(&value->lock); + done = true; + + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c new file mode 100644 index 000000000000..9085be50f03b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_map.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, 1); + __type(value, __u32); +} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c new file mode 100644 index 000000000000..3c7b2b618c8a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe") +int kprobe_prog(void *ctx) +{ + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/syscall.c b/tools/testing/selftests/bpf/progs/syscall.c index e550f728962d..3d3cafdebe72 100644 --- a/tools/testing/selftests/bpf/progs/syscall.c +++ b/tools/testing/selftests/bpf/progs/syscall.c @@ -6,9 +6,15 @@ #include <bpf/bpf_tracing.h> #include <../../../tools/include/linux/filter.h> #include <linux/btf.h> +#include <string.h> +#include <errno.h> char _license[] SEC("license") = "GPL"; +struct bpf_map { + int id; +} __attribute__((preserve_access_index)); + struct args { __u64 log_buf; __u32 log_size; @@ -27,6 +33,37 @@ struct args { BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_INT, 0, 0), sz), \ BTF_INT_ENC(encoding, bits_offset, bits) +struct { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, union bpf_attr); + __uint(max_entries, 1); +} bpf_attr_array SEC(".maps"); + +struct inner_map_type { + __uint(type, BPF_MAP_TYPE_ARRAY); + __uint(key_size, 4); + __uint(value_size, 4); + __uint(max_entries, 1); +} inner_map SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __type(key, int); + __type(value, int); + __uint(max_entries, 1); + __array(values, struct inner_map_type); +} outer_array_map SEC(".maps") = { + .values = { + [0] = &inner_map, + }, +}; + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + static int btf_load(void) { struct btf_blob { @@ -58,7 +95,7 @@ static int btf_load(void) } SEC("syscall") -int bpf_prog(struct args *ctx) +int load_prog(struct args *ctx) { static char license[] = "GPL"; static struct bpf_insn insns[] = { @@ -94,8 +131,8 @@ int bpf_prog(struct args *ctx) map_create_attr.max_entries = ctx->max_entries; map_create_attr.btf_fd = ret; - prog_load_attr.license = (long) license; - prog_load_attr.insns = (long) insns; + prog_load_attr.license = 
ptr_to_u64(license); + prog_load_attr.insns = ptr_to_u64(insns); prog_load_attr.log_buf = ctx->log_buf; prog_load_attr.log_size = ctx->log_size; prog_load_attr.log_level = 1; @@ -107,8 +144,8 @@ int bpf_prog(struct args *ctx) insns[3].imm = ret; map_update_attr.map_fd = ret; - map_update_attr.key = (long) &key; - map_update_attr.value = (long) &value; + map_update_attr.key = ptr_to_u64(&key); + map_update_attr.value = ptr_to_u64(&value); ret = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, &map_update_attr, sizeof(map_update_attr)); if (ret < 0) return ret; @@ -119,3 +156,52 @@ int bpf_prog(struct args *ctx) ctx->prog_fd = ret; return 1; } + +SEC("syscall") +int update_outer_map(void *ctx) +{ + int zero = 0, ret = 0, outer_fd = -1, inner_fd = -1, err; + const int attr_sz = sizeof(union bpf_attr); + union bpf_attr *attr; + + attr = bpf_map_lookup_elem((struct bpf_map *)&bpf_attr_array, &zero); + if (!attr) + goto out; + + memset(attr, 0, attr_sz); + attr->map_id = ((struct bpf_map *)&outer_array_map)->id; + outer_fd = bpf_sys_bpf(BPF_MAP_GET_FD_BY_ID, attr, attr_sz); + if (outer_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_type = BPF_MAP_TYPE_ARRAY; + attr->key_size = 4; + attr->value_size = 4; + attr->max_entries = 1; + inner_fd = bpf_sys_bpf(BPF_MAP_CREATE, attr, attr_sz); + if (inner_fd < 0) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + attr->value = ptr_to_u64(&inner_fd); + err = bpf_sys_bpf(BPF_MAP_UPDATE_ELEM, attr, attr_sz); + if (err) + goto out; + + memset(attr, 0, attr_sz); + attr->map_fd = outer_fd; + attr->key = ptr_to_u64(&zero); + err = bpf_sys_bpf(BPF_MAP_DELETE_ELEM, attr, attr_sz); + if (err) + goto out; + ret = 1; +out: + if (inner_fd >= 0) + bpf_sys_close(inner_fd); + if (outer_fd >= 0) + bpf_sys_close(outer_fd); + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_fsverity.c b/tools/testing/selftests/bpf/progs/test_fsverity.c new file mode 100644 index 
000000000000..3975495b75c8 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_fsverity.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ + +char expected_digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +char digest[SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; +__u32 monitored_pid; +__u32 got_fsverity; +__u32 digest_matches; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr; + __u32 pid; + int ret; + int i; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + ret = bpf_get_fsverity_digest(f, &digest_ptr); + if (ret < 0) + return 0; + got_fsverity = 1; + + for (i = 0; i < sizeof(digest); i++) { + if (digest[i] != expected_digest[i]) + return 0; + } + + digest_matches = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_get_xattr.c b/tools/testing/selftests/bpf/progs/test_get_xattr.c new file mode 100644 index 000000000000..7eb2a4e5a3e5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_get_xattr.c @@ -0,0 +1,37 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. 
*/ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +__u32 monitored_pid; +__u32 found_xattr; + +static const char expected_value[] = "hello"; +char value[32]; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr value_ptr; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + bpf_dynptr_from_mem(value, sizeof(value), 0, &value_ptr); + + ret = bpf_get_file_xattr(f, "user.kfuncs", &value_ptr); + if (ret != sizeof(expected_value)) + return 0; + if (bpf_strncmp(value, ret, expected_value)) + return 0; + found_xattr = 1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func15.c b/tools/testing/selftests/bpf/progs/test_global_func15.c index b512d6a6c75e..b4e089d6981d 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func15.c +++ b/tools/testing/selftests/bpf/progs/test_global_func15.c @@ -13,7 +13,7 @@ __noinline int foo(unsigned int *v) } SEC("cgroup_skb/ingress") -__failure __msg("At program exit the register R0 has value") +__failure __msg("At program exit the register R0 has ") int global_func15(struct __sk_buff *skb) { unsigned int v = 1; @@ -22,3 +22,35 @@ int global_func15(struct __sk_buff *skb) return v; } + +SEC("cgroup_skb/ingress") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 2: (b7) r0 = 1") +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7") +__msg("At program exit the register R0 has ") +__naked int global_func15_tricky_pruning(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 1;" + "1:" + "goto +0;" /* checkpoint */ + /* cgroup_skb/ingress program is expected to 
return [0, 1] + * values, so branch above makes sure that in a fallthrough + * case we have a valid 1 stored in R0 register, but in + * a branch case we assign some random value to R0. So if + * there is something wrong with precision tracking for R0 at + * program exit, we might erroneously prune branch case, + * because R0 in fallthrough case is imprecise (and thus any + * value is valid from POV of verifier is_state_equal() logic) + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} diff --git a/tools/testing/selftests/bpf/progs/test_global_func16.c b/tools/testing/selftests/bpf/progs/test_global_func16.c index e7206304632e..e3e64bc472cd 100644 --- a/tools/testing/selftests/bpf/progs/test_global_func16.c +++ b/tools/testing/selftests/bpf/progs/test_global_func16.c @@ -13,7 +13,7 @@ __noinline int foo(int (*arr)[10]) } SEC("cgroup_skb/ingress") -__failure __msg("invalid indirect read from stack") +__success int global_func16(struct __sk_buff *skb) { int array[10]; diff --git a/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c new file mode 100644 index 000000000000..2f0eb1334d65 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_sig_in_xattr.c @@ -0,0 +1,83 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <errno.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" + +char _license[] SEC("license") = "GPL"; + +#ifndef SHA256_DIGEST_SIZE +#define SHA256_DIGEST_SIZE 32 +#endif + +#define MAX_SIG_SIZE 1024 + +/* By default, "fsverity sign" signs a file with fsverity_formatted_digest + * of the file. fsverity_formatted_digest on the kernel side is only used + * with CONFIG_FS_VERITY_BUILTIN_SIGNATURES. However, BPF LSM doesn't + * require CONFIG_FS_VERITY_BUILTIN_SIGNATURES, so vmlinux.h may not have + * fsverity_formatted_digest. 
In this test, we intentionally avoid using + * fsverity_formatted_digest. + * + * Luckily, fsverity_formatted_digest is simply 8-byte magic followed by + * fsverity_digest. We use a char array of size fsverity_formatted_digest + * plus SHA256_DIGEST_SIZE. The magic part of it is filled by user space, + * and the rest of it is filled by bpf_get_fsverity_digest. + * + * Note that, generating signatures based on fsverity_formatted_digest is + * the design choice of this selftest (and "fsverity sign"). With BPF + * LSM, we have the flexibility to generate signature based on other data + * sets, for example, fsverity_digest or only the digest[] part of it. + */ +#define MAGIC_SIZE 8 +#define SIZEOF_STRUCT_FSVERITY_DIGEST 4 /* sizeof(struct fsverity_digest) */ +char digest[MAGIC_SIZE + SIZEOF_STRUCT_FSVERITY_DIGEST + SHA256_DIGEST_SIZE]; + +__u32 monitored_pid; +char sig[MAX_SIG_SIZE]; +__u32 sig_size; +__u32 user_keyring_serial; + +SEC("lsm.s/file_open") +int BPF_PROG(test_file_open, struct file *f) +{ + struct bpf_dynptr digest_ptr, sig_ptr; + struct bpf_key *trusted_keyring; + __u32 pid; + int ret; + + pid = bpf_get_current_pid_tgid() >> 32; + if (pid != monitored_pid) + return 0; + + /* digest_ptr points to fsverity_digest */ + bpf_dynptr_from_mem(digest + MAGIC_SIZE, sizeof(digest) - MAGIC_SIZE, 0, &digest_ptr); + + ret = bpf_get_fsverity_digest(f, &digest_ptr); + /* No verity, allow access */ + if (ret < 0) + return 0; + + /* Move digest_ptr to fsverity_formatted_digest */ + bpf_dynptr_from_mem(digest, sizeof(digest), 0, &digest_ptr); + + /* Read signature from xattr */ + bpf_dynptr_from_mem(sig, sizeof(sig), 0, &sig_ptr); + ret = bpf_get_file_xattr(f, "user.sig", &sig_ptr); + /* No signature, reject access */ + if (ret < 0) + return -EPERM; + + trusted_keyring = bpf_lookup_user_key(user_keyring_serial, 0); + if (!trusted_keyring) + return -ENOENT; + + /* Verify signature */ + ret = bpf_verify_pkcs7_signature(&digest_ptr, &sig_ptr, trusted_keyring); + + 
bpf_key_put(trusted_keyring); + return ret; +} diff --git a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c index f66af753bbbb..3e436e6f7312 100644 --- a/tools/testing/selftests/bpf/progs/test_tunnel_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tunnel_kern.c @@ -6,66 +6,34 @@ * modify it under the terms of version 2 of the GNU General Public * License as published by the Free Software Foundation. */ -#include <stddef.h> -#include <string.h> -#include <arpa/inet.h> -#include <linux/bpf.h> -#include <linux/if_ether.h> -#include <linux/if_packet.h> -#include <linux/if_tunnel.h> -#include <linux/ip.h> -#include <linux/ipv6.h> -#include <linux/icmp.h> -#include <linux/types.h> -#include <linux/socket.h> -#include <linux/pkt_cls.h> -#include <linux/erspan.h> -#include <linux/udp.h> +#include "vmlinux.h" +#include <bpf/bpf_core_read.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_endian.h> +#include "bpf_kfuncs.h" +#include "bpf_tracing_net.h" #define log_err(__ret) bpf_printk("ERROR line:%d ret:%d\n", __LINE__, __ret) -#define VXLAN_UDP_PORT 4789 +#define VXLAN_UDP_PORT 4789 +#define ETH_P_IP 0x0800 +#define PACKET_HOST 0 +#define TUNNEL_CSUM bpf_htons(0x01) +#define TUNNEL_KEY bpf_htons(0x04) /* Only IPv4 address assigned to veth1. 
* 172.16.1.200 */ #define ASSIGNED_ADDR_VETH1 0xac1001c8 -struct geneve_opt { - __be16 opt_class; - __u8 type; - __u8 length:5; - __u8 r3:1; - __u8 r2:1; - __u8 r1:1; - __u8 opt_data[8]; /* hard-coded to 8 byte */ -}; - -struct vxlanhdr { - __be32 vx_flags; - __be32 vx_vni; -} __attribute__((packed)); - -struct vxlan_metadata { - __u32 gbp; -}; - -struct bpf_fou_encap { - __be16 sport; - __be16 dport; -}; - -enum bpf_fou_encap_type { - FOU_BPF_ENCAP_FOU, - FOU_BPF_ENCAP_GUE, -}; - int bpf_skb_set_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap, int type) __ksym; int bpf_skb_get_fou_encap(struct __sk_buff *skb_ctx, struct bpf_fou_encap *encap) __ksym; +struct xfrm_state * +bpf_xdp_get_xfrm_state(struct xdp_md *ctx, struct bpf_xfrm_state_opts *opts, + u32 opts__sz) __ksym; +void bpf_xdp_xfrm_state_release(struct xfrm_state *x) __ksym; struct { __uint(type, BPF_MAP_TYPE_ARRAY); @@ -205,9 +173,9 @@ int erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 7; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -246,8 +214,9 @@ int erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -284,9 +253,9 @@ int ip4ip6erspan_set_tunnel(struct __sk_buff *skb) __u8 hwid = 17; md.version = 2; - md.u.md2.dir = direction; - md.u.md2.hwid = hwid & 0xf; - md.u.md2.hwid_upper = (hwid >> 4) & 0x3; + BPF_CORE_WRITE_BITFIELD(&md.u.md2, dir, 
direction); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid, (hwid & 0xf)); + BPF_CORE_WRITE_BITFIELD(&md.u.md2, hwid_upper, (hwid >> 4) & 0x3); #endif ret = bpf_skb_set_tunnel_opt(skb, &md, sizeof(md)); @@ -326,8 +295,9 @@ int ip4ip6erspan_get_tunnel(struct __sk_buff *skb) bpf_printk("\tindex %x\n", index); #else bpf_printk("\tdirection %d hwid %x timestamp %u\n", - md.u.md2.dir, - (md.u.md2.hwid_upper << 4) + md.u.md2.hwid, + BPF_CORE_READ_BITFIELD(&md.u.md2, dir), + (BPF_CORE_READ_BITFIELD(&md.u.md2, hwid_upper) << 4) + + BPF_CORE_READ_BITFIELD(&md.u.md2, hwid), bpf_ntohl(md.u.md2.timestamp)); #endif @@ -963,6 +933,10 @@ int ip6ip6_get_tunnel(struct __sk_buff *skb) return TC_ACT_OK; } +volatile int xfrm_reqid = 0; +volatile int xfrm_spi = 0; +volatile int xfrm_remote_ip = 0; + SEC("tc") int xfrm_get_state(struct __sk_buff *skb) { @@ -973,10 +947,58 @@ int xfrm_get_state(struct __sk_buff *skb) if (ret < 0) return TC_ACT_OK; - bpf_printk("reqid %d spi 0x%x remote ip 0x%x\n", - x.reqid, bpf_ntohl(x.spi), - bpf_ntohl(x.remote_ipv4)); + xfrm_reqid = x.reqid; + xfrm_spi = bpf_ntohl(x.spi); + xfrm_remote_ip = bpf_ntohl(x.remote_ipv4); + return TC_ACT_OK; } +volatile int xfrm_replay_window = 0; + +SEC("xdp") +int xfrm_get_state_xdp(struct xdp_md *xdp) +{ + struct bpf_xfrm_state_opts opts = {}; + struct xfrm_state *x = NULL; + struct ip_esp_hdr *esph; + struct bpf_dynptr ptr; + u8 esph_buf[8] = {}; + u8 iph_buf[20] = {}; + struct iphdr *iph; + u32 off; + + if (bpf_dynptr_from_xdp(xdp, 0, &ptr)) + goto out; + + off = sizeof(struct ethhdr); + iph = bpf_dynptr_slice(&ptr, off, iph_buf, sizeof(iph_buf)); + if (!iph || iph->protocol != IPPROTO_ESP) + goto out; + + off += sizeof(struct iphdr); + esph = bpf_dynptr_slice(&ptr, off, esph_buf, sizeof(esph_buf)); + if (!esph) + goto out; + + opts.netns_id = BPF_F_CURRENT_NETNS; + opts.daddr.a4 = iph->daddr; + opts.spi = esph->spi; + opts.proto = IPPROTO_ESP; + opts.family = AF_INET; + + x = bpf_xdp_get_xfrm_state(xdp, &opts, 
sizeof(opts)); + if (!x) + goto out; + + if (!x->replay_esn) + goto out; + + xfrm_replay_window = x->replay_esn->replay_window; +out: + if (x) + bpf_xdp_xfrm_state_release(x); + return XDP_PASS; +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c index 7748cc23de8a..f42e9f3831a1 100644 --- a/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c +++ b/tools/testing/selftests/bpf/progs/test_verify_pkcs7_sig.c @@ -10,17 +10,11 @@ #include <errno.h> #include <bpf/bpf_helpers.h> #include <bpf/bpf_tracing.h> +#include "bpf_kfuncs.h" #define MAX_DATA_SIZE (1024 * 1024) #define MAX_SIG_SIZE 1024 -extern struct bpf_key *bpf_lookup_user_key(__u32 serial, __u64 flags) __ksym; -extern struct bpf_key *bpf_lookup_system_key(__u64 id) __ksym; -extern void bpf_key_put(struct bpf_key *key) __ksym; -extern int bpf_verify_pkcs7_signature(struct bpf_dynptr *data_ptr, - struct bpf_dynptr *sig_ptr, - struct bpf_key *trusted_keyring) __ksym; - __u32 monitored_pid; __u32 user_keyring_serial; __u64 system_keyring_id; diff --git a/tools/testing/selftests/bpf/progs/timer_failure.c b/tools/testing/selftests/bpf/progs/timer_failure.c index 226d33b5a05c..0996c2486f05 100644 --- a/tools/testing/selftests/bpf/progs/timer_failure.c +++ b/tools/testing/selftests/bpf/progs/timer_failure.c @@ -21,17 +21,38 @@ struct { __type(value, struct elem); } timer_map SEC(".maps"); -static int timer_cb_ret1(void *map, int *key, struct bpf_timer *timer) +__naked __noinline __used +static unsigned long timer_cb_ret_bad() { - if (bpf_get_smp_processor_id() % 2) - return 1; - else - return 0; + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* async callback is expected to return 0, so branch above + * skipping r0 = 0; should lead to a failure, but if exit + * instruction doesn't enforce r0's precision, this 
callback + * will be successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); } SEC("fentry/bpf_fentry_test1") -__failure __msg("should have been in (0x0; 0x0)") -int BPF_PROG2(test_ret_1, int, a) +__log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +__failure +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before") +__msg(": (85) call bpf_get_prandom_u32#7") /* anchor message */ +/* check that branch code path marks r0 as precise */ +__msg("mark_precise: frame0: regs=r0 stack= before ") __msg(": (85) call bpf_get_prandom_u32#7") +__msg("should have been in [0, 0]") +long BPF_PROG2(test_bad_ret, int, a) { int key = 0; struct bpf_timer *timer; @@ -39,7 +60,7 @@ int BPF_PROG2(test_ret_1, int, a) timer = bpf_map_lookup_elem(&timer_map, &key); if (timer) { bpf_timer_init(timer, &timer_map, CLOCK_BOOTTIME); - bpf_timer_set_callback(timer, timer_cb_ret1); + bpf_timer_set_callback(timer, timer_cb_ret_bad); bpf_timer_start(timer, 1000, 0); } diff --git a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c index 03ee946c6bf7..11ab25c42c36 100644 --- a/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c +++ b/tools/testing/selftests/bpf/progs/user_ringbuf_fail.c @@ -184,7 +184,7 @@ invalid_drain_callback_return(struct bpf_dynptr *dynptr, void *context) * not be able to write to that pointer. 
*/ SEC("?raw_tp") -__failure __msg("At callback return the register R0 has value") +__failure __msg("At callback return the register R0 has ") int user_ringbuf_callback_invalid_return(void *ctx) { bpf_user_ringbuf_drain(&user_ringbuf, invalid_drain_callback_return, NULL, 0); diff --git a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c index 359df865a8f3..8d77cc5323d3 100644 --- a/tools/testing/selftests/bpf/progs/verifier_basic_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_basic_stack.c @@ -27,8 +27,8 @@ __naked void stack_out_of_bounds(void) SEC("socket") __description("uninitialized stack1") -__failure __msg("invalid indirect read from stack") -__failure_unpriv +__success __log_level(4) __msg("stack depth 8") +__failure_unpriv __msg_unpriv("invalid indirect read from stack") __naked void uninitialized_stack1(void) { asm volatile (" \ @@ -45,8 +45,8 @@ __naked void uninitialized_stack1(void) SEC("socket") __description("uninitialized stack2") -__failure __msg("invalid read from stack") -__failure_unpriv +__success __log_level(4) __msg("stack depth 8") +__failure_unpriv __msg_unpriv("invalid read from stack") __naked void uninitialized_stack2(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c new file mode 100644 index 000000000000..623f130a3198 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/verifier_bitfield_write.c @@ -0,0 +1,100 @@ +// SPDX-License-Identifier: GPL-2.0 + +#include <linux/bpf.h> +#include <stdint.h> + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_core_read.h> + +#include "bpf_misc.h" + +struct core_reloc_bitfields { + /* unsigned bitfields */ + uint8_t ub1: 1; + uint8_t ub2: 2; + uint32_t ub7: 7; + /* signed bitfields */ + int8_t sb4: 4; + int32_t sb20: 20; + /* non-bitfields */ + uint32_t u32; + int32_t s32; +} __attribute__((preserve_access_index)); 
+ +SEC("tc") +__description("single CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(3) +int single_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + return BPF_CORE_READ_BITFIELD(&bitfields, ub2); +} + +SEC("tc") +__description("multiple CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x3FD) +int multiple_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub2; + int8_t sb4; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, sb4, -1); + + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + sb4 = BPF_CORE_READ_BITFIELD(&bitfields, sb4); + + return (((uint8_t)sb4) << 2) | ub2; +} + +SEC("tc") +__description("adjacent CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(7) +int adjacent_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint8_t ub1, ub2; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub2, 3); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub2 = BPF_CORE_READ_BITFIELD(&bitfields, ub2); + + return (ub2 << 1) | ub1; +} + +SEC("tc") +__description("multibyte CO-RE bitfield roundtrip") +__btf_path("btf__core_reloc_bitfields.bpf.o") +__success +__retval(0x21) +int multibyte_field_roundtrip(struct __sk_buff *ctx) +{ + struct core_reloc_bitfields bitfields; + uint32_t ub7; + uint8_t ub1; + + __builtin_memset(&bitfields, 0, sizeof(bitfields)); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub1, 1); + BPF_CORE_WRITE_BITFIELD(&bitfields, ub7, 16); + + ub1 = BPF_CORE_READ_BITFIELD(&bitfields, ub1); + ub7 = BPF_CORE_READ_BITFIELD(&bitfields, ub7); 
+ + return (ub7 << 1) | ub1; +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c index d6c4a7f3f790..6e0f349f8f15 100644 --- a/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_cgroup_inv_retcode.c @@ -7,7 +7,7 @@ SEC("cgroup/sock") __description("bpf_exit with invalid return code. test1") -__failure __msg("R0 has value (0x0; 0xffffffff)") +__failure __msg("smin=0 smax=4294967295 should have been in [0, 1]") __naked void with_invalid_return_code_test1(void) { asm volatile (" \ @@ -30,7 +30,7 @@ __naked void with_invalid_return_code_test2(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test3") -__failure __msg("R0 has value (0x0; 0x3)") +__failure __msg("smin=0 smax=3 should have been in [0, 1]") __naked void with_invalid_return_code_test3(void) { asm volatile (" \ @@ -53,7 +53,7 @@ __naked void with_invalid_return_code_test4(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. test5") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test5(void) { asm volatile (" \ @@ -75,7 +75,7 @@ __naked void with_invalid_return_code_test6(void) SEC("cgroup/sock") __description("bpf_exit with invalid return code. 
test7") -__failure __msg("R0 has unknown scalar value") +__failure __msg("R0 has unknown scalar value should have been in [0, 1]") __naked void with_invalid_return_code_test7(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index 99a23dea8233..be95570ab382 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -411,7 +411,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("direct packet access: test17 (pruning, alignment)") -__failure __msg("misaligned packet access off 2+(0x0; 0x0)+15+-4 size 4") +__failure __msg("misaligned packet access off 2+0+15+-4 size 4") __flag(BPF_F_STRICT_ALIGNMENT) __naked void packet_access_test17_pruning_alignment(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c index a0a5efd1caa1..bd696a431244 100644 --- a/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c +++ b/tools/testing/selftests/bpf/progs/verifier_global_subprogs.c @@ -10,6 +10,7 @@ int arr[1]; int unkn_idx; +const volatile bool call_dead_subprog = false; __noinline long global_bad(void) { @@ -31,23 +32,31 @@ __noinline long global_calls_good_only(void) return global_good(); } +__noinline long global_dead(void) +{ + return arr[0] * 2; +} + SEC("?raw_tp") __success __log_level(2) /* main prog is validated completely first */ __msg("('global_calls_good_only') is global and assumed valid.") -__msg("1: (95) exit") /* eventually global_good() is transitively validated as well */ __msg("Validating global_good() func") __msg("('global_good') is safe for any args that match its prototype") int chained_global_func_calls_success(void) { - return global_calls_good_only(); + int sum = 0; + + if (call_dead_subprog) + sum += global_dead(); + return global_calls_good_only() 
+ sum; } SEC("?raw_tp") __failure __log_level(2) /* main prog validated successfully first */ -__msg("1: (95) exit") +__msg("('global_calls_bad') is global and assumed valid.") /* eventually we validate global_bad() and fail */ __msg("Validating global_bad() func") __msg("math between map_value pointer and register") /* BOOM */ diff --git a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c index b054f9c48143..9fc3fae5cd83 100644 --- a/tools/testing/selftests/bpf/progs/verifier_int_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_int_ptr.c @@ -5,9 +5,10 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -SEC("cgroup/sysctl") +SEC("socket") __description("ARG_PTR_TO_LONG uninitialized") -__failure __msg("invalid indirect read from stack R4 off -16+0 size 8") +__success +__failure_unpriv __msg_unpriv("invalid indirect read from stack R4 off -16+0 size 8") __naked void arg_ptr_to_long_uninitialized(void) { asm volatile (" \ @@ -67,7 +68,7 @@ __naked void ptr_to_long_half_uninitialized(void) SEC("cgroup/sysctl") __description("ARG_PTR_TO_LONG misaligned") -__failure __msg("misaligned stack access off (0x0; 0x0)+-20+0 size 8") +__failure __msg("misaligned stack access off 0+-20+0 size 8") __naked void arg_ptr_to_long_misaligned(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c index 353ae6da00e1..e1ffa5d32ff0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c +++ b/tools/testing/selftests/bpf/progs/verifier_netfilter_retcode.c @@ -39,7 +39,7 @@ __naked void with_valid_return_code_test3(void) SEC("netfilter") __description("bpf_exit with invalid return code. 
test4") -__failure __msg("R0 has value (0x2; 0x0)") +__failure __msg("R0 has smin=2 smax=2 should have been in [0, 1]") __naked void with_invalid_return_code_test4(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c index efbfc3a4ad6a..f67390224a9c 100644 --- a/tools/testing/selftests/bpf/progs/verifier_raw_stack.c +++ b/tools/testing/selftests/bpf/progs/verifier_raw_stack.c @@ -5,9 +5,10 @@ #include <bpf/bpf_helpers.h> #include "bpf_misc.h" -SEC("tc") +SEC("socket") __description("raw_stack: no skb_load_bytes") -__failure __msg("invalid read from stack R6 off=-8 size=8") +__success +__failure_unpriv __msg_unpriv("invalid read from stack R6 off=-8 size=8") __naked void stack_no_skb_load_bytes(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 6115520154e3..39fe3372e0e0 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -4,6 +4,7 @@ #include <linux/bpf.h> #include <bpf/bpf_helpers.h> #include "bpf_misc.h" +#include <../../../tools/include/linux/filter.h> struct { __uint(type, BPF_MAP_TYPE_RINGBUF); @@ -450,4 +451,290 @@ l0_%=: r1 >>= 16; \ : __clobber_all); } +SEC("raw_tp") +__log_level(2) +__success +__msg("fp-8=0m??mmmm") +__msg("fp-16=00mm??mm") +__msg("fp-24=00mm???m") +__naked void spill_subregs_preserve_stack_zero(void) +{ + asm volatile ( + "call %[bpf_get_prandom_u32];" + + /* 32-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp1_u8_st_zero];" /* ZERO, LLVM-18+: *(u8 *)(r10 -1) = 0; */ + "*(u8 *)(r10 -2) = r0;" /* MISC */ + /* fp-3 and fp-4 stay INVALID */ + "*(u32 *)(r10 -8) = r0;" + + /* 16-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp10_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -10) = 0; */ + "*(u16 *)(r10 -12) = r0;" /* MISC */ + /* 
fp-13 and fp-14 stay INVALID */ + "*(u16 *)(r10 -16) = r0;" + + /* 8-bit subreg spill with ZERO, MISC, and INVALID */ + ".8byte %[fp18_u16_st_zero];" /* ZERO, LLVM-18+: *(u16 *)(r10 -18) = 0; */ + "*(u16 *)(r10 -20) = r0;" /* MISC */ + /* fp-21, fp-22, and fp-23 stay INVALID */ + "*(u8 *)(r10 -24) = r0;" + + "r0 = 0;" + "exit;" + : + : __imm(bpf_get_prandom_u32), + __imm_insn(fp1_u8_st_zero, BPF_ST_MEM(BPF_B, BPF_REG_FP, -1, 0)), + __imm_insn(fp10_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -10, 0)), + __imm_insn(fp18_u16_st_zero, BPF_ST_MEM(BPF_H, BPF_REG_FP, -18, 0)) + : __clobber_all); +} + +char single_byte_buf[1] SEC(".data.single_byte_buf"); + +SEC("raw_tp") +__log_level(2) +__success +/* make sure fp-8 is all STACK_ZERO */ +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") +/* but fp-16 is spilled IMPRECISE zero const reg */ +__msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") +/* validate that assigning R2 from STACK_ZERO doesn't mark register + * precise immediately; if necessary, it will be marked precise later + */ +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000") +/* similarly, when R2 is assigned from spilled register, it is initially + * imprecise, but will be marked precise later once it is used in precise context + */ +__msg("10: (71) r2 = *(u8 *)(r10 -9) ; R2_w=0 R10=fp0 fp-16_w=0") +__msg("11: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 11 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 10: (71) r2 = *(u8 *)(r10 -9)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (71) r2 = *(u8 *)(r10 -1)") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 4: (7b) 
*(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0") +__naked void partial_stack_load_preserves_zeros(void) +{ + asm volatile ( + /* fp-8 is all STACK_ZERO */ + ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */ + + /* fp-16 is const zero register */ + "r0 = 0;" + "*(u64 *)(r10 -16) = r0;" + + /* load single U8 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -1);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U8 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -9);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -2);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -10);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -4);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -12);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from STACK_ZERO slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* for completeness, load U64 from ZERO REG slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(single_byte_buf), + __imm_insn(fp8_st_zero, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 0)) + : __clobber_common); +} + +char two_byte_buf[2] SEC(".data.two_byte_buf"); + +SEC("raw_tp") 
+__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is IMPRECISE fake register spill */ +__msg("3: (7a) *(u64 *)(r10 -8) = 1 ; R10=fp0 fp-8_w=1") +/* and fp-16 is spilled IMPRECISE const reg */ +__msg("5: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16_w=1") +/* validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (79) r2 = *(u64 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (79) r2 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +/* note, fp-8 is precise, fp-16 is not yet precise, we'll get there */ +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_w=1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (7a) *(u64 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (79) r2 = *(u64 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (79) r2 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (79) r2 = *(u64 *)(r10 -8)") 
+__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +/* now both fp-8 and fp-16 are precise, very good */ +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_rw=P1 fp-16_rw=P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u64 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 register */ + "r0 = 1;" + "*(u64 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U64 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U64 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u64 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_DW, BPF_REG_FP, -8, 1)) + : __clobber_common); +} + +SEC("raw_tp") +__log_level(2) __flag(BPF_F_TEST_STATE_FREQ) +__success +/* make sure fp-8 is 32-bit FAKE subregister spill */ +__msg("3: (62) *(u32 *)(r10 -8) = 1 ; R10=fp0 fp-8=????1") +/* but fp-16 is spilled IMPRECISE const reg */ +__msg("5: (63) *(u32 *)(r10 -16) = r0 ; R0_w=1 R10=fp0 fp-16=????1") +/*
validate load from fp-8, which was initialized using BPF_ST_MEM */ +__msg("8: (61) r2 = *(u32 *)(r10 -8) ; R2_w=1 R10=fp0 fp-8=????1") +__msg("9: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 9 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-8 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16=????1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 6: (05) goto pc+0") +__msg("mark_precise: frame0: regs= stack=-8 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs= stack=-8 before 4: (b7) r0 = 1") +__msg("mark_precise: frame0: regs= stack=-8 before 3: (62) *(u32 *)(r10 -8) = 1") +__msg("10: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +/* validate load from fp-16, which was initialized using BPF_STX_MEM */ +__msg("12: (61) r2 = *(u32 *)(r10 -16) ; R2_w=1 R10=fp0 fp-16=????1") +__msg("13: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 13 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 12: (61) r2 = *(u32 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (bf) r1 = r6") +__msg("mark_precise: frame0: regs= stack=-16 before 10: (73) *(u8 *)(r1 +0) = r2") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (0f) r1 += r2") +__msg("mark_precise: frame0: regs= stack=-16 before 8: (61) r2 = *(u32 *)(r10 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 7: (bf) r1 = r6") +__msg("mark_precise: frame0: parent state regs= stack=-16: R0_w=1 R1=ctx() R6_r=map_value(map=.data.two_byte_,ks=4,vs=2) R10=fp0 fp-8_r=????P1 fp-16_r=????P1") +__msg("mark_precise: frame0: last_idx 6 first_idx 3 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-16 before 6: (05) goto 
pc+0") +__msg("mark_precise: frame0: regs= stack=-16 before 5: (63) *(u32 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 4: (b7) r0 = 1") +__msg("14: R1_w=map_value(map=.data.two_byte_,ks=4,vs=2,off=1) R2_w=1") +__naked void stack_load_preserves_const_precision_subreg(void) +{ + asm volatile ( + /* establish checkpoint with state that has no stack slots; + * if we bubble up to this state without finding desired stack + * slot, then it's a bug and should be caught + */ + "goto +0;" + + /* fp-8 is const 1 *fake* SUB-register */ + ".8byte %[fp8_st_one];" /* LLVM-18+: *(u32 *)(r10 -8) = 1; */ + + /* fp-16 is const 1 SUB-register */ + "r0 = 1;" + "*(u32 *)(r10 -16) = r0;" + + /* force checkpoint to check precision marks preserved in parent states */ + "goto +0;" + + /* load single U32 from aligned FAKE_REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -8);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from aligned REG=1 slot */ + "r1 = %[two_byte_buf];" + "r2 = *(u32 *)(r10 -16);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(two_byte_buf), + __imm_insn(fp8_st_one, BPF_ST_MEM(BPF_W, BPF_REG_FP, -8, 1)) /* 32-bit spill */ + : __clobber_common); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c index e0f77e3e7869..417c61cd4b19 100644 --- a/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c +++ b/tools/testing/selftests/bpf/progs/verifier_stack_ptr.c @@ -37,7 +37,7 @@ __naked void ptr_to_stack_store_load(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on off") -__failure __msg("misaligned stack access off (0x0; 0x0)+-8+2 size 8") +__failure __msg("misaligned stack access off 0+-8+2 size 8") __failure_unpriv __naked void load_bad_alignment_on_off(void) { @@ -53,7 +53,7 @@ __naked void 
load_bad_alignment_on_off(void) SEC("socket") __description("PTR_TO_STACK store/load - bad alignment on reg") -__failure __msg("misaligned stack access off (0x0; 0x0)+-10+8 size 8") +__failure __msg("misaligned stack access off 0+-10+8 size 8") __failure_unpriv __naked void load_bad_alignment_on_reg(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c index b5efcaeaa1ae..6f5d19665cf6 100644 --- a/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c +++ b/tools/testing/selftests/bpf/progs/verifier_subprog_precision.c @@ -117,6 +117,56 @@ __naked int global_subprog_result_precise(void) ); } +__naked __noinline __used +static unsigned long loop_callback_bad() +{ + /* bpf_loop() callback that can return values outside of [0, 1] range */ + asm volatile ( + "call %[bpf_get_prandom_u32];" + "if r0 s> 1000 goto 1f;" + "r0 = 0;" + "1:" + "goto +0;" /* checkpoint */ + /* bpf_loop() expects [0, 1] values, so branch above skipping + * r0 = 0; should lead to a failure, but if exit instruction + * doesn't enforce r0's precision, this callback will be + * successfully verified + */ + "exit;" + : + : __imm(bpf_get_prandom_u32) + : __clobber_common + ); +} + +SEC("?raw_tp") +__failure __log_level(2) +__flag(BPF_F_TEST_STATE_FREQ) +/* check that fallthrough code path marks r0 as precise */ +__msg("mark_precise: frame1: regs=r0 stack= before 11: (b7) r0 = 0") +/* check that we have branch code path doing its own validation */ +__msg("from 10 to 12: frame1: R0=scalar(smin=umin=1001") +/* check that branch code path marks r0 as precise, before failing */ +__msg("mark_precise: frame1: regs=r0 stack= before 9: (85) call bpf_get_prandom_u32#7") +__msg("At callback return the register R0 has smin=1001 should have been in [0, 1]") +__naked int callback_precise_return_fail(void) +{ + asm volatile ( + "r1 = 1;" /* nr_loops */ + "r2 = %[loop_callback_bad];" /* callback_fn */ + "r3 = 0;" /* 
callback_ctx */ + "r4 = 0;" /* flags */ + "call %[bpf_loop];" + + "r0 = 0;" + "exit;" + : + : __imm_ptr(loop_callback_bad), + __imm(bpf_loop) + : __clobber_common + ); +} + SEC("?raw_tp") __success __log_level(2) /* First simulated path does not include callback body, @@ -539,11 +589,24 @@ static __u64 subprog_spill_reg_precise(void) SEC("?raw_tp") __success __log_level(2) -/* precision backtracking can't currently handle stack access not through r10, - * so we won't be able to mark stack slot fp-8 as precise, and so will - * fallback to forcing all as precise - */ -__msg("mark_precise: frame0: falling back to forcing all scalars precise") +__msg("10: (0f) r1 += r7") +__msg("mark_precise: frame0: last_idx 10 first_idx 7 subseq_idx -1") +__msg("mark_precise: frame0: regs=r7 stack= before 9: (bf) r1 = r8") +__msg("mark_precise: frame0: regs=r7 stack= before 8: (27) r7 *= 4") +__msg("mark_precise: frame0: regs=r7 stack= before 7: (79) r7 = *(u64 *)(r10 -8)") +__msg("mark_precise: frame0: parent state regs= stack=-8: R0_w=2 R6_w=1 R8_rw=map_value(map=.data.vals,ks=4,vs=16) R10=fp0 fp-8_rw=P1") +__msg("mark_precise: frame0: last_idx 18 first_idx 0 subseq_idx 7") +__msg("mark_precise: frame0: regs= stack=-8 before 18: (95) exit") +__msg("mark_precise: frame1: regs= stack= before 17: (0f) r0 += r2") +__msg("mark_precise: frame1: regs= stack= before 16: (79) r2 = *(u64 *)(r1 +0)") +__msg("mark_precise: frame1: regs= stack= before 15: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame1: regs= stack= before 14: (7b) *(u64 *)(r10 -16) = r2") +__msg("mark_precise: frame1: regs= stack= before 13: (7b) *(u64 *)(r1 +0) = r2") +__msg("mark_precise: frame1: regs=r2 stack= before 6: (85) call pc+6") +__msg("mark_precise: frame0: regs=r2 stack= before 5: (bf) r2 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 4: (07) r1 += -8") +__msg("mark_precise: frame0: regs=r6 stack= before 3: (bf) r1 = r10") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") 
__naked int subprog_spill_into_parent_stack_slot_precise(void) { asm volatile ( @@ -578,14 +641,68 @@ __naked int subprog_spill_into_parent_stack_slot_precise(void) ); } -__naked __noinline __used -static __u64 subprog_with_checkpoint(void) +SEC("?raw_tp") +__success __log_level(2) +__msg("17: (0f) r1 += r0") +__msg("mark_precise: frame0: last_idx 17 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r0 stack= before 16: (bf) r1 = r7") +__msg("mark_precise: frame0: regs=r0 stack= before 15: (27) r0 *= 4") +__msg("mark_precise: frame0: regs=r0 stack= before 14: (79) r0 = *(u64 *)(r10 -16)") +__msg("mark_precise: frame0: regs= stack=-16 before 13: (7b) *(u64 *)(r7 -8) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 12: (79) r0 = *(u64 *)(r8 +16)") +__msg("mark_precise: frame0: regs= stack=-16 before 11: (7b) *(u64 *)(r8 +16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 10: (79) r0 = *(u64 *)(r7 -8)") +__msg("mark_precise: frame0: regs= stack=-16 before 9: (7b) *(u64 *)(r10 -16) = r0") +__msg("mark_precise: frame0: regs=r0 stack= before 8: (07) r8 += -32") +__msg("mark_precise: frame0: regs=r0 stack= before 7: (bf) r8 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 6: (07) r7 += -8") +__msg("mark_precise: frame0: regs=r0 stack= before 5: (bf) r7 = r10") +__msg("mark_precise: frame0: regs=r0 stack= before 21: (95) exit") +__msg("mark_precise: frame1: regs=r0 stack= before 20: (bf) r0 = r1") +__msg("mark_precise: frame1: regs=r1 stack= before 4: (85) call pc+15") +__msg("mark_precise: frame0: regs=r1 stack= before 3: (bf) r1 = r6") +__msg("mark_precise: frame0: regs=r6 stack= before 2: (b7) r6 = 1") +__naked int stack_slot_aliases_precision(void) { asm volatile ( - "r0 = 0;" - /* guaranteed checkpoint if BPF_F_TEST_STATE_FREQ is used */ - "goto +0;" + "r6 = 1;" + /* pass r6 through r1 into subprog to get it back as r0; + * this whole chain will have to be marked as precise later + */ + "r1 = r6;" + "call 
identity_subprog;" + /* let's setup two registers that are aliased to r10 */ + "r7 = r10;" + "r7 += -8;" /* r7 = r10 - 8 */ + "r8 = r10;" + "r8 += -32;" /* r8 = r10 - 32 */ + /* now spill subprog's return value (a r6 -> r1 -> r0 chain) + * a few times through different stack pointer regs, making + * sure to use r10, r7, and r8 both in LDX and STX insns, and + * *importantly* also using a combination of const var_off and + * insn->off to validate that we record final stack slot + * correctly, instead of relying on just insn->off derivation, + * which is only valid for r10-based stack offset + */ + "*(u64 *)(r10 - 16) = r0;" + "r0 = *(u64 *)(r7 - 8);" /* r7 - 8 == r10 - 16 */ + "*(u64 *)(r8 + 16) = r0;" /* r8 + 16 = r10 - 16 */ + "r0 = *(u64 *)(r8 + 16);" + "*(u64 *)(r7 - 8) = r0;" + "r0 = *(u64 *)(r10 - 16);" + /* get ready to use r0 as an index into array to force precision */ + "r0 *= 4;" + "r1 = %[vals];" + /* here r0->r1->r6 chain is forced to be precise and has to be + * propagated back to the beginning, including through the + * subprog call and all the stack spills and loads + */ + "r1 += r0;" + "r0 = *(u32 *)(r1 + 0);" "exit;" + : + : __imm_ptr(vals) + : __clobber_common, "r6" ); } diff --git a/tools/testing/selftests/bpf/progs/verifier_var_off.c b/tools/testing/selftests/bpf/progs/verifier_var_off.c index 83a90afba785..c810f4f6f479 100644 --- a/tools/testing/selftests/bpf/progs/verifier_var_off.c +++ b/tools/testing/selftests/bpf/progs/verifier_var_off.c @@ -59,9 +59,10 @@ __naked void stack_read_priv_vs_unpriv(void) " ::: __clobber_all); } -SEC("lwt_in") +SEC("cgroup/skb") __description("variable-offset stack read, uninitialized") -__failure __msg("invalid variable-offset read from stack R2") +__success +__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root") __naked void variable_offset_stack_read_uninitialized(void) { asm volatile (" \ @@ -83,13 +84,56 @@ __naked void variable_offset_stack_read_uninitialized(void) SEC("socket") 
__description("variable-offset stack write, priv vs unpriv") -__success __failure_unpriv +__success +/* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset. + */ +__log_level(4) __msg("stack depth 16") +__failure_unpriv /* Variable stack access is rejected for unprivileged. */ __msg_unpriv("R2 variable stack access prohibited for !root") __retval(0) __naked void stack_write_priv_vs_unpriv(void) { + asm volatile (" \ + /* Get an unknown value */ \ + r2 = *(u32*)(r1 + 0); \ + /* Make it small and 8-byte aligned */ \ + r2 &= 8; \ + r2 -= 16; \ + /* Add it to fp. We now have either fp-8 or \ + * fp-16, but we don't know which \ + */ \ + r2 += r10; \ + /* Dereference it for a stack write */ \ + r0 = 0; \ + *(u64*)(r2 + 0) = r0; \ + exit; \ +" ::: __clobber_all); +} + +/* Similar to the previous test, but this time also perform a read from the + * address written to with a variable offset. The read is allowed, showing that, + * after a variable-offset write, a privileged program can read the slots that + * were in the range of that write (even if the verifier doesn't actually know if + * the slot being read was really written to or not). + * + * Despite this test being mostly a superset, the previous test is also kept for + * the sake of it checking the stack depth in the case where there is no read. + */ +SEC("socket") +__description("variable-offset stack write followed by read") +__success +/* Check that the maximum stack depth is correctly maintained according to the + * maximum possible variable offset.
+ */ +__log_level(4) __msg("stack depth 16") +__failure_unpriv +__msg_unpriv("R2 variable stack access prohibited for !root") +__retval(0) +__naked void stack_write_followed_by_read(void) +{ asm volatile (" \ /* Get an unknown value */ \ r2 = *(u32*)(r1 + 0); \ @@ -103,12 +147,7 @@ __naked void stack_write_priv_vs_unpriv(void) /* Dereference it for a stack write */ \ r0 = 0; \ *(u64*)(r2 + 0) = r0; \ - /* Now read from the address we just wrote. This shows\ - * that, after a variable-offset write, a priviledged\ - * program can read the slots that were in the range of\ - * that write (even if the verifier doesn't actually know\ - * if the slot being read was really written to or not.\ - */ \ + /* Now read from the address we just wrote. */ \ r3 = *(u64*)(r2 + 0); \ r0 = 0; \ exit; \ @@ -224,6 +263,35 @@ __naked void access_max_out_of_bound(void) : __clobber_all); } +/* Similar to the test above, but this time check the special case of a + * zero-sized stack access. We used to have a bug causing crashes for zero-sized + * out-of-bounds accesses. 
+ */ +SEC("socket") +__description("indirect variable-offset stack access, zero-sized, max out of bound") +__failure __msg("invalid variable-offset indirect access to stack R1") +__naked void zero_sized_access_max_out_of_bound(void) +{ + asm volatile (" \ + r0 = 0; \ + /* Fill some stack */ \ + *(u64*)(r10 - 16) = r0; \ + *(u64*)(r10 - 8) = r0; \ + /* Get an unknown value */ \ + r1 = *(u32*)(r1 + 0); \ + r1 &= 63; \ + r1 += -16; \ + /* r1 is now anywhere in [-16,48) */ \ + r1 += r10; \ + r2 = 0; \ + r3 = 0; \ + call %[bpf_probe_read_kernel]; \ + exit; \ +" : + : __imm(bpf_probe_read_kernel) + : __clobber_all); +} + SEC("lwt_in") __description("indirect variable-offset stack access, min out of bound") __failure __msg("invalid variable-offset indirect access to stack R2") @@ -253,9 +321,10 @@ __naked void access_min_out_of_bound(void) : __clobber_all); } -SEC("lwt_in") +SEC("cgroup/skb") __description("indirect variable-offset stack access, min_off < min_initialized") -__failure __msg("invalid indirect read from stack R2 var_off") +__success +__failure_unpriv __msg_unpriv("R2 variable stack access prohibited for !root") __naked void access_min_off_min_initialized(void) { asm volatile (" \ diff --git a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c index f6d1cc9ad892..330ece2eabdb 100644 --- a/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_hw_metadata.c @@ -20,21 +20,32 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, __u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp.frags") int rx(struct xdp_md *ctx) { void *data, *data_meta, *data_end; struct ipv6hdr *ip6h = NULL; - struct ethhdr *eth = NULL; struct udphdr *udp = NULL; struct 
iphdr *iph = NULL; struct xdp_meta *meta; + struct ethhdr *eth; int err; data = (void *)(long)ctx->data; data_end = (void *)(long)ctx->data_end; eth = data; + + if (eth + 1 < data_end && (eth->h_proto == bpf_htons(ETH_P_8021AD) || + eth->h_proto == bpf_htons(ETH_P_8021Q))) + eth = (void *)eth + sizeof(struct vlan_hdr); + + if (eth + 1 < data_end && eth->h_proto == bpf_htons(ETH_P_8021Q)) + eth = (void *)eth + sizeof(struct vlan_hdr); + if (eth + 1 < data_end) { if (eth->h_proto == bpf_htons(ETH_P_IP)) { iph = (void *)(eth + 1); @@ -76,15 +87,28 @@ int rx(struct xdp_md *ctx) return XDP_PASS; } + meta->hint_valid = 0; + + meta->xdp_timestamp = bpf_ktime_get_tai_ns(); err = bpf_xdp_metadata_rx_timestamp(ctx, &meta->rx_timestamp); - if (!err) - meta->xdp_timestamp = bpf_ktime_get_tai_ns(); + if (err) + meta->rx_timestamp_err = err; else - meta->rx_timestamp = 0; /* Used by AF_XDP as not avail signal */ + meta->hint_valid |= XDP_META_FIELD_TS; - err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); - if (err < 0) - meta->rx_hash_err = err; /* Used by AF_XDP as no hash signal */ + err = bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, + &meta->rx_hash_type); + if (err) + meta->rx_hash_err = err; + else + meta->hint_valid |= XDP_META_FIELD_RSS; + + err = bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); + if (err) + meta->rx_vlan_tag_err = err; + else + meta->hint_valid |= XDP_META_FIELD_VLAN_TAG; __sync_add_and_fetch(&pkts_redir, 1); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); diff --git a/tools/testing/selftests/bpf/progs/xdp_metadata.c b/tools/testing/selftests/bpf/progs/xdp_metadata.c index d151d406a123..31ca229bb3c0 100644 --- a/tools/testing/selftests/bpf/progs/xdp_metadata.c +++ b/tools/testing/selftests/bpf/progs/xdp_metadata.c @@ -23,15 +23,47 @@ extern int bpf_xdp_metadata_rx_timestamp(const struct xdp_md *ctx, __u64 *timestamp) __ksym; extern int bpf_xdp_metadata_rx_hash(const struct xdp_md *ctx, 
__u32 *hash, enum xdp_rss_hash_type *rss_type) __ksym; +extern int bpf_xdp_metadata_rx_vlan_tag(const struct xdp_md *ctx, + __be16 *vlan_proto, + __u16 *vlan_tci) __ksym; SEC("xdp") int rx(struct xdp_md *ctx) { - void *data, *data_meta; + void *data, *data_meta, *data_end; + struct ipv6hdr *ip6h = NULL; + struct ethhdr *eth = NULL; + struct udphdr *udp = NULL; + struct iphdr *iph = NULL; struct xdp_meta *meta; u64 timestamp = -1; int ret; + data = (void *)(long)ctx->data; + data_end = (void *)(long)ctx->data_end; + eth = data; + if (eth + 1 < data_end) { + if (eth->h_proto == bpf_htons(ETH_P_IP)) { + iph = (void *)(eth + 1); + if (iph + 1 < data_end && iph->protocol == IPPROTO_UDP) + udp = (void *)(iph + 1); + } + if (eth->h_proto == bpf_htons(ETH_P_IPV6)) { + ip6h = (void *)(eth + 1); + if (ip6h + 1 < data_end && ip6h->nexthdr == IPPROTO_UDP) + udp = (void *)(ip6h + 1); + } + if (udp && udp + 1 > data_end) + udp = NULL; + } + + if (!udp) + return XDP_PASS; + + /* Forwarding UDP:8080 to AF_XDP */ + if (udp->dest != bpf_htons(8080)) + return XDP_PASS; + /* Reserve enough for all custom metadata. 
*/ ret = bpf_xdp_adjust_meta(ctx, -(int)sizeof(struct xdp_meta)); @@ -57,6 +89,8 @@ int rx(struct xdp_md *ctx) meta->rx_timestamp = 1; bpf_xdp_metadata_rx_hash(ctx, &meta->rx_hash, &meta->rx_hash_type); + bpf_xdp_metadata_rx_vlan_tag(ctx, &meta->rx_vlan_proto, + &meta->rx_vlan_tci); return bpf_redirect_map(&xsk, ctx->rx_queue_index, XDP_PASS); } diff --git a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c index 80f620602d50..518329c666e9 100644 --- a/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c +++ b/tools/testing/selftests/bpf/progs/xdp_synproxy_kern.c @@ -467,13 +467,13 @@ static __always_inline int tcp_lookup(void *ctx, struct header_pointers *hdr, bo unsigned long status = ct->status; bpf_ct_release(ct); - if (status & IPS_CONFIRMED_BIT) + if (status & IPS_CONFIRMED) return XDP_PASS; } else if (ct_lookup_opts.error != -ENOENT) { return XDP_ABORTED; } - /* error == -ENOENT || !(status & IPS_CONFIRMED_BIT) */ + /* error == -ENOENT || !(status & IPS_CONFIRMED) */ return XDP_TX; } diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index a350ecdfba4a..74ceb7877ae2 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -27,6 +27,7 @@ #define TEST_TAG_RETVAL_PFX_UNPRIV "comment:test_retval_unpriv=" #define TEST_TAG_AUXILIARY "comment:test_auxiliary" #define TEST_TAG_AUXILIARY_UNPRIV "comment:test_auxiliary_unpriv" +#define TEST_BTF_PATH "comment:test_btf_path=" /* Warning: duplicated in bpf_misc.h */ #define POINTER_VALUE 0xcafe4all @@ -58,6 +59,7 @@ struct test_spec { const char *prog_name; struct test_subspec priv; struct test_subspec unpriv; + const char *btf_custom_path; int log_level; int prog_flags; int mode_mask; @@ -288,6 +290,8 @@ static int parse_test_spec(struct test_loader *tester, goto cleanup; update_flags(&spec->prog_flags, flags, clear); } + } else if (str_has_pfx(s, TEST_BTF_PATH)) { 
+ spec->btf_custom_path = s + sizeof(TEST_BTF_PATH) - 1; } } @@ -578,6 +582,9 @@ void run_subtest(struct test_loader *tester, } } + /* Implicitly reset to NULL if next test case doesn't specify */ + open_opts->btf_custom_path = spec->btf_custom_path; + tobj = bpf_object__open_mem(obj_bytes, obj_byte_cnt, open_opts); if (!ASSERT_OK_PTR(tobj, "obj_open_mem")) /* shouldn't happen */ goto subtest_cleanup; diff --git a/tools/testing/selftests/bpf/test_tunnel.sh b/tools/testing/selftests/bpf/test_tunnel.sh index 2dec7dbf29a2..d9661b9988ba 100755 --- a/tools/testing/selftests/bpf/test_tunnel.sh +++ b/tools/testing/selftests/bpf/test_tunnel.sh @@ -517,90 +517,6 @@ test_ip6ip6() echo -e ${GREEN}"PASS: ip6$TYPE"${NC} } -setup_xfrm_tunnel() -{ - auth=0x$(printf '1%.0s' {1..40}) - enc=0x$(printf '2%.0s' {1..32}) - spi_in_to_out=0x1 - spi_out_to_in=0x2 - # at_ns0 namespace - # at_ns0 -> root - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir out \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip netns exec at_ns0 \ - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip netns exec at_ns0 \ - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir in \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip netns exec at_ns0 \ - ip addr add dev veth0 10.1.1.100/32 - ip netns exec at_ns0 \ - ip route add 10.1.1.200 dev veth0 via 172.16.1.200 \ - src 10.1.1.100 - - # root namespace - # at_ns0 -> root - ip xfrm state add src 172.16.1.100 dst 172.16.1.200 proto esp \ - spi $spi_in_to_out reqid 1 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 
'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.100/32 dst 10.1.1.200/32 dir in \ - tmpl src 172.16.1.100 dst 172.16.1.200 proto esp reqid 1 \ - mode tunnel - # root -> at_ns0 - ip xfrm state add src 172.16.1.200 dst 172.16.1.100 proto esp \ - spi $spi_out_to_in reqid 2 mode tunnel \ - auth-trunc 'hmac(sha1)' $auth 96 enc 'cbc(aes)' $enc - ip xfrm policy add src 10.1.1.200/32 dst 10.1.1.100/32 dir out \ - tmpl src 172.16.1.200 dst 172.16.1.100 proto esp reqid 2 \ - mode tunnel - # address & route - ip addr add dev veth1 10.1.1.200/32 - ip route add 10.1.1.100 dev veth1 via 172.16.1.100 src 10.1.1.200 -} - -test_xfrm_tunnel() -{ - if [[ -e /sys/kernel/tracing/trace ]]; then - TRACE=/sys/kernel/tracing/trace - else - TRACE=/sys/kernel/debug/tracing/trace - fi - config_device - > ${TRACE} - setup_xfrm_tunnel - mkdir -p ${BPF_PIN_TUNNEL_DIR} - bpftool prog loadall ${BPF_FILE} ${BPF_PIN_TUNNEL_DIR} - tc qdisc add dev veth1 clsact - tc filter add dev veth1 proto ip ingress bpf da object-pinned \ - ${BPF_PIN_TUNNEL_DIR}/xfrm_get_state - ip netns exec at_ns0 ping $PING_ARG 10.1.1.200 - sleep 1 - grep "reqid 1" ${TRACE} - check_err $? - grep "spi 0x1" ${TRACE} - check_err $? - grep "remote ip 0xac100164" ${TRACE} - check_err $? - cleanup - - if [ $ret -ne 0 ]; then - echo -e ${RED}"FAIL: xfrm tunnel"${NC} - return 1 - fi - echo -e ${GREEN}"PASS: xfrm tunnel"${NC} -} - attach_bpf() { DEV=$1 @@ -630,10 +546,6 @@ cleanup() ip link del ip6geneve11 2> /dev/null ip link del erspan11 2> /dev/null ip link del ip6erspan11 2> /dev/null - ip xfrm policy delete dir out src 10.1.1.200/32 dst 10.1.1.100/32 2> /dev/null - ip xfrm policy delete dir in src 10.1.1.100/32 dst 10.1.1.200/32 2> /dev/null - ip xfrm state delete src 172.16.1.100 dst 172.16.1.200 proto esp spi 0x1 2> /dev/null - ip xfrm state delete src 172.16.1.200 dst 172.16.1.100 proto esp spi 0x2 2> /dev/null } cleanup_exit() @@ -716,10 +628,6 @@ bpf_tunnel_test() test_ip6ip6 errors=$(( $errors + $? 
)) - echo "Testing IPSec tunnel..." - test_xfrm_tunnel - errors=$(( $errors + $? )) - return $errors } diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 5b7a55136741..35284faff4f2 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -9,6 +9,9 @@ #include <bpf/libbpf.h> #include <time.h> +#define __TO_STR(x) #x +#define TO_STR(x) __TO_STR(x) + int parse_num_list(const char *s, bool **set, int *set_len); __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info); int bpf_prog_test_load(const char *file, enum bpf_prog_type type, diff --git a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c index 319337bdcfc8..9a7b1106fda8 100644 --- a/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c +++ b/tools/testing/selftests/bpf/verifier/atomic_cmpxchg.c @@ -84,17 +84,6 @@ .errstr = "!read_ok", }, { - "Can't use cmpxchg on uninit memory", - .insns = { - BPF_MOV64_IMM(BPF_REG_0, 3), - BPF_MOV64_IMM(BPF_REG_2, 4), - BPF_ATOMIC_OP(BPF_DW, BPF_CMPXCHG, BPF_REG_10, BPF_REG_2, -8), - BPF_EXIT_INSN(), - }, - .result = REJECT, - .errstr = "invalid read from stack", -}, -{ "BPF_W cmpxchg should zero top 32 bits", .insns = { /* r0 = U64_MAX; */ diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c index 3d5cd51071f0..ab25a81fd3a1 100644 --- a/tools/testing/selftests/bpf/verifier/calls.c +++ b/tools/testing/selftests/bpf/verifier/calls.c @@ -1505,7 +1505,9 @@ .prog_type = BPF_PROG_TYPE_XDP, .fixup_map_hash_8b = { 23 }, .result = REJECT, - .errstr = "invalid read from stack R7 off=-16 size=8", + .errstr = "R0 invalid mem access 'scalar'", + .result_unpriv = REJECT, + .errstr_unpriv = "invalid read from stack R7 off=-16 size=8", }, { "calls: two calls that receive map_value via arg=ptr_stack_of_caller. 
test1", diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 0d84dd1f38b6..8a2ff81d8350 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -140,10 +140,11 @@ .result = REJECT, }, { - "precise: ST insn causing spi > allocated_stack", + "precise: ST zero to stack insn is supported", .insns = { BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), + /* not a register spill, so we stop precision propagation for R4 here */ BPF_ST_MEM(BPF_DW, BPF_REG_3, -8, 0), BPF_LDX_MEM(BPF_DW, BPF_REG_4, BPF_REG_10, -8), BPF_MOV64_IMM(BPF_REG_0, -1), @@ -157,11 +158,11 @@ mark_precise: frame0: last_idx 4 first_idx 2\ mark_precise: frame0: regs=r4 stack= before 4\ mark_precise: frame0: regs=r4 stack= before 3\ - mark_precise: frame0: regs= stack=-8 before 2\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ mark_precise: frame0: last_idx 5 first_idx 5\ - mark_precise: frame0: parent state regs= stack=:", + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 4 first_idx 2\ + mark_precise: frame0: regs=r0 stack= before 4\ + 5: R0=-1 R4=0", .result = VERBOSE_ACCEPT, .retval = -1, }, @@ -169,6 +170,8 @@ "precise: STX insn causing spi > allocated_stack", .insns = { BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_prandom_u32), + /* make later reg spill more interesting by having somewhat known scalar */ + BPF_ALU64_IMM(BPF_AND, BPF_REG_0, 0xff), BPF_MOV64_REG(BPF_REG_3, BPF_REG_10), BPF_JMP_IMM(BPF_JNE, BPF_REG_3, 123, 0), BPF_STX_MEM(BPF_DW, BPF_REG_3, BPF_REG_0, -8), @@ -179,18 +182,21 @@ }, .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, - .errstr = "mark_precise: frame0: last_idx 6 first_idx 6\ + .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs=r4 stack=:\ - 
mark_precise: frame0: last_idx 5 first_idx 3\ - mark_precise: frame0: regs=r4 stack= before 5\ - mark_precise: frame0: regs=r4 stack= before 4\ - mark_precise: frame0: regs= stack=-8 before 3\ - mark_precise: frame0: falling back to forcing all scalars precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - force_precise: frame0: forcing r0 to be precise\ - mark_precise: frame0: last_idx 6 first_idx 6\ + mark_precise: frame0: last_idx 6 first_idx 4\ + mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ + mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ + mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 3 first_idx 3\ + mark_precise: frame0: regs=r0 stack= before 3: (55) if r3 != 0x7b goto pc+0\ + mark_precise: frame0: regs=r0 stack= before 2: (bf) r3 = r10\ + mark_precise: frame0: regs=r0 stack= before 1: (57) r0 &= 255\ + mark_precise: frame0: parent state regs=r0 stack=:\ + mark_precise: frame0: last_idx 0 first_idx 0\ + mark_precise: frame0: regs=r0 stack= before 0: (85) call bpf_get_prandom_u32#7\ + mark_precise: frame0: last_idx 7 first_idx 7\ mark_precise: frame0: parent state regs= stack=:", .result = VERBOSE_ACCEPT, .retval = -1, diff --git a/tools/testing/selftests/bpf/verify_sig_setup.sh b/tools/testing/selftests/bpf/verify_sig_setup.sh index ba08922b4a27..f2cac42298ba 100755 --- a/tools/testing/selftests/bpf/verify_sig_setup.sh +++ b/tools/testing/selftests/bpf/verify_sig_setup.sh @@ -60,6 +60,27 @@ cleanup() { rm -rf ${tmp_dir} } +fsverity_create_sign_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + sig_file=${tmp_dir}/sig-file + dd if=/dev/urandom of=$data_file bs=1 count=12345 2> /dev/null + fsverity sign --key ${tmp_dir}/signing_key.pem $data_file $sig_file + + # We do not want to 
enable fsverity on $data_file yet. Try whether + # the file system support fsverity on a different file. + touch ${tmp_dir}/tmp-file + fsverity enable ${tmp_dir}/tmp-file +} + +fsverity_enable_file() { + local tmp_dir="$1" + + data_file=${tmp_dir}/data-file + fsverity enable $data_file +} + catch() { local exit_code="$1" @@ -86,6 +107,10 @@ main() setup "${tmp_dir}" elif [[ "${action}" == "cleanup" ]]; then cleanup "${tmp_dir}" + elif [[ "${action}" == "fsverity-create-sign" ]]; then + fsverity_create_sign_file "${tmp_dir}" + elif [[ "${action}" == "fsverity-enable" ]]; then + fsverity_enable_file "${tmp_dir}" else echo "Unknown action: ${action}" exit 1 diff --git a/tools/testing/selftests/bpf/veristat.c b/tools/testing/selftests/bpf/veristat.c index 1d418d66e375..244d4996e06e 100644 --- a/tools/testing/selftests/bpf/veristat.c +++ b/tools/testing/selftests/bpf/veristat.c @@ -1254,7 +1254,7 @@ static int cmp_join_stat(const struct verif_stats_join *s1, bool asc, bool abs) { const char *str1 = NULL, *str2 = NULL; - double v1, v2; + double v1 = 0.0, v2 = 0.0; int cmp = 0; fetch_join_stat_value(s1, id, var, &str1, &v1); diff --git a/tools/testing/selftests/bpf/xdp_hw_metadata.c b/tools/testing/selftests/bpf/xdp_hw_metadata.c index 3291625ba4fb..878d68db0325 100644 --- a/tools/testing/selftests/bpf/xdp_hw_metadata.c +++ b/tools/testing/selftests/bpf/xdp_hw_metadata.c @@ -21,6 +21,9 @@ #include "xsk.h" #include <error.h> +#include <linux/kernel.h> +#include <linux/bits.h> +#include <linux/bitfield.h> #include <linux/errqueue.h> #include <linux/if_link.h> #include <linux/net_tstamp.h> @@ -79,7 +82,7 @@ static int open_xsk(int ifindex, struct xsk *xsk, __u32 queue_id) .flags = XSK_UMEM__DEFAULT_FLAGS, .tx_metadata_len = sizeof(struct xsk_tx_metadata), }; - __u32 idx; + __u32 idx = 0; u64 addr; int ret; int i; @@ -182,19 +185,31 @@ static void print_tstamp_delta(const char *name, const char *refname, (double)delta / 1000); } +#define VLAN_PRIO_MASK GENMASK(15, 13) /* 
Priority Code Point */ +#define VLAN_DEI_MASK GENMASK(12, 12) /* Drop Eligible Indicator */ +#define VLAN_VID_MASK GENMASK(11, 0) /* VLAN Identifier */ +static void print_vlan_tci(__u16 tag) +{ + __u16 vlan_id = FIELD_GET(VLAN_VID_MASK, tag); + __u8 pcp = FIELD_GET(VLAN_PRIO_MASK, tag); + bool dei = FIELD_GET(VLAN_DEI_MASK, tag); + + printf("PCP=%u, DEI=%d, VID=0x%X\n", pcp, dei, vlan_id); +} + static void verify_xdp_metadata(void *data, clockid_t clock_id) { struct xdp_meta *meta; meta = data - sizeof(*meta); - if (meta->rx_hash_err < 0) - printf("No rx_hash err=%d\n", meta->rx_hash_err); - else + if (meta->hint_valid & XDP_META_FIELD_RSS) printf("rx_hash: 0x%X with RSS type:0x%X\n", meta->rx_hash, meta->rx_hash_type); + else + printf("No rx_hash, err=%d\n", meta->rx_hash_err); - if (meta->rx_timestamp) { + if (meta->hint_valid & XDP_META_FIELD_TS) { __u64 ref_tstamp = gettime(clock_id); /* store received timestamps to calculate a delta at tx */ @@ -206,7 +221,16 @@ static void verify_xdp_metadata(void *data, clockid_t clock_id) print_tstamp_delta("XDP RX-time", "User RX-time", meta->xdp_timestamp, ref_tstamp); } else { - printf("No rx_timestamp\n"); + printf("No rx_timestamp, err=%d\n", meta->rx_timestamp_err); + } + + if (meta->hint_valid & XDP_META_FIELD_VLAN_TAG) { + printf("rx_vlan_proto: 0x%X\n", ntohs(meta->rx_vlan_proto)); + printf("rx_vlan_tci: "); + print_vlan_tci(meta->rx_vlan_tci); + } else { + printf("No rx_vlan_tci or rx_vlan_proto, err=%d\n", + meta->rx_vlan_tag_err); } } diff --git a/tools/testing/selftests/bpf/xdp_metadata.h b/tools/testing/selftests/bpf/xdp_metadata.h index 938a729bd307..87318ad1117a 100644 --- a/tools/testing/selftests/bpf/xdp_metadata.h +++ b/tools/testing/selftests/bpf/xdp_metadata.h @@ -9,12 +9,44 @@ #define ETH_P_IPV6 0x86DD #endif +#ifndef ETH_P_8021Q +#define ETH_P_8021Q 0x8100 +#endif + +#ifndef ETH_P_8021AD +#define ETH_P_8021AD 0x88A8 +#endif + +#ifndef BIT +#define BIT(nr) (1 << (nr)) +#endif + +/* Non-existent 
checksum status */ +#define XDP_CHECKSUM_MAGIC BIT(2) + +enum xdp_meta_field { + XDP_META_FIELD_TS = BIT(0), + XDP_META_FIELD_RSS = BIT(1), + XDP_META_FIELD_VLAN_TAG = BIT(2), +}; + struct xdp_meta { - __u64 rx_timestamp; + union { + __u64 rx_timestamp; + __s32 rx_timestamp_err; + }; __u64 xdp_timestamp; __u32 rx_hash; union { __u32 rx_hash_type; __s32 rx_hash_err; }; + union { + struct { + __be16 rx_vlan_proto; + __u16 rx_vlan_tci; + }; + __s32 rx_vlan_tag_err; + }; + enum xdp_meta_field hint_valid; }; diff --git a/tools/testing/selftests/bpf/xskxceiver.c b/tools/testing/selftests/bpf/xskxceiver.c index b604c570309a..b1102ee13faa 100644 --- a/tools/testing/selftests/bpf/xskxceiver.c +++ b/tools/testing/selftests/bpf/xskxceiver.c @@ -634,16 +634,24 @@ static u32 pkt_nb_frags(u32 frame_size, struct pkt_stream *pkt_stream, struct pk return nb_frags; } +static bool set_pkt_valid(int offset, u32 len) +{ + return len <= MAX_ETH_JUMBO_SIZE; +} + static void pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) { pkt->offset = offset; pkt->len = len; - if (len > MAX_ETH_JUMBO_SIZE) { - pkt->valid = false; - } else { - pkt->valid = true; - pkt_stream->nb_valid_entries++; - } + pkt->valid = set_pkt_valid(offset, len); +} + +static void pkt_stream_pkt_set(struct pkt_stream *pkt_stream, struct pkt *pkt, int offset, u32 len) +{ + bool prev_pkt_valid = pkt->valid; + + pkt_set(pkt_stream, pkt, offset, len); + pkt_stream->nb_valid_entries += pkt->valid - prev_pkt_valid; } static u32 pkt_get_buffer_len(struct xsk_umem_info *umem, u32 len) @@ -665,7 +673,7 @@ static struct pkt_stream *__pkt_stream_generate(u32 nb_pkts, u32 pkt_len, u32 nb for (i = 0; i < nb_pkts; i++) { struct pkt *pkt = &pkt_stream->pkts[i]; - pkt_set(pkt_stream, pkt, 0, pkt_len); + pkt_stream_pkt_set(pkt_stream, pkt, 0, pkt_len); pkt->pkt_nb = nb_start + i * nb_off; } @@ -700,10 +708,9 @@ static void __pkt_stream_replace_half(struct ifobject *ifobj, u32 pkt_len, pkt_stream = 
pkt_stream_clone(ifobj->xsk->pkt_stream); for (i = 1; i < ifobj->xsk->pkt_stream->nb_pkts; i += 2) - pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); + pkt_stream_pkt_set(pkt_stream, &pkt_stream->pkts[i], offset, pkt_len); ifobj->xsk->pkt_stream = pkt_stream; - pkt_stream->nb_valid_entries /= 2; } static void pkt_stream_replace_half(struct test_spec *test, u32 pkt_len, int offset) |