diff options
Diffstat (limited to 'tools')
131 files changed, 5205 insertions, 1125 deletions
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c index cb46667a6b2e..afde9d0c2ea1 100644 --- a/tools/bpf/bpftool/link.c +++ b/tools/bpf/bpftool/link.c @@ -249,18 +249,44 @@ static int get_prog_info(int prog_id, struct bpf_prog_info *info) return err; } -static int cmp_u64(const void *A, const void *B) +struct addr_cookie { + __u64 addr; + __u64 cookie; +}; + +static int cmp_addr_cookie(const void *A, const void *B) { - const __u64 *a = A, *b = B; + const struct addr_cookie *a = A, *b = B; - return *a - *b; + if (a->addr == b->addr) + return 0; + return a->addr < b->addr ? -1 : 1; +} + +static struct addr_cookie * +get_addr_cookie_array(__u64 *addrs, __u64 *cookies, __u32 count) +{ + struct addr_cookie *data; + __u32 i; + + data = calloc(count, sizeof(data[0])); + if (!data) { + p_err("mem alloc failed"); + return NULL; + } + for (i = 0; i < count; i++) { + data[i].addr = addrs[i]; + data[i].cookie = cookies[i]; + } + qsort(data, count, sizeof(data[0]), cmp_addr_cookie); + return data; } static void show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) { + struct addr_cookie *data; __u32 i, j = 0; - __u64 *addrs; jsonw_bool_field(json_wtr, "retprobe", info->kprobe_multi.flags & BPF_F_KPROBE_MULTI_RETURN); @@ -268,14 +294,20 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_uint_field(json_wtr, "missed", info->kprobe_multi.missed); jsonw_name(json_wtr, "funcs"); jsonw_start_array(json_wtr); - addrs = u64_to_ptr(info->kprobe_multi.addrs); - qsort(addrs, info->kprobe_multi.count, sizeof(addrs[0]), cmp_u64); + data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs), + u64_to_ptr(info->kprobe_multi.cookies), + info->kprobe_multi.count); + if (!data) + return; /* Load it once for all. */ if (!dd.sym_count) kernel_syms_load(&dd); + if (!dd.sym_count) + goto error; + for (i = 0; i < dd.sym_count; i++) { - if (dd.sym_mapping[i].address != addrs[j]) + if (dd.sym_mapping[i].address != data[j].addr) continue; jsonw_start_object(json_wtr); jsonw_uint_field(json_wtr, "addr", dd.sym_mapping[i].address); @@ -287,11 +319,14 @@ show_kprobe_multi_json(struct bpf_link_info *info, json_writer_t *wtr) } else { jsonw_string_field(json_wtr, "module", dd.sym_mapping[i].module); } + jsonw_uint_field(json_wtr, "cookie", data[j].cookie); jsonw_end_object(json_wtr); if (j++ == info->kprobe_multi.count) break; } jsonw_end_array(json_wtr); +error: + free(data); } static __u64 *u64_to_arr(__u64 val) @@ -334,6 +369,7 @@ show_perf_event_kprobe_json(struct bpf_link_info *info, json_writer_t *wtr) u64_to_ptr(info->perf_event.kprobe.func_name)); jsonw_uint_field(wtr, "offset", info->perf_event.kprobe.offset); jsonw_uint_field(wtr, "missed", info->perf_event.kprobe.missed); + jsonw_uint_field(wtr, "cookie", info->perf_event.kprobe.cookie); } static void @@ -343,6 +379,7 @@ show_perf_event_uprobe_json(struct bpf_link_info *info, json_writer_t *wtr) jsonw_string_field(wtr, "file", u64_to_ptr(info->perf_event.uprobe.file_name)); jsonw_uint_field(wtr, "offset", info->perf_event.uprobe.offset); + jsonw_uint_field(wtr, "cookie", info->perf_event.uprobe.cookie); } static void @@ -350,6 +387,7 @@ show_perf_event_tracepoint_json(struct bpf_link_info *info, json_writer_t *wtr) { jsonw_string_field(wtr, "tracepoint", u64_to_ptr(info->perf_event.tracepoint.tp_name)); + jsonw_uint_field(wtr, "cookie", info->perf_event.tracepoint.cookie); } static char *perf_config_hw_cache_str(__u64 config) @@ -426,6 +464,8 @@ show_perf_event_event_json(struct bpf_link_info *info, json_writer_t *wtr) else jsonw_uint_field(wtr, "event_config", config); + jsonw_uint_field(wtr, "cookie", info->perf_event.event.cookie); + if (type == PERF_TYPE_HW_CACHE && perf_config) free((void *)perf_config); } @@ -670,8 +710,8 @@ void netfilter_dump_plain(const struct bpf_link_info *info) static void show_kprobe_multi_plain(struct bpf_link_info *info) { + struct addr_cookie *data; __u32 i, j = 0; - __u64 *addrs; if (!info->kprobe_multi.count) return; @@ -683,21 +723,24 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) printf("func_cnt %u ", info->kprobe_multi.count); if (info->kprobe_multi.missed) printf("missed %llu ", info->kprobe_multi.missed); - addrs = (__u64 *)u64_to_ptr(info->kprobe_multi.addrs); - qsort(addrs, info->kprobe_multi.count, sizeof(__u64), cmp_u64); + data = get_addr_cookie_array(u64_to_ptr(info->kprobe_multi.addrs), + u64_to_ptr(info->kprobe_multi.cookies), + info->kprobe_multi.count); + if (!data) + return; /* Load it once for all. */ if (!dd.sym_count) kernel_syms_load(&dd); if (!dd.sym_count) - return; + goto error; - printf("\n\t%-16s %s", "addr", "func [module]"); + printf("\n\t%-16s %-16s %s", "addr", "cookie", "func [module]"); for (i = 0; i < dd.sym_count; i++) { - if (dd.sym_mapping[i].address != addrs[j]) + if (dd.sym_mapping[i].address != data[j].addr) continue; - printf("\n\t%016lx %s", - dd.sym_mapping[i].address, dd.sym_mapping[i].name); + printf("\n\t%016lx %-16llx %s", + dd.sym_mapping[i].address, data[j].cookie, dd.sym_mapping[i].name); if (dd.sym_mapping[i].module[0] != '\0') printf(" [%s] ", dd.sym_mapping[i].module); else @@ -706,6 +749,8 @@ static void show_kprobe_multi_plain(struct bpf_link_info *info) if (j++ == info->kprobe_multi.count) break; } +error: + free(data); } static void show_uprobe_multi_plain(struct bpf_link_info *info) @@ -754,6 +799,8 @@ static void show_perf_event_kprobe_plain(struct bpf_link_info *info) printf("+%#x", info->perf_event.kprobe.offset); if (info->perf_event.kprobe.missed) printf(" missed %llu", info->perf_event.kprobe.missed); + if (info->perf_event.kprobe.cookie) + printf(" cookie %llu", info->perf_event.kprobe.cookie); printf(" "); } @@ -770,6 +817,8 @@ static void show_perf_event_uprobe_plain(struct bpf_link_info *info) else printf("\n\tuprobe "); printf("%s+%#x ", buf, info->perf_event.uprobe.offset); + if (info->perf_event.uprobe.cookie) + printf("cookie %llu ", info->perf_event.uprobe.cookie); } static void show_perf_event_tracepoint_plain(struct bpf_link_info *info) @@ -781,6 +830,8 @@ static void show_perf_event_tracepoint_plain(struct bpf_link_info *info) return; printf("\n\ttracepoint %s ", buf); + if (info->perf_event.tracepoint.cookie) + printf("cookie %llu ", info->perf_event.tracepoint.cookie); } static void show_perf_event_event_plain(struct bpf_link_info *info) @@ -802,6 +853,9 @@ static void show_perf_event_event_plain(struct bpf_link_info *info) else printf("%llu ", config); + if (info->perf_event.event.cookie) + printf("cookie %llu ", info->perf_event.event.cookie); + if (type == PERF_TYPE_HW_CACHE && perf_config) free((void *)perf_config); } @@ -952,6 +1006,14 @@ again: return -ENOMEM; } info.kprobe_multi.addrs = ptr_to_u64(addrs); + cookies = calloc(count, sizeof(__u64)); + if (!cookies) { + p_err("mem alloc failed"); + free(addrs); + close(fd); + return -ENOMEM; + } + info.kprobe_multi.cookies = ptr_to_u64(cookies); goto again; } } @@ -977,7 +1039,7 @@ again: cookies = calloc(count, sizeof(__u64)); if (!cookies) { p_err("mem alloc failed"); - free(cookies); + free(ref_ctr_offsets); free(offsets); close(fd); return -ENOMEM; diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c index feb8e305804f..9cb42a3366c0 100644 --- a/tools/bpf/bpftool/prog.c +++ b/tools/bpf/bpftool/prog.c @@ -2298,7 +2298,7 @@ static int profile_open_perf_events(struct profiler_bpf *obj) int map_fd; profile_perf_events = calloc( - sizeof(int), obj->rodata->num_cpu * obj->rodata->num_metric); + obj->rodata->num_cpu * obj->rodata->num_metric, sizeof(int)); if (!profile_perf_events) { p_err("failed to allocate memory for perf_event array: %s", strerror(errno)); diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h index 7f24d898efbb..d96708380e52 100644 --- a/tools/include/uapi/linux/bpf.h +++ b/tools/include/uapi/linux/bpf.h @@ -847,6 +847,36 @@ union bpf_iter_link_info { * Returns zero on success. On error, -1 is returned and *errno* * is set appropriately. * + * BPF_TOKEN_CREATE + * Description + * Create BPF token with embedded information about what + * BPF-related functionality it allows: + * - a set of allowed bpf() syscall commands; + * - a set of allowed BPF map types to be created with + * BPF_MAP_CREATE command, if BPF_MAP_CREATE itself is allowed; + * - a set of allowed BPF program types and BPF program attach + * types to be loaded with BPF_PROG_LOAD command, if + * BPF_PROG_LOAD itself is allowed. + * + * BPF token is created (derived) from an instance of BPF FS, + * assuming it has necessary delegation mount options specified. + * This BPF token can be passed as an extra parameter to various + * bpf() syscall commands to grant BPF subsystem functionality to + * unprivileged processes. + * + * When created, BPF token is "associated" with the owning + * user namespace of BPF FS instance (super block) that it was + * derived from, and subsequent BPF operations performed with + * BPF token would be performing capabilities checks (i.e., + * CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN, CAP_SYS_ADMIN) within + * that user namespace. Without BPF token, such capabilities + * have to be granted in init user namespace, making bpf() + * syscall incompatible with user namespace, for the most part. + * + * Return + * A new file descriptor (a nonnegative integer), or -1 if an + * error occurred (in which case, *errno* is set appropriately). + * * NOTES * eBPF objects (maps and programs) can be shared between processes. * @@ -901,6 +931,8 @@ enum bpf_cmd { BPF_ITER_CREATE, BPF_LINK_DETACH, BPF_PROG_BIND_MAP, + BPF_TOKEN_CREATE, + __MAX_BPF_CMD, }; enum bpf_map_type { @@ -951,6 +983,7 @@ enum bpf_map_type { BPF_MAP_TYPE_BLOOM_FILTER, BPF_MAP_TYPE_USER_RINGBUF, BPF_MAP_TYPE_CGRP_STORAGE, + __MAX_BPF_MAP_TYPE }; /* Note that tracing related programs such as @@ -995,6 +1028,7 @@ enum bpf_prog_type { BPF_PROG_TYPE_SK_LOOKUP, BPF_PROG_TYPE_SYSCALL, /* a program that can execute syscalls */ BPF_PROG_TYPE_NETFILTER, + __MAX_BPF_PROG_TYPE }; enum bpf_attach_type { @@ -1330,6 +1364,12 @@ enum { /* Get path from provided FD in BPF_OBJ_PIN/BPF_OBJ_GET commands */ BPF_F_PATH_FD = (1U << 14), + +/* Flag for value_type_btf_obj_fd, the fd is available */ + BPF_F_VTYPE_BTF_OBJ_FD = (1U << 15), + +/* BPF token FD is passed in a corresponding command's token_fd field */ + BPF_F_TOKEN_FD = (1U << 16), }; /* Flags for BPF_PROG_QUERY. */ @@ -1403,6 +1443,15 @@ union bpf_attr { * to using 5 hash functions). */ __u64 map_extra; + + __s32 value_type_btf_obj_fd; /* fd pointing to a BTF + * type data for + * btf_vmlinux_value_type_id. + */ + /* BPF token FD to use with BPF_MAP_CREATE operation. + * If provided, map_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 map_token_fd; }; struct { /* anonymous struct used by BPF_MAP_*_ELEM commands */ @@ -1472,6 +1521,10 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 log_true_size; + /* BPF token FD to use with BPF_PROG_LOAD operation. + * If provided, prog_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 prog_token_fd; }; struct { /* anonymous struct used by BPF_OBJ_* commands */ @@ -1584,6 +1637,11 @@ union bpf_attr { * truncated), or smaller (if log buffer wasn't filled completely). */ __u32 btf_log_true_size; + __u32 btf_flags; + /* BPF token FD to use with BPF_BTF_LOAD operation. + * If provided, btf_flags should have BPF_F_TOKEN_FD flag set. + */ + __s32 btf_token_fd; }; struct { @@ -1714,6 +1772,11 @@ union bpf_attr { __u32 flags; /* extra flags */ } prog_bind_map; + struct { /* struct used by BPF_TOKEN_CREATE command */ + __u32 flags; + __u32 bpffs_fd; + } token_create; + } __attribute__((aligned(8))); /* The description below is an attempt at providing documentation to eBPF @@ -4839,9 +4902,9 @@ union bpf_attr { * going through the CPU's backlog queue. * * The *flags* argument is reserved and must be 0. The helper is - * currently only supported for tc BPF program types at the ingress - * hook and for veth device types. The peer device must reside in a - * different network namespace. + * currently only supported for tc BPF program types at the + * ingress hook and for veth and netkit target device types. The + * peer device must reside in a different network namespace. * Return * The helper returns **TC_ACT_REDIRECT** on success or * **TC_ACT_SHOT** on error. @@ -6487,7 +6550,7 @@ struct bpf_map_info { __u32 btf_id; __u32 btf_key_type_id; __u32 btf_value_type_id; - __u32 :32; /* alignment pad */ + __u32 btf_vmlinux_id; __u64 map_extra; } __attribute__((aligned(8))); @@ -6563,6 +6626,7 @@ struct bpf_link_info { __u32 count; /* in/out: kprobe_multi function count */ __u32 flags; __u64 missed; + __aligned_u64 cookies; } kprobe_multi; struct { __aligned_u64 path; @@ -6582,6 +6646,7 @@ struct bpf_link_info { __aligned_u64 file_name; /* in/out */ __u32 name_len; __u32 offset; /* offset from file_name */ + __u64 cookie; } uprobe; /* BPF_PERF_EVENT_UPROBE, BPF_PERF_EVENT_URETPROBE */ struct { __aligned_u64 func_name; /* in/out */ @@ -6589,14 +6654,19 @@ struct bpf_link_info { __u32 offset; /* offset from func_name */ __u64 addr; __u64 missed; + __u64 cookie; } kprobe; /* BPF_PERF_EVENT_KPROBE, BPF_PERF_EVENT_KRETPROBE */ struct { __aligned_u64 tp_name; /* in/out */ __u32 name_len; + __u32 :32; + __u64 cookie; } tracepoint; /* BPF_PERF_EVENT_TRACEPOINT */ struct { __u64 config; __u32 type; + __u32 :32; + __u64 cookie; } event; /* BPF_PERF_EVENT_EVENT */ }; } perf_event; @@ -6904,6 +6974,7 @@ enum { BPF_TCP_LISTEN, BPF_TCP_CLOSING, /* Now a valid state */ BPF_TCP_NEW_SYN_RECV, + BPF_TCP_BOUND_INACTIVE, BPF_TCP_MAX_STATES /* Leave at the end! */ }; diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h index a0aa05a28cf2..f0d71b2a3f1e 100644 --- a/tools/include/uapi/linux/if_link.h +++ b/tools/include/uapi/linux/if_link.h @@ -974,6 +974,7 @@ enum { IFLA_BOND_AD_LACP_ACTIVE, IFLA_BOND_MISSED_MAX, IFLA_BOND_NS_IP6_TARGET, + IFLA_BOND_COUPLED_CONTROL, __IFLA_BOND_MAX, }; diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 2d0c282c8588..b6619199a706 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1,4 +1,4 @@ libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \ netlink.o bpf_prog_linfo.o libbpf_probes.o hashmap.o \ btf_dump.o ringbuf.o strset.o linker.o gen_loader.o relo_core.o \ - usdt.o zip.o elf.o + usdt.o zip.o elf.o features.o diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 9dc9625651dc..97ec005c3c47 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -103,7 +103,7 @@ int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts) * [0] https://lore.kernel.org/bpf/20201201215900.3569844-1-guro@fb.com/ * [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") */ -int probe_memcg_account(void) +int probe_memcg_account(int token_fd) { const size_t attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); struct bpf_insn insns[] = { @@ -120,6 +120,9 @@ int probe_memcg_account(void) attr.insns = ptr_to_u64(insns); attr.insn_cnt = insn_cnt; attr.license = ptr_to_u64("GPL"); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, attr_sz); if (prog_fd >= 0) { @@ -146,7 +149,7 @@ int bump_rlimit_memlock(void) struct rlimit rlim; /* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ - if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) + if (memlock_bumped || feat_supported(NULL, FEAT_MEMCG_ACCOUNT)) return 0; memlock_bumped = true; @@ -169,7 +172,7 @@ int bpf_map_create(enum bpf_map_type map_type, __u32 max_entries, const struct bpf_map_create_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, map_extra); + const size_t attr_sz = offsetofend(union bpf_attr, map_token_fd); union bpf_attr attr; int fd; @@ -181,7 +184,7 @@ int bpf_map_create(enum bpf_map_type map_type, return libbpf_err(-EINVAL); attr.map_type = map_type; - if (map_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (map_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); attr.key_size = key_size; attr.value_size = value_size; @@ -191,6 +194,7 @@ int bpf_map_create(enum bpf_map_type map_type, attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + attr.value_type_btf_obj_fd = OPTS_GET(opts, value_type_btf_obj_fd, 0); attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 0); attr.map_flags = OPTS_GET(opts, map_flags, 0); @@ -198,6 +202,8 @@ int bpf_map_create(enum bpf_map_type map_type, attr.numa_node = OPTS_GET(opts, numa_node, 0); attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + attr.map_token_fd = OPTS_GET(opts, token_fd, 0); + fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz); return libbpf_err_errno(fd); } @@ -232,7 +238,7 @@ int bpf_prog_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, size_t insn_cnt, struct bpf_prog_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, prog_token_fd); void *finfo = NULL, *linfo = NULL; const char *func_info, *line_info; __u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; @@ -261,8 +267,9 @@ int bpf_prog_load(enum bpf_prog_type prog_type, attr.prog_flags = OPTS_GET(opts, prog_flags, 0); attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); attr.kern_version = OPTS_GET(opts, kern_version, 0); + attr.prog_token_fd = OPTS_GET(opts, token_fd, 0); - if (prog_name && kernel_supports(NULL, FEAT_PROG_NAME)) + if (prog_name && feat_supported(NULL, FEAT_PROG_NAME)) libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); attr.license = ptr_to_u64(license); @@ -1182,7 +1189,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts) { - const size_t attr_sz = offsetofend(union bpf_attr, btf_log_true_size); + const size_t attr_sz = offsetofend(union bpf_attr, btf_token_fd); union bpf_attr attr; char *log_buf; size_t log_size; @@ -1207,6 +1214,10 @@ int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts attr.btf = ptr_to_u64(btf_data); attr.btf_size = btf_size; + + attr.btf_flags = OPTS_GET(opts, btf_flags, 0); + attr.btf_token_fd = OPTS_GET(opts, token_fd, 0); + /* log_level == 0 and log_buf != NULL means "try loading without * log_buf, but retry with log_buf and log_level=1 on error", which is * consistent across low-level and high-level BTF and program loading @@ -1287,3 +1298,20 @@ int bpf_prog_bind_map(int prog_fd, int map_fd, ret = sys_bpf(BPF_PROG_BIND_MAP, &attr, attr_sz); return libbpf_err_errno(ret); } + +int bpf_token_create(int bpffs_fd, struct bpf_token_create_opts *opts) +{ + const size_t attr_sz = offsetofend(union bpf_attr, token_create); + union bpf_attr attr; + int fd; + + if (!OPTS_VALID(opts, bpf_token_create_opts)) + return libbpf_err(-EINVAL); + + memset(&attr, 0, attr_sz); + attr.token_create.bpffs_fd = bpffs_fd; + attr.token_create.flags = OPTS_GET(opts, flags, 0); + + fd = sys_bpf_fd(BPF_TOKEN_CREATE, &attr, attr_sz); + return libbpf_err_errno(fd); +} diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index d0f53772bdc0..1441f642c563 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -51,8 +51,12 @@ struct bpf_map_create_opts { __u32 numa_node; __u32 map_ifindex; + __s32 value_type_btf_obj_fd; + + __u32 token_fd; + size_t :0; }; -#define bpf_map_create_opts__last_field map_ifindex +#define bpf_map_create_opts__last_field token_fd LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, const char *map_name, @@ -102,9 +106,10 @@ struct bpf_prog_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + __u32 token_fd; size_t :0; }; -#define bpf_prog_load_opts__last_field log_true_size +#define bpf_prog_load_opts__last_field token_fd LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, const char *prog_name, const char *license, @@ -130,9 +135,12 @@ struct bpf_btf_load_opts { * If kernel doesn't support this feature, log_size is left unchanged. */ __u32 log_true_size; + + __u32 btf_flags; + __u32 token_fd; size_t :0; }; -#define bpf_btf_load_opts__last_field log_true_size +#define bpf_btf_load_opts__last_field token_fd LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, struct bpf_btf_load_opts *opts); @@ -640,6 +648,30 @@ struct bpf_test_run_opts { LIBBPF_API int bpf_prog_test_run_opts(int prog_fd, struct bpf_test_run_opts *opts); +struct bpf_token_create_opts { + size_t sz; /* size of this struct for forward/backward compatibility */ + __u32 flags; + size_t :0; +}; +#define bpf_token_create_opts__last_field flags + +/** + * @brief **bpf_token_create()** creates a new instance of BPF token derived + * from specified BPF FS mount point. + * + * BPF token created with this API can be passed to bpf() syscall for + * commands like BPF_PROG_LOAD, BPF_MAP_CREATE, etc. + * + * @param bpffs_fd FD for BPF FS instance from which to derive a BPF token + * instance. + * @param opts optional BPF token creation options, can be NULL + * + * @return BPF token FD > 0, on success; negative error code, otherwise (errno + * is also set to the error code) + */ +LIBBPF_API int bpf_token_create(int bpffs_fd, + struct bpf_token_create_opts *opts); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h index 7325a12692a3..5aec301e9585 100644 --- a/tools/lib/bpf/bpf_core_read.h +++ b/tools/lib/bpf/bpf_core_read.h @@ -268,7 +268,7 @@ enum bpf_enum_value_kind { * a relocation, which records BTF type ID describing root struct/union and an * accessor string which describes exact embedded field that was used to take * an address. See detailed description of this relocation format and - * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * semantics in comments to struct bpf_core_relo in include/uapi/linux/bpf.h. * * This relocation allows libbpf to adjust BPF instruction to use correct * actual field offset, based on target kernel BTF type that matches original diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index ee95fd379d4d..ec92b87cae01 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1317,7 +1317,9 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf) static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level) +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd) { LIBBPF_OPTS(bpf_btf_load_opts, opts); __u32 buf_sz = 0, raw_size; @@ -1367,6 +1369,10 @@ retry_load: opts.log_level = log_level; } + opts.token_fd = token_fd; + if (token_fd) + opts.btf_flags |= BPF_F_TOKEN_FD; + btf->fd = bpf_btf_load(raw_data, raw_size, &opts); if (btf->fd < 0) { /* time to turn on verbose mode and try again */ @@ -1394,7 +1400,7 @@ done: int btf__load_into_kernel(struct btf *btf) { - return btf_load_into_kernel(btf, NULL, 0, 0); + return btf_load_into_kernel(btf, NULL, 0, 0, 0); } int btf__fd(const struct btf *btf) diff --git a/tools/lib/bpf/elf.c b/tools/lib/bpf/elf.c index b02faec748a5..c92e02394159 100644 --- a/tools/lib/bpf/elf.c +++ b/tools/lib/bpf/elf.c @@ -11,8 +11,6 @@ #include "libbpf_internal.h" #include "str_error.h" -#define STRERR_BUFSIZE 128 - /* A SHT_GNU_versym section holds 16-bit words. This bit is set if * the symbol is hidden and can only be seen when referenced using an * explicit version number. This is a GNU extension. diff --git a/tools/lib/bpf/features.c b/tools/lib/bpf/features.c new file mode 100644 index 000000000000..5a5c766bf615 --- /dev/null +++ b/tools/lib/bpf/features.c @@ -0,0 +1,503 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#include <linux/kernel.h> +#include <linux/filter.h> +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_common.h" +#include "libbpf_internal.h" +#include "str_error.h" + +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64)(unsigned long)ptr; +} + +int probe_fd(int fd) +{ + if (fd >= 0) + close(fd); + return fd >= 0; +} + +static int probe_kern_prog_name(int token_fd) +{ + const size_t attr_sz = offsetofend(union bpf_attr, prog_name); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + union bpf_attr attr; + int ret; + + memset(&attr, 0, attr_sz); + attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; + attr.license = ptr_to_u64("GPL"); + attr.insns = ptr_to_u64(insns); + attr.insn_cnt = (__u32)ARRAY_SIZE(insns); + attr.prog_token_fd = token_fd; + if (token_fd) + attr.prog_flags |= BPF_F_TOKEN_FD; + libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); + + /* make sure loading with name works */ + ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); + return probe_fd(ret); +} + +static int probe_kern_global_data(int token_fd) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), + BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int ret, map, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + insns[0].imm = map; + + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + close(map); + return probe_fd(ret); +} + +static int probe_kern_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_func_global(int token_fd) +{ + static const char strs[] = "\0int\0x\0a"; + /* static void x(int a) {} */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* FUNC_PROTO */ /* [2] */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), + BTF_PARAM_ENC(7, 1), + /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ + BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_datasec(int token_fd) +{ + static const char strs[] = "\0x\0.data"; + /* static int a; */ + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* DATASEC val */ /* [3] */ + BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), + BTF_VAR_SECINFO_ENC(2, 0, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_float(int token_fd) +{ + static const char strs[] = "\0float"; + __u32 types[] = { + /* float */ + BTF_TYPE_FLOAT_ENC(1, 4), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_decl_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* VAR x */ /* [2] */ + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), + BTF_VAR_STATIC, + /* attr */ + BTF_TYPE_DECL_TAG_ENC(1, 2, -1), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_btf_type_tag(int token_fd) +{ + static const char strs[] = "\0tag"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ + /* attr */ + BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ + /* ptr */ + BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +static int probe_kern_array_mmap(int token_fd) +{ + LIBBPF_OPTS(bpf_map_create_opts, opts, + .map_flags = BPF_F_MMAPABLE | (token_fd ? BPF_F_TOKEN_FD : 0), + .token_fd = token_fd, + ); + int fd; + + fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); + return probe_fd(fd); +} + +static int probe_kern_exp_attach_type(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + /* use any valid combination of program type and (optional) + * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) + * to see if kernel supports expected_attach_type field for + * BPF_PROG_LOAD command + */ + fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_kern_probe_read_kernel(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + struct bpf_insn insns[] = { + BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ + BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ + BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ + BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), + BPF_EXIT_INSN(), + }; + int fd, insn_cnt = ARRAY_SIZE(insns); + + fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(fd); +} + +static int probe_prog_bind_map(int token_fd) +{ + char *cp, errmsg[STRERR_BUFSIZE]; + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_map_create_opts, map_opts, + .token_fd = token_fd, + .map_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); + + map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, &map_opts); + if (map < 0) { + ret = -errno; + cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); + pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", + __func__, cp, -ret); + return ret; + } + + prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &prog_opts); + if (prog < 0) { + close(map); + return 0; + } + + ret = bpf_prog_bind_map(prog, map, NULL); + + close(map); + close(prog); + + return ret >= 0; +} + +static int probe_module_btf(int token_fd) +{ + static const char strs[] = "\0int"; + __u32 types[] = { + /* int */ + BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), + }; + struct bpf_btf_info info; + __u32 len = sizeof(info); + char name[16]; + int fd, err; + + fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), token_fd); + if (fd < 0) + return 0; /* BTF not supported at all */ + + memset(&info, 0, sizeof(info)); + info.name = ptr_to_u64(name); + info.name_len = sizeof(name); + + /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; + * kernel's module BTF support coincides with support for + * name/name_len fields in struct bpf_btf_info. + */ + err = bpf_btf_get_info_by_fd(fd, &info, &len); + close(fd); + return !err; +} + +static int probe_perf_link(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int prog_fd, link_fd, err; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", + insns, ARRAY_SIZE(insns), &opts); + if (prog_fd < 0) + return -errno; + + /* use invalid perf_event FD to get EBADF, if link is supported; + * otherwise EINVAL should be returned + */ + link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_uprobe_multi_link(int token_fd) +{ + LIBBPF_OPTS(bpf_prog_load_opts, load_opts, + .expected_attach_type = BPF_TRACE_UPROBE_MULTI, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + LIBBPF_OPTS(bpf_link_create_opts, link_opts); + struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int prog_fd, link_fd, err; + unsigned long offset = 0; + + prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", + insns, ARRAY_SIZE(insns), &load_opts); + if (prog_fd < 0) + return -errno; + + /* Creating uprobe in '/' binary should fail with -EBADF. */ + link_opts.uprobe_multi.path = "/"; + link_opts.uprobe_multi.offsets = &offset; + link_opts.uprobe_multi.cnt = 1; + + link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); + err = -errno; /* close() can clobber errno */ + + if (link_fd >= 0) + close(link_fd); + close(prog_fd); + + return link_fd < 0 && err == -EBADF; +} + +static int probe_kern_bpf_cookie(int token_fd) +{ + struct bpf_insn insns[] = { + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), + BPF_EXIT_INSN(), + }; + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = token_fd, + .prog_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); + int ret, insn_cnt = ARRAY_SIZE(insns); + + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); + return probe_fd(ret); +} + +static int probe_kern_btf_enum64(int token_fd) +{ + static const char strs[] = "\0enum64"; + __u32 types[] = { + BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), + }; + + return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), + strs, sizeof(strs), token_fd)); +} + +typedef int (*feature_probe_fn)(int /* token_fd */); + +static struct kern_feature_cache feature_cache; + +static struct kern_feature_desc { + const char *desc; + feature_probe_fn probe; +} feature_probes[__FEAT_CNT] = { + [FEAT_PROG_NAME] = { + "BPF program name", probe_kern_prog_name, + }, + [FEAT_GLOBAL_DATA] = { + "global variables", probe_kern_global_data, + }, + [FEAT_BTF] = { + "minimal BTF", probe_kern_btf, + }, + [FEAT_BTF_FUNC] = { + "BTF functions", probe_kern_btf_func, + }, + [FEAT_BTF_GLOBAL_FUNC] = { + "BTF global function", probe_kern_btf_func_global, + }, + [FEAT_BTF_DATASEC] = { + "BTF data section and variable", probe_kern_btf_datasec, + }, + [FEAT_ARRAY_MMAP] = { + "ARRAY map mmap()", probe_kern_array_mmap, + }, + [FEAT_EXP_ATTACH_TYPE] = { + "BPF_PROG_LOAD expected_attach_type attribute", + probe_kern_exp_attach_type, + }, + [FEAT_PROBE_READ_KERN] = { + "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, + }, + [FEAT_PROG_BIND_MAP] = { + "BPF_PROG_BIND_MAP support", probe_prog_bind_map, + }, + [FEAT_MODULE_BTF] = { + "module BTF support", probe_module_btf, + }, + [FEAT_BTF_FLOAT] = { + "BTF_KIND_FLOAT support", probe_kern_btf_float, + }, + [FEAT_PERF_LINK] = { + "BPF perf link support", probe_perf_link, + }, + [FEAT_BTF_DECL_TAG] = { + "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, + }, + [FEAT_BTF_TYPE_TAG] = { + "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, + }, + [FEAT_MEMCG_ACCOUNT] = { + "memcg-based memory accounting", probe_memcg_account, + }, + [FEAT_BPF_COOKIE] = { + "BPF cookie support", probe_kern_bpf_cookie, + }, + [FEAT_BTF_ENUM64] = { + "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, + }, + [FEAT_SYSCALL_WRAPPER] = { + "Kernel using syscall wrapper", probe_kern_syscall_wrapper, + }, + [FEAT_UPROBE_MULTI_LINK] = { + "BPF multi-uprobe link support", probe_uprobe_multi_link, + }, +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id) +{ + struct kern_feature_desc *feat = &feature_probes[feat_id]; + int ret; + + /* assume global feature cache, unless custom one is provided */ + if (!cache) + cache = &feature_cache; + + if (READ_ONCE(cache->res[feat_id]) == FEAT_UNKNOWN) { + ret = feat->probe(cache->token_fd); + if (ret > 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_SUPPORTED); + } else if (ret == 0) { + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } else { + pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); + WRITE_ONCE(cache->res[feat_id], FEAT_MISSING); + } + } + + return READ_ONCE(cache->res[feat_id]) == FEAT_SUPPORTED; +} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index afd09571c482..fa7094ff3e66 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -59,6 +59,8 @@ #define BPF_FS_MAGIC 0xcafe4a11 #endif +#define BPF_FS_DEFAULT_PATH "/sys/fs/bpf" + #define BPF_INSN_SZ (sizeof(struct bpf_insn)) /* vsprintf() in __base_pr() uses nonliteral format string. It may break @@ -70,6 +72,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj); static bool prog_is_subprog(const struct bpf_object *obj, const struct bpf_program *prog); +static int map_set_def_max_entries(struct bpf_map *map); static const char * const attach_type_name[] = { [BPF_CGROUP_INET_INGRESS] = "cgroup_inet_ingress", @@ -527,6 +530,7 @@ struct bpf_map { struct bpf_map_def def; __u32 numa_node; __u32 btf_var_idx; + int mod_btf_fd; __u32 btf_key_type_id; __u32 btf_value_type_id; __u32 btf_vmlinux_value_type_id; @@ -693,6 +697,10 @@ struct bpf_object { struct usdt_manager *usdt_man; + struct kern_feature_cache *feat_cache; + char *token_path; + int token_fd; + char path[]; }; @@ -930,22 +938,29 @@ find_member_by_name(const struct btf *btf, const struct btf_type *t, return NULL; } +static int find_ksym_btf_id(struct bpf_object *obj, const char *ksym_name, + __u16 kind, struct btf **res_btf, + struct module_btf **res_mod_btf); + #define STRUCT_OPS_VALUE_PREFIX "bpf_struct_ops_" static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix, const char *name, __u32 kind); static int -find_struct_ops_kern_types(const struct btf *btf, const char *tname, +find_struct_ops_kern_types(struct bpf_object *obj, const char *tname, + struct module_btf **mod_btf, const struct btf_type **type, __u32 *type_id, const struct btf_type **vtype, __u32 *vtype_id, const struct btf_member **data_member) { const struct btf_type *kern_type, *kern_vtype; const struct btf_member *kern_data_member; + struct btf *btf; __s32 kern_vtype_id, kern_type_id; __u32 i; - kern_type_id = btf__find_by_name_kind(btf, tname, BTF_KIND_STRUCT); + kern_type_id = find_ksym_btf_id(obj, tname, BTF_KIND_STRUCT, + &btf, mod_btf); if (kern_type_id < 0) { pr_warn("struct_ops init_kern: struct %s is not found in kernel BTF\n", tname); @@ -999,14 +1014,16 @@ static bool bpf_map__is_struct_ops(const struct bpf_map *map) } /* Init the map's fields that depend on kern_btf */ -static int bpf_map__init_kern_struct_ops(struct bpf_map *map, - const struct btf *btf, - const struct btf *kern_btf) +static int bpf_map__init_kern_struct_ops(struct bpf_map *map) { const struct btf_member *member, *kern_member, *kern_data_member; const struct btf_type *type, *kern_type, *kern_vtype; __u32 i, kern_type_id, kern_vtype_id, kern_data_off; + struct bpf_object *obj = map->obj; + const struct btf *btf = obj->btf; struct bpf_struct_ops *st_ops; + const struct btf *kern_btf; + struct module_btf *mod_btf; void *data, *kern_data; const char *tname; int err; @@ -1014,16 +1031,19 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, st_ops = map->st_ops; type = st_ops->type; tname = st_ops->tname; - err = find_struct_ops_kern_types(kern_btf, tname, + err = find_struct_ops_kern_types(obj, tname, &mod_btf, &kern_type, &kern_type_id, &kern_vtype, &kern_vtype_id, &kern_data_member); if (err) return err; + kern_btf = mod_btf ? mod_btf->btf : obj->btf_vmlinux; + pr_debug("struct_ops init_kern %s: type_id:%u kern_type_id:%u kern_vtype_id:%u\n", map->name, st_ops->type_id, kern_type_id, kern_vtype_id); + map->mod_btf_fd = mod_btf ? mod_btf->fd : -1; map->def.value_size = kern_vtype->size; map->btf_vmlinux_value_type_id = kern_vtype_id; @@ -1099,6 +1119,8 @@ static int bpf_map__init_kern_struct_ops(struct bpf_map *map, return -ENOTSUP; } + if (mod_btf) + prog->attach_btf_obj_fd = mod_btf->fd; prog->attach_btf_id = kern_type_id; prog->expected_attach_type = kern_member_idx; @@ -1141,8 +1163,7 @@ static int bpf_object__init_kern_struct_ops_maps(struct bpf_object *obj) if (!bpf_map__is_struct_ops(map)) continue; - err = bpf_map__init_kern_struct_ops(map, obj->btf, - obj->btf_vmlinux); + err = bpf_map__init_kern_struct_ops(map); if (err) return err; } @@ -2216,7 +2237,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path) int err; if (!path) - path = "/sys/fs/bpf"; + path = BPF_FS_DEFAULT_PATH; err = pathname_concat(buf, sizeof(buf), path, bpf_map__name(map)); if (err) @@ -3225,7 +3246,7 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj) } else { /* currently BPF_BTF_LOAD only supports log_level 1 */ err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, - obj->log_level ? 1 : 0); + obj->log_level ? 1 : 0, obj->token_fd); } if (sanitize) { if (!err) { @@ -4546,6 +4567,58 @@ int bpf_map__set_max_entries(struct bpf_map *map, __u32 max_entries) return 0; } +static int bpf_object_prepare_token(struct bpf_object *obj) +{ + const char *bpffs_path; + int bpffs_fd = -1, token_fd, err; + bool mandatory; + enum libbpf_print_level level; + + /* token is explicitly prevented */ + if (obj->token_path && obj->token_path[0] == '\0') { + pr_debug("object '%s': token is prevented, skipping...\n", obj->name); + return 0; + } + + mandatory = obj->token_path != NULL; + level = mandatory ? LIBBPF_WARN : LIBBPF_DEBUG; + + bpffs_path = obj->token_path ?: BPF_FS_DEFAULT_PATH; + bpffs_fd = open(bpffs_path, O_DIRECTORY, O_RDWR); + if (bpffs_fd < 0) { + err = -errno; + __pr(level, "object '%s': failed (%d) to open BPF FS mount at '%s'%s\n", + obj->name, err, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? err : 0; + } + + token_fd = bpf_token_create(bpffs_fd, 0); + close(bpffs_fd); + if (token_fd < 0) { + if (!mandatory && token_fd == -ENOENT) { + pr_debug("object '%s': BPF FS at '%s' doesn't have BPF token delegation set up, skipping...\n", + obj->name, bpffs_path); + return 0; + } + __pr(level, "object '%s': failed (%d) to create BPF token from '%s'%s\n", + obj->name, token_fd, bpffs_path, + mandatory ? "" : ", skipping optional step..."); + return mandatory ? token_fd : 0; + } + + obj->feat_cache = calloc(1, sizeof(*obj->feat_cache)); + if (!obj->feat_cache) { + close(token_fd); + return -ENOMEM; + } + + obj->token_fd = token_fd; + obj->feat_cache->token_fd = token_fd; + + return 0; +} + static int bpf_object__probe_loading(struct bpf_object *obj) { @@ -4555,6 +4628,10 @@ bpf_object__probe_loading(struct bpf_object *obj) BPF_EXIT_INSN(), }; int ret, insn_cnt = ARRAY_SIZE(insns); + LIBBPF_OPTS(bpf_prog_load_opts, opts, + .token_fd = obj->token_fd, + .prog_flags = obj->token_fd ? BPF_F_TOKEN_FD : 0, + ); if (obj->gen_loader) return 0; @@ -4564,9 +4641,9 @@ bpf_object__probe_loading(struct bpf_object *obj) pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); /* make sure basic loading works */ - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) - ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); + ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, &opts); if (ret < 0) { ret = errno; cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4581,462 +4658,18 @@ bpf_object__probe_loading(struct bpf_object *obj) return 0; } -static int probe_fd(int fd) -{ - if (fd >= 0) - close(fd); - return fd >= 0; -} - -static int probe_kern_prog_name(void) -{ - const size_t attr_sz = offsetofend(union bpf_attr, prog_name); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - union bpf_attr attr; - int ret; - - memset(&attr, 0, attr_sz); - attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; - attr.license = ptr_to_u64("GPL"); - attr.insns = ptr_to_u64(insns); - attr.insn_cnt = (__u32)ARRAY_SIZE(insns); - libbpf_strlcpy(attr.prog_name, "libbpf_nametest", sizeof(attr.prog_name)); - - /* make sure loading with name works */ - ret = sys_bpf_prog_load(&attr, attr_sz, PROG_LOAD_ATTEMPTS); - return probe_fd(ret); -} - -static int probe_kern_global_data(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), - BPF_ST_MEM(BPF_DW, BPF_REG_1, 0, 42), - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_global", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - insns[0].imm = map; - - ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - close(map); - return probe_fd(ret); -} - -static int probe_kern_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, 0), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_func_global(void) -{ - static const char strs[] = "\0int\0x\0a"; - /* static void x(int a) {} */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* FUNC_PROTO */ /* [2] */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_FUNC_PROTO, 0, 1), 0), - BTF_PARAM_ENC(7, 1), - /* FUNC x BTF_FUNC_GLOBAL */ /* [3] */ - BTF_TYPE_ENC(5, BTF_INFO_ENC(BTF_KIND_FUNC, 0, BTF_FUNC_GLOBAL), 2), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_datasec(void) -{ - static const char strs[] = "\0x\0.data"; - /* static int a; */ - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* DATASEC val */ /* [3] */ - BTF_TYPE_ENC(3, BTF_INFO_ENC(BTF_KIND_DATASEC, 0, 1), 4), - BTF_VAR_SECINFO_ENC(2, 0, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_float(void) -{ - static const char strs[] = "\0float"; - __u32 types[] = { - /* float */ - BTF_TYPE_FLOAT_ENC(1, 4), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_decl_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* VAR x */ /* [2] */ - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_VAR, 0, 0), 1), - BTF_VAR_STATIC, - /* attr */ - BTF_TYPE_DECL_TAG_ENC(1, 2, -1), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_btf_type_tag(void) -{ - static const char strs[] = "\0tag"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4), /* [1] */ - /* attr */ - BTF_TYPE_TYPE_TAG_ENC(1, 1), /* [2] */ - /* ptr */ - BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2), /* [3] */ - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_array_mmap(void) -{ - LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); - int fd; - - fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_mmap", sizeof(int), sizeof(int), 1, &opts); - return probe_fd(fd); -} - -static int probe_kern_exp_attach_type(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - /* use any valid combination of program type and (optional) - * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS) - * to see if kernel supports expected_attach_type field for - * BPF_PROG_LOAD command - */ - fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); - return probe_fd(fd); -} - -static int probe_kern_probe_read_kernel(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_REG(BPF_REG_1, BPF_REG_10), /* r1 = r10 (fp) */ - BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8), /* r1 += -8 */ - BPF_MOV64_IMM(BPF_REG_2, 8), /* r2 = 8 */ - BPF_MOV64_IMM(BPF_REG_3, 0), /* r3 = 0 */ - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel), - BPF_EXIT_INSN(), - }; - int fd, insn_cnt = ARRAY_SIZE(insns); - - fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(fd); -} - -static int probe_prog_bind_map(void) -{ - char *cp, errmsg[STRERR_BUFSIZE]; - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); - - map = bpf_map_create(BPF_MAP_TYPE_ARRAY, "libbpf_det_bind", sizeof(int), 32, 1, NULL); - if (map < 0) { - ret = -errno; - cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); - pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", - __func__, cp, -ret); - return ret; - } - - prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); - if (prog < 0) { - close(map); - return 0; - } - - ret = bpf_prog_bind_map(prog, map, NULL); - - close(map); - close(prog); - - return ret >= 0; -} - -static int probe_module_btf(void) -{ - static const char strs[] = "\0int"; - __u32 types[] = { - /* int */ - BTF_TYPE_INT_ENC(1, BTF_INT_SIGNED, 0, 32, 4), - }; - struct bpf_btf_info info; - __u32 len = sizeof(info); - char name[16]; - int fd, err; - - fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); - if (fd < 0) - return 0; /* BTF not supported at all */ - - memset(&info, 0, sizeof(info)); - info.name = ptr_to_u64(name); - info.name_len = sizeof(name); - - /* check that BPF_OBJ_GET_INFO_BY_FD supports specifying name pointer; - * kernel's module BTF support coincides with support for - * name/name_len fields in struct bpf_btf_info. - */ - err = bpf_btf_get_info_by_fd(fd, &info, &len); - close(fd); - return !err; -} - -static int probe_perf_link(void) -{ - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", - insns, ARRAY_SIZE(insns), NULL); - if (prog_fd < 0) - return -errno; - - /* use invalid perf_event FD to get EBADF, if link is supported; - * otherwise EINVAL should be returned - */ - link_fd = bpf_link_create(prog_fd, -1, BPF_PERF_EVENT, NULL); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_uprobe_multi_link(void) -{ - LIBBPF_OPTS(bpf_prog_load_opts, load_opts, - .expected_attach_type = BPF_TRACE_UPROBE_MULTI, - ); - LIBBPF_OPTS(bpf_link_create_opts, link_opts); - struct bpf_insn insns[] = { - BPF_MOV64_IMM(BPF_REG_0, 0), - BPF_EXIT_INSN(), - }; - int prog_fd, link_fd, err; - unsigned long offset = 0; - - prog_fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", - insns, ARRAY_SIZE(insns), &load_opts); - if (prog_fd < 0) - return -errno; - - /* Creating uprobe in '/' binary should fail with -EBADF. */ - link_opts.uprobe_multi.path = "/"; - link_opts.uprobe_multi.offsets = &offset; - link_opts.uprobe_multi.cnt = 1; - - link_fd = bpf_link_create(prog_fd, -1, BPF_TRACE_UPROBE_MULTI, &link_opts); - err = -errno; /* close() can clobber errno */ - - if (link_fd >= 0) - close(link_fd); - close(prog_fd); - - return link_fd < 0 && err == -EBADF; -} - -static int probe_kern_bpf_cookie(void) -{ - struct bpf_insn insns[] = { - BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_attach_cookie), - BPF_EXIT_INSN(), - }; - int ret, insn_cnt = ARRAY_SIZE(insns); - - ret = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); - return probe_fd(ret); -} - -static int probe_kern_btf_enum64(void) -{ - static const char strs[] = "\0enum64"; - __u32 types[] = { - BTF_TYPE_ENC(1, BTF_INFO_ENC(BTF_KIND_ENUM64, 0, 0), 8), - }; - - return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs))); -} - -static int probe_kern_syscall_wrapper(void); - -enum kern_feature_result { - FEAT_UNKNOWN = 0, - FEAT_SUPPORTED = 1, - FEAT_MISSING = 2, -}; - -typedef int (*feature_probe_fn)(void); - -static struct kern_feature_desc { - const char *desc; - feature_probe_fn probe; - enum kern_feature_result res; -} feature_probes[__FEAT_CNT] = { - [FEAT_PROG_NAME] = { - "BPF program name", probe_kern_prog_name, - }, - [FEAT_GLOBAL_DATA] = { - "global variables", probe_kern_global_data, - }, - [FEAT_BTF] = { - "minimal BTF", probe_kern_btf, - }, - [FEAT_BTF_FUNC] = { - "BTF functions", probe_kern_btf_func, - }, - [FEAT_BTF_GLOBAL_FUNC] = { - "BTF global function", probe_kern_btf_func_global, - }, - [FEAT_BTF_DATASEC] = { - "BTF data section and variable", probe_kern_btf_datasec, - }, - [FEAT_ARRAY_MMAP] = { - "ARRAY map mmap()", probe_kern_array_mmap, - }, - [FEAT_EXP_ATTACH_TYPE] = { - "BPF_PROG_LOAD expected_attach_type attribute", - probe_kern_exp_attach_type, - }, - [FEAT_PROBE_READ_KERN] = { - "bpf_probe_read_kernel() helper", probe_kern_probe_read_kernel, - }, - [FEAT_PROG_BIND_MAP] = { - "BPF_PROG_BIND_MAP support", probe_prog_bind_map, - }, - [FEAT_MODULE_BTF] = { - "module BTF support", probe_module_btf, - }, - [FEAT_BTF_FLOAT] = { - "BTF_KIND_FLOAT support", probe_kern_btf_float, - }, - [FEAT_PERF_LINK] = { - "BPF perf link support", probe_perf_link, - }, - [FEAT_BTF_DECL_TAG] = { - "BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag, - }, - [FEAT_BTF_TYPE_TAG] = { - "BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, - }, - [FEAT_MEMCG_ACCOUNT] = { - "memcg-based memory accounting", probe_memcg_account, - }, - [FEAT_BPF_COOKIE] = { - "BPF cookie support", probe_kern_bpf_cookie, - }, - [FEAT_BTF_ENUM64] = { - "BTF_KIND_ENUM64 support", probe_kern_btf_enum64, - }, - [FEAT_SYSCALL_WRAPPER] = { - "Kernel using syscall wrapper", probe_kern_syscall_wrapper, - }, - [FEAT_UPROBE_MULTI_LINK] = { - "BPF multi-uprobe link support", probe_uprobe_multi_link, - }, -}; - bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) { - struct kern_feature_desc *feat = &feature_probes[feat_id]; - int ret; - if (obj && obj->gen_loader) /* To generate loader program assume the latest kernel * to avoid doing extra prog_load, map_create syscalls. */ return true; - if (READ_ONCE(feat->res) == FEAT_UNKNOWN) { - ret = feat->probe(); - if (ret > 0) { - WRITE_ONCE(feat->res, FEAT_SUPPORTED); - } else if (ret == 0) { - WRITE_ONCE(feat->res, FEAT_MISSING); - } else { - pr_warn("Detection of kernel %s support failed: %d\n", feat->desc, ret); - WRITE_ONCE(feat->res, FEAT_MISSING); - } - } + if (obj->token_fd) + return feat_supported(obj->feat_cache, feat_id); - return READ_ONCE(feat->res) == FEAT_SUPPORTED; + return feat_supported(NULL, feat_id); } static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) @@ -5160,9 +4793,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b create_attr.map_flags = def->map_flags; create_attr.numa_node = map->numa_node; create_attr.map_extra = map->map_extra; + create_attr.token_fd = obj->token_fd; + if (obj->token_fd) + create_attr.map_flags |= BPF_F_TOKEN_FD; - if (bpf_map__is_struct_ops(map)) + if (bpf_map__is_struct_ops(map)) { create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; + if (map->mod_btf_fd >= 0) { + create_attr.value_type_btf_obj_fd = map->mod_btf_fd; + create_attr.map_flags |= BPF_F_VTYPE_BTF_OBJ_FD; + } + } if (obj->btf && btf__fd(obj->btf) >= 0) { create_attr.btf_fd = btf__fd(obj->btf); @@ -5172,6 +4813,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b if (bpf_map_type__is_map_in_map(def->type)) { if (map->inner_map) { + err = map_set_def_max_entries(map->inner_map); + if (err) + return err; err = bpf_object__create_map(obj, map->inner_map, true); if (err) { pr_warn("map '%s': failed to create inner map: %d\n", @@ -6864,7 +6508,7 @@ static int probe_kern_arg_ctx_tag(void) if (cached_result >= 0) return cached_result; - btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs)); + btf_fd = libbpf__load_raw_btf((char *)types, sizeof(types), strs, sizeof(strs), 0); if (btf_fd < 0) return 0; @@ -7473,6 +7117,10 @@ static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog load_attr.prog_flags = prog->prog_flags; load_attr.fd_array = obj->fd_array; + load_attr.token_fd = obj->token_fd; + if (obj->token_fd) + load_attr.prog_flags |= BPF_F_TOKEN_FD; + /* adjust load_attr if sec_def provides custom preload callback */ if (prog->sec_def && prog->sec_def->prog_prepare_load_fn) { err = prog->sec_def->prog_prepare_load_fn(prog, &load_attr, prog->sec_def->cookie); @@ -7918,7 +7566,7 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, const struct bpf_object_open_opts *opts) { - const char *obj_name, *kconfig, *btf_tmp_path; + const char *obj_name, *kconfig, *btf_tmp_path, *token_path; struct bpf_object *obj; char tmp_name[64]; int err; @@ -7955,6 +7603,16 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, if (log_size && !log_buf) return ERR_PTR(-EINVAL); + token_path = OPTS_GET(opts, bpf_token_path, NULL); + /* if user didn't specify bpf_token_path explicitly, check if + * LIBBPF_BPF_TOKEN_PATH envvar was set and treat it as bpf_token_path + * option + */ + if (!token_path) + token_path = getenv("LIBBPF_BPF_TOKEN_PATH"); + if (token_path && strlen(token_path) >= PATH_MAX) + return ERR_PTR(-ENAMETOOLONG); + obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name); if (IS_ERR(obj)) return obj; @@ -7963,6 +7621,14 @@ static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, obj->log_size = log_size; obj->log_level = log_level; + if (token_path) { + obj->token_path = strdup(token_path); + if (!obj->token_path) { + err = -ENOMEM; + goto out; + } + } + btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL); if (btf_tmp_path) { if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -8473,7 +8139,8 @@ static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const ch if (obj->gen_loader) bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps); - err = bpf_object__probe_loading(obj); + err = bpf_object_prepare_token(obj); + err = err ? : bpf_object__probe_loading(obj); err = err ? : bpf_object__load_vmlinux_btf(obj, false); err = err ? : bpf_object__resolve_externs(obj, obj->kconfig); err = err ? : bpf_object__sanitize_maps(obj); @@ -9008,6 +8675,11 @@ void bpf_object__close(struct bpf_object *obj) } zfree(&obj->programs); + zfree(&obj->feat_cache); + zfree(&obj->token_path); + if (obj->token_fd > 0) + close(obj->token_fd); + free(obj); } @@ -9966,7 +9638,9 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac *btf_obj_fd = 0; *btf_type_id = 1; } else { - err = find_kernel_btf_id(prog->obj, attach_name, attach_type, btf_obj_fd, btf_type_id); + err = find_kernel_btf_id(prog->obj, attach_name, + attach_type, btf_obj_fd, + btf_type_id); } if (err) { pr_warn("prog '%s': failed to find kernel BTF type ID of '%s': %d\n", @@ -11028,7 +10702,7 @@ static const char *arch_specific_syscall_pfx(void) #endif } -static int probe_kern_syscall_wrapper(void) +int probe_kern_syscall_wrapper(int token_fd) { char syscall_name[64]; const char *ksys_pfx; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 6cd9c501624f..5723cbbfcc41 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -177,10 +177,29 @@ struct bpf_object_open_opts { * logs through its print callback. */ __u32 kernel_log_level; + /* Path to BPF FS mount point to derive BPF token from. + * + * Created BPF token will be used for all bpf() syscall operations + * that accept BPF token (e.g., map creation, BTF and program loads, + * etc) automatically within instantiated BPF object. + * + * If bpf_token_path is not specified, libbpf will consult + * LIBBPF_BPF_TOKEN_PATH environment variable. If set, it will be + * taken as a value of bpf_token_path option and will force libbpf to + * either create BPF token from provided custom BPF FS path, or will + * disable implicit BPF token creation, if envvar value is an empty + * string. bpf_token_path overrides LIBBPF_BPF_TOKEN_PATH, if both are + * set at the same time. + * + * Setting bpf_token_path option to empty string disables libbpf's + * automatic attempt to create BPF token from default BPF FS mount + * point (/sys/fs/bpf), in case this default behavior is undesirable. + */ + const char *bpf_token_path; size_t :0; }; -#define bpf_object_open_opts__last_field kernel_log_level +#define bpf_object_open_opts__last_field bpf_token_path /** * @brief **bpf_object__open()** creates a bpf_object by opening diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 91c5aef7dae7..d9e1f57534fa 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -411,4 +411,5 @@ LIBBPF_1.3.0 { } LIBBPF_1.2.0; LIBBPF_1.4.0 { + bpf_token_create; } LIBBPF_1.3.0; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 27e4e320e1a6..930cc9616527 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -15,6 +15,7 @@ #include <linux/err.h> #include <fcntl.h> #include <unistd.h> +#include <sys/syscall.h> #include <libelf.h> #include "relo_core.h" @@ -360,15 +361,32 @@ enum kern_feature_id { __FEAT_CNT, }; -int probe_memcg_account(void); +enum kern_feature_result { + FEAT_UNKNOWN = 0, + FEAT_SUPPORTED = 1, + FEAT_MISSING = 2, +}; + +struct kern_feature_cache { + enum kern_feature_result res[__FEAT_CNT]; + int token_fd; +}; + +bool feat_supported(struct kern_feature_cache *cache, enum kern_feature_id feat_id); bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); + +int probe_kern_syscall_wrapper(int token_fd); +int probe_memcg_account(int token_fd); int bump_rlimit_memlock(void); int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz); int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz); int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len); -int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level); + const char *str_sec, size_t str_len, + int token_fd); +int btf_load_into_kernel(struct btf *btf, + char *log_buf, size_t log_sz, __u32 log_level, + int token_fd); struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf); void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, @@ -532,6 +550,17 @@ static inline bool is_ldimm64_insn(struct bpf_insn *insn) return insn->code == (BPF_LD | BPF_IMM | BPF_DW); } +/* Unconditionally dup FD, ensuring it doesn't use [0, 2] range. + * Original FD is not closed or altered in any other way. + * Preserves original FD value, if it's invalid (negative). + */ +static inline int dup_good_fd(int fd) +{ + if (fd < 0) + return fd; + return fcntl(fd, F_DUPFD_CLOEXEC, 3); +} + /* if fd is stdin, stdout, or stderr, dup to a fd greater than 2 * Takes ownership of the fd passed in, and closes it if calling * fcntl(fd, F_DUPFD_CLOEXEC, 3). @@ -543,7 +572,7 @@ static inline int ensure_good_fd(int fd) if (fd < 0) return fd; if (fd < 3) { - fd = fcntl(fd, F_DUPFD_CLOEXEC, 3); + fd = dup_good_fd(fd); saved_errno = errno; close(old_fd); errno = saved_errno; @@ -555,6 +584,15 @@ static inline int ensure_good_fd(int fd) return fd; } +static inline int sys_dup2(int oldfd, int newfd) +{ +#ifdef __NR_dup2 + return syscall(__NR_dup2, oldfd, newfd); +#else + return syscall(__NR_dup3, oldfd, newfd, 0); +#endif +} + /* Point *fixed_fd* to the same file that *tmp_fd* points to. * Regardless of success, *tmp_fd* is closed. * Whatever *fixed_fd* pointed to is closed silently. @@ -563,7 +601,7 @@ static inline int reuse_fd(int fixed_fd, int tmp_fd) { int err; - err = dup2(tmp_fd, fixed_fd); + err = sys_dup2(tmp_fd, fixed_fd); err = err < 0 ? -errno : 0; close(tmp_fd); /* clean up temporary FD */ return err; @@ -613,4 +651,6 @@ int elf_resolve_syms_offsets(const char *binary_path, int cnt, int elf_resolve_pattern_offsets(const char *binary_path, const char *pattern, unsigned long **poffsets, size_t *pcnt); +int probe_fd(int fd); + #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 9c4db90b92b6..ee9b1dbea9eb 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -219,7 +219,8 @@ int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) } int libbpf__load_raw_btf(const char *raw_types, size_t types_len, - const char *str_sec, size_t str_len) + const char *str_sec, size_t str_len, + int token_fd) { struct btf_header hdr = { .magic = BTF_MAGIC, @@ -229,6 +230,10 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, .str_off = types_len, .str_len = str_len, }; + LIBBPF_OPTS(bpf_btf_load_opts, opts, + .token_fd = token_fd, + .btf_flags = token_fd ? BPF_F_TOKEN_FD : 0, + ); int btf_fd, btf_len; __u8 *raw_btf; @@ -241,7 +246,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len, memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len); memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); - btf_fd = bpf_btf_load(raw_btf, btf_len, NULL); + btf_fd = bpf_btf_load(raw_btf, btf_len, &opts); free(raw_btf); return btf_fd; @@ -271,7 +276,7 @@ static int load_local_storage_btf(void) }; return libbpf__load_raw_btf((char *)types, sizeof(types), - strs, sizeof(strs)); + strs, sizeof(strs), 0); } static int probe_map_create(enum bpf_map_type map_type) @@ -326,6 +331,7 @@ static int probe_map_create(enum bpf_map_type map_type) case BPF_MAP_TYPE_STRUCT_OPS: /* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ opts.btf_vmlinux_value_type_id = 1; + opts.value_type_btf_obj_fd = -1; exp_err = -524; /* -ENOTSUPP */ break; case BPF_MAP_TYPE_BLOOM_FILTER: diff --git a/tools/lib/bpf/str_error.h b/tools/lib/bpf/str_error.h index a139334d57b6..626d7ffb03d6 100644 --- a/tools/lib/bpf/str_error.h +++ b/tools/lib/bpf/str_error.h @@ -2,5 +2,8 @@ #ifndef __LIBBPF_STR_ERROR_H #define __LIBBPF_STR_ERROR_H +#define STRERR_BUFSIZE 128 + char *libbpf_strerror_r(int err, char *dst, int len); + #endif /* __LIBBPF_STR_ERROR_H */ diff --git a/tools/net/ynl/Makefile.deps b/tools/net/ynl/Makefile.deps index 3110f84dd029..e6154a1255f8 100644 --- a/tools/net/ynl/Makefile.deps +++ b/tools/net/ynl/Makefile.deps @@ -15,7 +15,9 @@ UAPI_PATH:=../../../../include/uapi/ get_hdr_inc=-D$(1) -include $(UAPI_PATH)/linux/$(2) CFLAGS_devlink:=$(call get_hdr_inc,_LINUX_DEVLINK_H_,devlink.h) +CFLAGS_dpll:=$(call get_hdr_inc,_LINUX_DPLL_H_,dpll.h) CFLAGS_ethtool:=$(call get_hdr_inc,_LINUX_ETHTOOL_NETLINK_H_,ethtool_netlink.h) CFLAGS_handshake:=$(call get_hdr_inc,_LINUX_HANDSHAKE_H,handshake.h) +CFLAGS_mptcp_pm:=$(call get_hdr_inc,_LINUX_MPTCP_PM_H_,mptcp_pm.h) CFLAGS_netdev:=$(call get_hdr_inc,_LINUX_NETDEV_H,netdev.h) CFLAGS_nfsd:=$(call get_hdr_inc,_LINUX_NFSD_NETLINK_H,nfsd_netlink.h) diff --git a/tools/net/ynl/cli.py b/tools/net/ynl/cli.py index 2ad9ec0f5545..0f8239979670 100755 --- a/tools/net/ynl/cli.py +++ b/tools/net/ynl/cli.py @@ -9,6 +9,15 @@ import time from lib import YnlFamily, Netlink +class YnlEncoder(json.JSONEncoder): + def default(self, obj): + if isinstance(obj, bytes): + return bytes.hex(obj) + if isinstance(obj, set): + return list(obj) + return json.JSONEncoder.default(self, obj) + + def main(): parser = argparse.ArgumentParser(description='YNL CLI sample') parser.add_argument('--spec', dest='spec', type=str, required=True) @@ -28,8 +37,15 @@ def main(): parser.add_argument('--append', dest='flags', action='append_const', const=Netlink.NLM_F_APPEND) parser.add_argument('--process-unknown', action=argparse.BooleanOptionalAction) + parser.add_argument('--output-json', action='store_true') args = parser.parse_args() + def output(msg): + if args.output_json: + print(json.dumps(msg, cls=YnlEncoder)) + else: + pprint.PrettyPrinter().pprint(msg) + if args.no_schema: args.schema = '' @@ -47,14 +63,14 @@ def main(): if args.do: reply = ynl.do(args.do, attrs, args.flags) - pprint.PrettyPrinter().pprint(reply) + output(reply) if args.dump: reply = ynl.dump(args.dump, attrs) - pprint.PrettyPrinter().pprint(reply) + output(reply) if args.ntf: ynl.check_ntf() - pprint.PrettyPrinter().pprint(ynl.async_msg_queue) + output(ynl.async_msg_queue) if __name__ == "__main__": diff --git a/tools/net/ynl/generated/Makefile b/tools/net/ynl/generated/Makefile index 84cbabdd02a8..7135028cb449 100644 --- a/tools/net/ynl/generated/Makefile +++ b/tools/net/ynl/generated/Makefile @@ -14,7 +14,10 @@ YNL_GEN_ARG_ethtool:=--user-header linux/ethtool_netlink.h \ TOOL:=../ynl-gen-c.py -GENS:=ethtool devlink handshake fou netdev nfsd +GENS_PATHS=$(shell grep -nrI --files-without-match \ + 'protocol: netlink' \ + ../../../../Documentation/netlink/specs/) +GENS=$(patsubst ../../../../Documentation/netlink/specs/%.yaml,%,${GENS_PATHS}) SRCS=$(patsubst %,%-user.c,${GENS}) HDRS=$(patsubst %,%-user.h,${GENS}) OBJS=$(patsubst %,%-user.o,${GENS}) diff --git a/tools/net/ynl/lib/nlspec.py b/tools/net/ynl/lib/nlspec.py index 44f13e383e8a..fbce52395b3b 100644 --- a/tools/net/ynl/lib/nlspec.py +++ b/tools/net/ynl/lib/nlspec.py @@ -144,7 +144,7 @@ class SpecEnumSet(SpecElement): class SpecAttr(SpecElement): - """ Single Netlink atttribute type + """ Single Netlink attribute type Represents a single attribute type within an attr space. @@ -248,6 +248,7 @@ class SpecStructMember(SpecElement): len integer, optional byte length of binary types display_hint string, hint to help choose format specifier when displaying the value + struct string, name of nested struct type """ def __init__(self, family, yaml): super().__init__(family, yaml) @@ -256,6 +257,7 @@ class SpecStructMember(SpecElement): self.enum = yaml.get('enum') self.len = yaml.get('len') self.display_hint = yaml.get('display-hint') + self.struct = yaml.get('struct') class SpecStruct(SpecElement): @@ -306,10 +308,9 @@ class SpecSubMessage(SpecElement): class SpecSubMessageFormat(SpecElement): - """ Netlink sub-message definition + """ Netlink sub-message format definition - Represents a set of sub-message formats for polymorphic nlattrs - that contain type-specific sub messages. + Represents a single format for a sub-message. Attributes: value attribute value to match against type selector diff --git a/tools/net/ynl/lib/ynl.py b/tools/net/ynl/lib/ynl.py index 1e10512b2117..03c7ca6aaae9 100644 --- a/tools/net/ynl/lib/ynl.py +++ b/tools/net/ynl/lib/ynl.py @@ -113,20 +113,6 @@ class NlAttr: else format.little return format.native - @classmethod - def formatted_string(cls, raw, display_hint): - if display_hint == 'mac': - formatted = ':'.join('%02x' % b for b in raw) - elif display_hint == 'hex': - formatted = bytes.hex(raw, ' ') - elif display_hint in [ 'ipv4', 'ipv6' ]: - formatted = format(ipaddress.ip_address(raw)) - elif display_hint == 'uuid': - formatted = str(uuid.UUID(bytes=raw)) - else: - formatted = raw - return formatted - def as_scalar(self, attr_type, byte_order=None): format = self.get_format(attr_type, byte_order) return format.unpack(self.raw)[0] @@ -148,23 +134,6 @@ class NlAttr: format = self.get_format(type) return [ x[0] for x in format.iter_unpack(self.raw) ] - def as_struct(self, members): - value = dict() - offset = 0 - for m in members: - # TODO: handle non-scalar members - if m.type == 'binary': - decoded = self.raw[offset : offset + m['len']] - offset += m['len'] - elif m.type in NlAttr.type_formats: - format = self.get_format(m.type, m.byte_order) - [ decoded ] = format.unpack_from(self.raw, offset) - offset += format.size - if m.display_hint: - decoded = self.formatted_string(decoded, m.display_hint) - value[m.name] = decoded - return value - def __repr__(self): return f"[type:{self.type} len:{self._len}] {self.raw}" @@ -370,7 +339,7 @@ class NetlinkProtocol: fixed_header_size = 0 if ynl: op = ynl.rsp_by_value[msg.cmd()] - fixed_header_size = ynl._fixed_header_size(op.fixed_header) + fixed_header_size = ynl._struct_size(op.fixed_header) msg.raw_attrs = NlAttrs(msg.raw, fixed_header_size) return msg @@ -405,6 +374,26 @@ class GenlProtocol(NetlinkProtocol): return self.genl_family['mcast'][mcast_name] + +class SpaceAttrs: + SpecValuesPair = namedtuple('SpecValuesPair', ['spec', 'values']) + + def __init__(self, attr_space, attrs, outer = None): + outer_scopes = outer.scopes if outer else [] + inner_scope = self.SpecValuesPair(attr_space, attrs) + self.scopes = [inner_scope] + outer_scopes + + def lookup(self, name): + for scope in self.scopes: + if name in scope.spec: + if name in scope.values: + return scope.values[name] + spec_name = scope.spec.yaml['name'] + raise Exception( + f"No value for '{name}' in attribute space '{spec_name}'") + raise Exception(f"Attribute '{name}' not defined in any attribute-set") + + # # YNL implementation details. # @@ -449,17 +438,26 @@ class YnlFamily(SpecFamily): self.sock.setsockopt(Netlink.SOL_NETLINK, Netlink.NETLINK_ADD_MEMBERSHIP, mcast_id) - def _add_attr(self, space, name, value): + def _add_attr(self, space, name, value, search_attrs): try: attr = self.attr_sets[space][name] except KeyError: raise Exception(f"Space '{space}' has no attribute '{name}'") nl_type = attr.value + + if attr.is_multi and isinstance(value, list): + attr_payload = b'' + for subvalue in value: + attr_payload += self._add_attr(space, name, subvalue, search_attrs) + return attr_payload + if attr["type"] == 'nest': nl_type |= Netlink.NLA_F_NESTED attr_payload = b'' + sub_attrs = SpaceAttrs(self.attr_sets[space], value, search_attrs) for subname, subvalue in value.items(): - attr_payload += self._add_attr(attr['nested-attributes'], subname, subvalue) + attr_payload += self._add_attr(attr['nested-attributes'], + subname, subvalue, sub_attrs) elif attr["type"] == 'flag': attr_payload = b'' elif attr["type"] == 'string': @@ -469,6 +467,8 @@ class YnlFamily(SpecFamily): attr_payload = value elif isinstance(value, str): attr_payload = bytes.fromhex(value) + elif isinstance(value, dict) and attr.struct_name: + attr_payload = self._encode_struct(attr.struct_name, value) else: raise Exception(f'Unknown type for binary attribute, value: {value}') elif attr.is_auto_scalar: @@ -481,6 +481,20 @@ class YnlFamily(SpecFamily): attr_payload = format.pack(int(value)) elif attr['type'] in "bitfield32": attr_payload = struct.pack("II", int(value["value"]), int(value["selector"])) + elif attr['type'] == 'sub-message': + msg_format = self._resolve_selector(attr, search_attrs) + attr_payload = b'' + if msg_format.fixed_header: + attr_payload += self._encode_struct(msg_format.fixed_header, value) + if msg_format.attr_set: + if msg_format.attr_set in self.attr_sets: + nl_type |= Netlink.NLA_F_NESTED + sub_attrs = SpaceAttrs(msg_format.attr_set, value, search_attrs) + for subname, subvalue in value.items(): + attr_payload += self._add_attr(msg_format.attr_set, + subname, subvalue, sub_attrs) + else: + raise Exception(f"Unknown attribute-set '{msg_format.attr_set}'") else: raise Exception(f'Unknown type at {space} {name} {value} {attr["type"]}') @@ -503,17 +517,13 @@ class YnlFamily(SpecFamily): def _decode_binary(self, attr, attr_spec): if attr_spec.struct_name: - members = self.consts[attr_spec.struct_name] - decoded = attr.as_struct(members) - for m in members: - if m.enum: - decoded[m.name] = self._decode_enum(decoded[m.name], m) + decoded = self._decode_struct(attr.raw, attr_spec.struct_name) elif attr_spec.sub_type: decoded = attr.as_c_array(attr_spec.sub_type) else: decoded = attr.as_bin() if attr_spec.display_hint: - decoded = NlAttr.formatted_string(decoded, attr_spec.display_hint) + decoded = self._formatted_string(decoded, attr_spec.display_hint) return decoded def _decode_array_nest(self, attr, attr_spec): @@ -548,29 +558,27 @@ class YnlFamily(SpecFamily): else: rsp[name] = [decoded] - def _resolve_selector(self, attr_spec, vals): + def _resolve_selector(self, attr_spec, search_attrs): sub_msg = attr_spec.sub_message if sub_msg not in self.sub_msgs: raise Exception(f"No sub-message spec named {sub_msg} for {attr_spec.name}") sub_msg_spec = self.sub_msgs[sub_msg] selector = attr_spec.selector - if selector not in vals: - raise Exception(f"There is no value for {selector} to resolve '{attr_spec.name}'") - value = vals[selector] + value = search_attrs.lookup(selector) if value not in sub_msg_spec.formats: raise Exception(f"No message format for '{value}' in sub-message spec '{sub_msg}'") spec = sub_msg_spec.formats[value] return spec - def _decode_sub_msg(self, attr, attr_spec, rsp): - msg_format = self._resolve_selector(attr_spec, rsp) + def _decode_sub_msg(self, attr, attr_spec, search_attrs): + msg_format = self._resolve_selector(attr_spec, search_attrs) decoded = {} offset = 0 if msg_format.fixed_header: - decoded.update(self._decode_fixed_header(attr, msg_format.fixed_header)); - offset = self._fixed_header_size(msg_format.fixed_header) + decoded.update(self._decode_struct(attr.raw, msg_format.fixed_header)); + offset = self._struct_size(msg_format.fixed_header) if msg_format.attr_set: if msg_format.attr_set in self.attr_sets: subdict = self._decode(NlAttrs(attr.raw, offset), msg_format.attr_set) @@ -579,10 +587,12 @@ class YnlFamily(SpecFamily): raise Exception(f"Unknown attribute-set '{attr_space}' when decoding '{attr_spec.name}'") return decoded - def _decode(self, attrs, space): + def _decode(self, attrs, space, outer_attrs = None): if space: attr_space = self.attr_sets[space] rsp = dict() + search_attrs = SpaceAttrs(attr_space, rsp, outer_attrs) + for attr in attrs: try: attr_spec = attr_space.attrs_by_val[attr.type] @@ -594,7 +604,7 @@ class YnlFamily(SpecFamily): continue if attr_spec["type"] == 'nest': - subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes']) + subdict = self._decode(NlAttrs(attr.raw), attr_spec['nested-attributes'], search_attrs) decoded = subdict elif attr_spec["type"] == 'string': decoded = attr.as_strz() @@ -617,7 +627,7 @@ class YnlFamily(SpecFamily): selector = self._decode_enum(selector, attr_spec) decoded = {"value": value, "selector": selector} elif attr_spec["type"] == 'sub-message': - decoded = self._decode_sub_msg(attr, attr_spec, rsp) + decoded = self._decode_sub_msg(attr, attr_spec, search_attrs) else: if not self.process_unknown: raise Exception(f'Unknown {attr_spec["type"]} with name {attr_spec["name"]}') @@ -658,20 +668,23 @@ class YnlFamily(SpecFamily): return msg = self.nlproto.decode(self, NlMsg(request, 0, op.attr_set)) - offset = 20 + self._fixed_header_size(op.fixed_header) + offset = 20 + self._struct_size(op.fixed_header) path = self._decode_extack_path(msg.raw_attrs, op.attr_set, offset, extack['bad-attr-offs']) if path: del extack['bad-attr-offs'] extack['bad-attr'] = path - def _fixed_header_size(self, name): + def _struct_size(self, name): if name: - fixed_header_members = self.consts[name].members + members = self.consts[name].members size = 0 - for m in fixed_header_members: + for m in members: if m.type in ['pad', 'binary']: - size += m.len + if m.struct: + size += self._struct_size(m.struct) + else: + size += m.len else: format = NlAttr.get_format(m.type, m.byte_order) size += format.size @@ -679,26 +692,71 @@ class YnlFamily(SpecFamily): else: return 0 - def _decode_fixed_header(self, msg, name): - fixed_header_members = self.consts[name].members - fixed_header_attrs = dict() + def _decode_struct(self, data, name): + members = self.consts[name].members + attrs = dict() offset = 0 - for m in fixed_header_members: + for m in members: value = None if m.type == 'pad': offset += m.len elif m.type == 'binary': - value = msg.raw[offset : offset + m.len] - offset += m.len + if m.struct: + len = self._struct_size(m.struct) + value = self._decode_struct(data[offset : offset + len], + m.struct) + offset += len + else: + value = data[offset : offset + m.len] + offset += m.len else: format = NlAttr.get_format(m.type, m.byte_order) - [ value ] = format.unpack_from(msg.raw, offset) + [ value ] = format.unpack_from(data, offset) offset += format.size if value is not None: if m.enum: value = self._decode_enum(value, m) - fixed_header_attrs[m.name] = value - return fixed_header_attrs + elif m.display_hint: + value = self._formatted_string(value, m.display_hint) + attrs[m.name] = value + return attrs + + def _encode_struct(self, name, vals): + members = self.consts[name].members + attr_payload = b'' + for m in members: + value = vals.pop(m.name) if m.name in vals else None + if m.type == 'pad': + attr_payload += bytearray(m.len) + elif m.type == 'binary': + if m.struct: + if value is None: + value = dict() + attr_payload += self._encode_struct(m.struct, value) + else: + if value is None: + attr_payload += bytearray(m.len) + else: + attr_payload += bytes.fromhex(value) + else: + if value is None: + value = 0 + format = NlAttr.get_format(m.type, m.byte_order) + attr_payload += format.pack(value) + return attr_payload + + def _formatted_string(self, raw, display_hint): + if display_hint == 'mac': + formatted = ':'.join('%02x' % b for b in raw) + elif display_hint == 'hex': + formatted = bytes.hex(raw, ' ') + elif display_hint in [ 'ipv4', 'ipv6' ]: + formatted = format(ipaddress.ip_address(raw)) + elif display_hint == 'uuid': + formatted = str(uuid.UUID(bytes=raw)) + else: + formatted = raw + return formatted def handle_ntf(self, decoded): msg = dict() @@ -707,7 +765,7 @@ class YnlFamily(SpecFamily): op = self.rsp_by_value[decoded.cmd()] attrs = self._decode(decoded.raw_attrs, op.attr_set.name) if op.fixed_header: - attrs.update(self._decode_fixed_header(decoded, op.fixed_header)) + attrs.update(self._decode_struct(decoded.raw, op.fixed_header)) msg['name'] = op['name'] msg['msg'] = attrs @@ -759,20 +817,11 @@ class YnlFamily(SpecFamily): req_seq = random.randint(1024, 65535) msg = self.nlproto.message(nl_flags, op.req_value, 1, req_seq) - fixed_header_members = [] if op.fixed_header: - fixed_header_members = self.consts[op.fixed_header].members - for m in fixed_header_members: - value = vals.pop(m.name) if m.name in vals else 0 - if m.type == 'pad': - msg += bytearray(m.len) - elif m.type == 'binary': - msg += bytes.fromhex(value) - else: - format = NlAttr.get_format(m.type, m.byte_order) - msg += format.pack(value) + msg += self._encode_struct(op.fixed_header, vals) + search_attrs = SpaceAttrs(op.attr_set, vals) for name, value in vals.items(): - msg += self._add_attr(op.attr_set.name, name, value) + msg += self._add_attr(op.attr_set.name, name, value, search_attrs) msg = _genl_msg_finalize(msg) self.sock.send(msg, 0) @@ -808,7 +857,7 @@ class YnlFamily(SpecFamily): rsp_msg = self._decode(decoded.raw_attrs, op.attr_set.name) if op.fixed_header: - rsp_msg.update(self._decode_fixed_header(decoded, op.fixed_header)) + rsp_msg.update(self._decode_struct(decoded.raw, op.fixed_header)) rsp.append(rsp_msg) if not rsp: diff --git a/tools/net/ynl/samples/.gitignore b/tools/net/ynl/samples/.gitignore index 49637b26c482..dda6686257a7 100644 --- a/tools/net/ynl/samples/.gitignore +++ b/tools/net/ynl/samples/.gitignore @@ -1,4 +1,5 @@ ethtool devlink netdev +ovs page-pool
\ No newline at end of file diff --git a/tools/net/ynl/samples/ovs.c b/tools/net/ynl/samples/ovs.c new file mode 100644 index 000000000000..3e975c003d77 --- /dev/null +++ b/tools/net/ynl/samples/ovs.c @@ -0,0 +1,60 @@ +// SPDX-License-Identifier: GPL-2.0 +#include <stdio.h> +#include <string.h> + +#include <ynl.h> + +#include "ovs_datapath-user.h" + +int main(int argc, char **argv) +{ + struct ynl_sock *ys; + int err; + + ys = ynl_sock_create(&ynl_ovs_datapath_family, NULL); + if (!ys) + return 1; + + if (argc > 1) { + struct ovs_datapath_new_req *req; + + req = ovs_datapath_new_req_alloc(); + if (!req) + goto err_close; + + ovs_datapath_new_req_set_upcall_pid(req, 1); + ovs_datapath_new_req_set_name(req, argv[1]); + + err = ovs_datapath_new(ys, req); + ovs_datapath_new_req_free(req); + if (err) + goto err_close; + } else { + struct ovs_datapath_get_req_dump *req; + struct ovs_datapath_get_list *dps; + + printf("Dump:\n"); + req = ovs_datapath_get_req_dump_alloc(); + + dps = ovs_datapath_get_dump(ys, req); + ovs_datapath_get_req_dump_free(req); + if (!dps) + goto err_close; + + ynl_dump_foreach(dps, dp) { + printf(" %s(%d): pid:%u cache:%u\n", + dp->name, dp->_hdr.dp_ifindex, + dp->upcall_pid, dp->masks_cache_size); + } + ovs_datapath_get_list_free(dps); + } + + ynl_sock_destroy(ys); + + return 0; + +err_close: + fprintf(stderr, "YNL (%d): %s\n", ys->err.code, ys->err.msg); + ynl_sock_destroy(ys); + return 2; +} diff --git a/tools/net/ynl/ynl-gen-rst.py b/tools/net/ynl/ynl-gen-rst.py index 262d88f88696..927407b3efb3 100755 --- a/tools/net/ynl/ynl-gen-rst.py +++ b/tools/net/ynl/ynl-gen-rst.py @@ -189,12 +189,19 @@ def parse_operations(operations: List[Dict[str, Any]]) -> str: def parse_entries(entries: List[Dict[str, Any]], level: int) -> str: """Parse a list of entries""" + ignored = ["pad"] lines = [] for entry in entries: if isinstance(entry, dict): # entries could be a list or a dictionary + field_name = entry.get("name", "") + if field_name in ignored: + continue + type_ = entry.get("type") + if type_: + field_name += f" ({inline(type_)})" lines.append( - rst_fields(entry.get("name", ""), sanitize(entry.get("doc", "")), level) + rst_fields(field_name, sanitize(entry.get("doc", "")), level) ) elif isinstance(entry, list): lines.append(rst_list_inline(entry, level)) diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 15b6a111c3be..082db6b68060 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -191,6 +191,8 @@ run_tests: all @for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET run_tests \ + SRC_PATH=$(shell readlink -e $$(pwd)) \ + OBJ_PATH=$(BUILD) \ O=$(abs_objtree); \ done; @@ -241,7 +243,10 @@ ifdef INSTALL_PATH @ret=1; \ for TARGET in $(TARGETS); do \ BUILD_TARGET=$$BUILD/$$TARGET; \ - $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install \ + $(MAKE) OUTPUT=$$BUILD_TARGET -C $$TARGET install \ + INSTALL_PATH=$(INSTALL_PATH)/$$TARGET \ + SRC_PATH=$(shell readlink -e $$(pwd)) \ + OBJ_PATH=$(INSTALL_PATH) \ O=$(abs_objtree) \ $(if $(FORCE_TARGETS),|| exit); \ ret=$$((ret * $$?)); \ diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst index 9af79c7a9b58..9b974e425af3 100644 --- a/tools/testing/selftests/bpf/README.rst +++ b/tools/testing/selftests/bpf/README.rst @@ -115,7 +115,7 @@ the insn 20 undoes map_value addition. It is currently impossible for the verifier to understand such speculative pointer arithmetic. Hence `this patch`__ addresses it on the compiler side. It was committed on llvm 12. -__ https://reviews.llvm.org/D85570 +__ https://github.com/llvm/llvm-project/commit/ddf1864ace484035e3cde5e83b3a31ac81e059c6 The corresponding C code @@ -165,7 +165,7 @@ This is due to a llvm BPF backend bug. `The fix`__ has been pushed to llvm 10.x release branch and will be available in 10.0.1. The patch is available in llvm 11.0.0 trunk. -__ https://reviews.llvm.org/D78466 +__ https://github.com/llvm/llvm-project/commit/3cb7e7bf959dcd3b8080986c62e10a75c7af43f0 bpf_verif_scale/loop6.bpf.o test failure with Clang 12 ====================================================== @@ -204,7 +204,7 @@ r5(w5) is eventually saved on stack at insn #24 for later use. This cause later verifier failure. The bug has been `fixed`__ in Clang 13. -__ https://reviews.llvm.org/D97479 +__ https://github.com/llvm/llvm-project/commit/1959ead525b8830cc8a345f45e1c3ef9902d3229 BPF CO-RE-based tests and Clang version ======================================= @@ -221,11 +221,11 @@ failures: - __builtin_btf_type_id() [0_, 1_, 2_]; - __builtin_preserve_type_info(), __builtin_preserve_enum_value() [3_, 4_]. -.. _0: https://reviews.llvm.org/D74572 -.. _1: https://reviews.llvm.org/D74668 -.. _2: https://reviews.llvm.org/D85174 -.. _3: https://reviews.llvm.org/D83878 -.. _4: https://reviews.llvm.org/D83242 +.. _0: https://github.com/llvm/llvm-project/commit/6b01b465388b204d543da3cf49efd6080db094a9 +.. _1: https://github.com/llvm/llvm-project/commit/072cde03aaa13a2c57acf62d79876bf79aa1919f +.. _2: https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99 +.. _3: https://github.com/llvm/llvm-project/commit/6d218b4adb093ff2e9764febbbc89f429412006c +.. _4: https://github.com/llvm/llvm-project/commit/6d6750696400e7ce988d66a1a00e1d0cb32815f8 Floating-point tests and Clang version ====================================== @@ -234,7 +234,7 @@ Certain selftests, e.g. core_reloc, require support for the floating-point types, which was introduced in `Clang 13`__. The older Clang versions will either crash when compiling these tests, or generate an incorrect BTF. -__ https://reviews.llvm.org/D83289 +__ https://github.com/llvm/llvm-project/commit/a7137b238a07d9399d3ae96c0b461571bd5aa8b2 Kernel function call test and Clang version =========================================== @@ -248,7 +248,7 @@ Without it, the error from compiling bpf selftests looks like: libbpf: failed to find BTF for extern 'tcp_slow_start' [25] section: -2 -__ https://reviews.llvm.org/D93563 +__ https://github.com/llvm/llvm-project/commit/886f9ff53155075bd5f1e994f17b85d1e1b7470c btf_tag test and Clang version ============================== @@ -264,8 +264,8 @@ Without them, the btf_tag selftest will be skipped and you will observe: #<test_num> btf_tag:SKIP -.. _0: https://reviews.llvm.org/D111588 -.. _1: https://reviews.llvm.org/D111199 +.. _0: https://github.com/llvm/llvm-project/commit/a162b67c98066218d0d00aa13b99afb95d9bb5e6 +.. _1: https://github.com/llvm/llvm-project/commit/3466e00716e12e32fdb100e3fcfca5c2b3e8d784 Clang dependencies for static linking tests =========================================== @@ -274,7 +274,7 @@ linked_vars, linked_maps, and linked_funcs tests depend on `Clang fix`__ to generate valid BTF information for weak variables. Please make sure you use Clang that contains the fix. -__ https://reviews.llvm.org/D100362 +__ https://github.com/llvm/llvm-project/commit/968292cb93198442138128d850fd54dc7edc0035 Clang relocation changes ======================== @@ -292,7 +292,7 @@ Here, ``type 2`` refers to new relocation type ``R_BPF_64_ABS64``. To fix this issue, user newer libbpf. .. Links -.. _clang reloc patch: https://reviews.llvm.org/D102712 +.. _clang reloc patch: https://github.com/llvm/llvm-project/commit/6a2ea84600ba4bd3b2733bd8f08f5115eb32164b .. _kernel llvm reloc: /Documentation/bpf/llvm_reloc.rst Clang dependencies for the u32 spill test (xdpwall) @@ -304,6 +304,6 @@ from running test_progs will look like: .. code-block:: console - test_xdpwall:FAIL:Does LLVM have https://reviews.llvm.org/D109073? unexpected error: -4007 + test_xdpwall:FAIL:Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5? unexpected error: -4007 -__ https://reviews.llvm.org/D109073 +__ https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5 diff --git a/tools/testing/selftests/bpf/bpf_experimental.h b/tools/testing/selftests/bpf/bpf_experimental.h index f44875f8b367..0d749006d107 100644 --- a/tools/testing/selftests/bpf/bpf_experimental.h +++ b/tools/testing/selftests/bpf/bpf_experimental.h @@ -260,11 +260,11 @@ extern void bpf_throw(u64 cookie) __ksym; #define __is_signed_type(type) (((type)(-1)) < (type)1) -#define __bpf_cmp(LHS, OP, SIGN, PRED, RHS, DEFAULT) \ +#define __bpf_cmp(LHS, OP, PRED, RHS, DEFAULT) \ ({ \ __label__ l_true; \ bool ret = DEFAULT; \ - asm volatile goto("if %[lhs] " SIGN #OP " %[rhs] goto %l[l_true]" \ + asm volatile goto("if %[lhs] " OP " %[rhs] goto %l[l_true]" \ :: [lhs] "r"((short)LHS), [rhs] PRED (RHS) :: l_true); \ ret = !DEFAULT; \ l_true: \ @@ -276,7 +276,7 @@ l_true: \ * __lhs OP __rhs below will catch the mistake. * Be aware that we check only __lhs to figure out the sign of compare. */ -#define _bpf_cmp(LHS, OP, RHS, NOFLIP) \ +#define _bpf_cmp(LHS, OP, RHS, UNLIKELY) \ ({ \ typeof(LHS) __lhs = (LHS); \ typeof(RHS) __rhs = (RHS); \ @@ -285,14 +285,17 @@ l_true: \ (void)(__lhs OP __rhs); \ if (__cmp_cannot_be_signed(OP) || !__is_signed_type(typeof(__lhs))) { \ if (sizeof(__rhs) == 8) \ - ret = __bpf_cmp(__lhs, OP, "", "r", __rhs, NOFLIP); \ + /* "i" will truncate 64-bit constant into s32, \ + * so we have to use extra register via "r". \ + */ \ + ret = __bpf_cmp(__lhs, #OP, "r", __rhs, UNLIKELY); \ else \ - ret = __bpf_cmp(__lhs, OP, "", "i", __rhs, NOFLIP); \ + ret = __bpf_cmp(__lhs, #OP, "ri", __rhs, UNLIKELY); \ } else { \ if (sizeof(__rhs) == 8) \ - ret = __bpf_cmp(__lhs, OP, "s", "r", __rhs, NOFLIP); \ + ret = __bpf_cmp(__lhs, "s"#OP, "r", __rhs, UNLIKELY); \ else \ - ret = __bpf_cmp(__lhs, OP, "s", "i", __rhs, NOFLIP); \ + ret = __bpf_cmp(__lhs, "s"#OP, "ri", __rhs, UNLIKELY); \ } \ ret; \ }) @@ -304,7 +307,7 @@ l_true: \ #ifndef bpf_cmp_likely #define bpf_cmp_likely(LHS, OP, RHS) \ ({ \ - bool ret; \ + bool ret = 0; \ if (__builtin_strcmp(#OP, "==") == 0) \ ret = _bpf_cmp(LHS, !=, RHS, false); \ else if (__builtin_strcmp(#OP, "!=") == 0) \ @@ -318,7 +321,7 @@ l_true: \ else if (__builtin_strcmp(#OP, ">=") == 0) \ ret = _bpf_cmp(LHS, <, RHS, false); \ else \ - (void) "bug"; \ + asm volatile("r0 " #OP " invalid compare"); \ ret; \ }) #endif diff --git a/tools/testing/selftests/bpf/bpf_kfuncs.h b/tools/testing/selftests/bpf/bpf_kfuncs.h index b4e78c1eb37b..23c24f852f4f 100644 --- a/tools/testing/selftests/bpf/bpf_kfuncs.h +++ b/tools/testing/selftests/bpf/bpf_kfuncs.h @@ -51,6 +51,16 @@ extern int bpf_dynptr_clone(const struct bpf_dynptr *ptr, struct bpf_dynptr *clo extern int bpf_sock_addr_set_sun_path(struct bpf_sock_addr_kern *sa_kern, const __u8 *sun_path, __u32 sun_path__sz) __ksym; +/* Description + * Allocate and configure a reqsk and link it with a listener and skb. + * Returns + * Error code + */ +struct sock; +struct bpf_tcp_req_attrs; +extern int bpf_sk_assign_tcp_reqsk(struct __sk_buff *skb, struct sock *sk, + struct bpf_tcp_req_attrs *attrs, int attrs__sz) __ksym; + void *bpf_cast_to_kern_ctx(void *) __ksym; void *bpf_rdonly_cast(void *obj, __u32 btf_id) __ksym; diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c index 91907b321f91..8befaf17d454 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.c @@ -1,7 +1,9 @@ // SPDX-License-Identifier: GPL-2.0 /* Copyright (c) 2020 Facebook */ +#include <linux/bpf.h> #include <linux/btf.h> #include <linux/btf_ids.h> +#include <linux/delay.h> #include <linux/error-injection.h> #include <linux/init.h> #include <linux/module.h> @@ -520,11 +522,75 @@ BTF_ID_FLAGS(func, bpf_kfunc_call_test_static_unused_arg) BTF_ID_FLAGS(func, bpf_kfunc_call_test_offset) BTF_SET8_END(bpf_testmod_check_kfunc_ids) +static int bpf_testmod_ops_init(struct btf *btf) +{ + return 0; +} + +static bool bpf_testmod_ops_is_valid_access(int off, int size, + enum bpf_access_type type, + const struct bpf_prog *prog, + struct bpf_insn_access_aux *info) +{ + return bpf_tracing_btf_ctx_access(off, size, type, prog, info); +} + +static int bpf_testmod_ops_init_member(const struct btf_type *t, + const struct btf_member *member, + void *kdata, const void *udata) +{ + return 0; +} + static const struct btf_kfunc_id_set bpf_testmod_kfunc_set = { .owner = THIS_MODULE, .set = &bpf_testmod_check_kfunc_ids, }; +static const struct bpf_verifier_ops bpf_testmod_verifier_ops = { + .is_valid_access = bpf_testmod_ops_is_valid_access, +}; + +static int bpf_dummy_reg(void *kdata) +{ + struct bpf_testmod_ops *ops = kdata; + int r; + + r = ops->test_2(4, 3); + + return 0; +} + +static void bpf_dummy_unreg(void *kdata) +{ +} + +static int bpf_testmod_test_1(void) +{ + return 0; +} + +static int bpf_testmod_test_2(int a, int b) +{ + return 0; +} + +static struct bpf_testmod_ops __bpf_testmod_ops = { + .test_1 = bpf_testmod_test_1, + .test_2 = bpf_testmod_test_2, +}; + +struct bpf_struct_ops bpf_bpf_testmod_ops = { + .verifier_ops = &bpf_testmod_verifier_ops, + .init = bpf_testmod_ops_init, + .init_member = bpf_testmod_ops_init_member, + .reg = bpf_dummy_reg, + .unreg = bpf_dummy_unreg, + .cfi_stubs = &__bpf_testmod_ops, + .name = "bpf_testmod_ops", + .owner = THIS_MODULE, +}; + extern int bpf_fentry_test1(int a); static int bpf_testmod_init(void) @@ -535,6 +601,7 @@ static int bpf_testmod_init(void) ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SCHED_CLS, &bpf_testmod_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_TRACING, &bpf_testmod_kfunc_set); ret = ret ?: register_btf_kfunc_id_set(BPF_PROG_TYPE_SYSCALL, &bpf_testmod_kfunc_set); + ret = ret ?: register_bpf_struct_ops(&bpf_bpf_testmod_ops, bpf_testmod_ops); if (ret < 0) return ret; if (bpf_fentry_test1(0) < 0) @@ -544,6 +611,14 @@ static int bpf_testmod_init(void) static void bpf_testmod_exit(void) { + /* Need to wait for all references to be dropped because + * bpf_kfunc_call_test_release() which currently resides in kernel can + * be called after bpf_testmod is unloaded. Once release function is + * moved into the module this wait can be removed. + */ + while (refcount_read(&prog_test_struct.cnt) > 1) + msleep(20); + return sysfs_remove_bin_file(kernel_kobj, &bin_attr_bpf_testmod_file); } diff --git a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h index f32793efe095..ca5435751c79 100644 --- a/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h +++ b/tools/testing/selftests/bpf/bpf_testmod/bpf_testmod.h @@ -28,4 +28,9 @@ struct bpf_iter_testmod_seq { int cnt; }; +struct bpf_testmod_ops { + int (*test_1)(void); + int (*test_2)(int a, int b); +}; + #endif /* _BPF_TESTMOD_H */ diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config index c125c441abc7..01f241ea2c67 100644 --- a/tools/testing/selftests/bpf/config +++ b/tools/testing/selftests/bpf/config @@ -81,6 +81,7 @@ CONFIG_NF_NAT=y CONFIG_RC_CORE=y CONFIG_SECURITY=y CONFIG_SECURITYFS=y +CONFIG_SYN_COOKIES=y CONFIG_TEST_BPF=m CONFIG_USERFAULTFD=y CONFIG_VSOCKETS=y diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c index e770912fc1d2..4c6ada5b270b 100644 --- a/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c +++ b/tools/testing/selftests/bpf/prog_tests/bpf_verif_scale.c @@ -35,7 +35,7 @@ static int check_load(const char *file, enum bpf_prog_type type) } bpf_program__set_type(prog, type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); + bpf_program__set_flags(prog, testing_prog_flags()); bpf_program__set_log_level(prog, 4 | extra_prog_load_log_flags); err = bpf_object__load(obj); diff --git a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c index 4951aa978f33..3b7c57fe55a5 100644 --- a/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c +++ b/tools/testing/selftests/bpf/prog_tests/ctx_rewrite.c @@ -626,50 +626,6 @@ err: return false; } -/* Request BPF program instructions after all rewrites are applied, - * e.g. verifier.c:convert_ctx_access() is done. - */ -static int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) -{ - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 xlated_prog_len; - __u32 buf_element_size = sizeof(struct bpf_insn); - - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("bpf_prog_get_info_by_fd failed"); - return -1; - } - - xlated_prog_len = info.xlated_prog_len; - if (xlated_prog_len % buf_element_size) { - printf("Program length %d is not multiple of %d\n", - xlated_prog_len, buf_element_size); - return -1; - } - - *cnt = xlated_prog_len / buf_element_size; - *buf = calloc(*cnt, buf_element_size); - if (!buf) { - perror("can't allocate xlated program buffer"); - return -ENOMEM; - } - - bzero(&info, sizeof(info)); - info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)(unsigned long)*buf; - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("second bpf_prog_get_info_by_fd failed"); - goto out_free_buf; - } - - return 0; - -out_free_buf: - free(*buf); - return -1; -} - static void print_insn(void *private_data, const char *fmt, ...) { va_list args; diff --git a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c index d4b1901f7879..f3932941bbaa 100644 --- a/tools/testing/selftests/bpf/prog_tests/fill_link_info.c +++ b/tools/testing/selftests/bpf/prog_tests/fill_link_info.c @@ -19,6 +19,7 @@ static const char *kmulti_syms[] = { }; #define KMULTI_CNT ARRAY_SIZE(kmulti_syms) static __u64 kmulti_addrs[KMULTI_CNT]; +static __u64 kmulti_cookies[] = { 3, 1, 2 }; #define KPROBE_FUNC "bpf_fentry_test1" static __u64 kprobe_addr; @@ -31,6 +32,8 @@ static noinline void uprobe_func(void) asm volatile (""); } +#define PERF_EVENT_COOKIE 0xdeadbeef + static int verify_perf_link_info(int fd, enum bpf_perf_event_type type, long addr, ssize_t offset, ssize_t entry_offset) { @@ -62,6 +65,8 @@ again: ASSERT_EQ(info.perf_event.kprobe.addr, addr + entry_offset, "kprobe_addr"); + ASSERT_EQ(info.perf_event.kprobe.cookie, PERF_EVENT_COOKIE, "kprobe_cookie"); + if (!info.perf_event.kprobe.func_name) { ASSERT_EQ(info.perf_event.kprobe.name_len, 0, "name_len"); info.perf_event.kprobe.func_name = ptr_to_u64(&buf); @@ -81,6 +86,8 @@ again: goto again; } + ASSERT_EQ(info.perf_event.tracepoint.cookie, PERF_EVENT_COOKIE, "tracepoint_cookie"); + err = strncmp(u64_to_ptr(info.perf_event.tracepoint.tp_name), TP_NAME, strlen(TP_NAME)); ASSERT_EQ(err, 0, "cmp_tp_name"); @@ -96,10 +103,17 @@ again: goto again; } + ASSERT_EQ(info.perf_event.uprobe.cookie, PERF_EVENT_COOKIE, "uprobe_cookie"); + err = strncmp(u64_to_ptr(info.perf_event.uprobe.file_name), UPROBE_FILE, strlen(UPROBE_FILE)); ASSERT_EQ(err, 0, "cmp_file_name"); break; + case BPF_PERF_EVENT_EVENT: + ASSERT_EQ(info.perf_event.event.type, PERF_TYPE_SOFTWARE, "event_type"); + ASSERT_EQ(info.perf_event.event.config, PERF_COUNT_SW_PAGE_FAULTS, "event_config"); + ASSERT_EQ(info.perf_event.event.cookie, PERF_EVENT_COOKIE, "event_cookie"); + break; default: err = -1; break; @@ -139,6 +153,7 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, DECLARE_LIBBPF_OPTS(bpf_kprobe_opts, opts, .attach_mode = PROBE_ATTACH_MODE_LINK, .retprobe = type == BPF_PERF_EVENT_KRETPROBE, + .bpf_cookie = PERF_EVENT_COOKIE, ); ssize_t entry_offset = 0; struct bpf_link *link; @@ -163,10 +178,13 @@ static void test_kprobe_fill_link_info(struct test_fill_link_info *skel, static void test_tp_fill_link_info(struct test_fill_link_info *skel) { + DECLARE_LIBBPF_OPTS(bpf_tracepoint_opts, opts, + .bpf_cookie = PERF_EVENT_COOKIE, + ); struct bpf_link *link; int link_fd, err; - link = bpf_program__attach_tracepoint(skel->progs.tp_run, TP_CAT, TP_NAME); + link = bpf_program__attach_tracepoint_opts(skel->progs.tp_run, TP_CAT, TP_NAME, &opts); if (!ASSERT_OK_PTR(link, "attach_tp")) return; @@ -176,16 +194,53 @@ static void test_tp_fill_link_info(struct test_fill_link_info *skel) bpf_link__destroy(link); } +static void test_event_fill_link_info(struct test_fill_link_info *skel) +{ + DECLARE_LIBBPF_OPTS(bpf_perf_event_opts, opts, + .bpf_cookie = PERF_EVENT_COOKIE, + ); + struct bpf_link *link; + int link_fd, err, pfd; + struct perf_event_attr attr = { + .type = PERF_TYPE_SOFTWARE, + .config = PERF_COUNT_SW_PAGE_FAULTS, + .freq = 1, + .sample_freq = 1, + .size = sizeof(struct perf_event_attr), + }; + + pfd = syscall(__NR_perf_event_open, &attr, -1 /* pid */, 0 /* cpu 0 */, + -1 /* group id */, 0 /* flags */); + if (!ASSERT_GE(pfd, 0, "perf_event_open")) + return; + + link = bpf_program__attach_perf_event_opts(skel->progs.event_run, pfd, &opts); + if (!ASSERT_OK_PTR(link, "attach_event")) + goto error; + + link_fd = bpf_link__fd(link); + err = verify_perf_link_info(link_fd, BPF_PERF_EVENT_EVENT, 0, 0, 0); + ASSERT_OK(err, "verify_perf_link_info"); + bpf_link__destroy(link); + +error: + close(pfd); +} + static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, enum bpf_perf_event_type type) { + DECLARE_LIBBPF_OPTS(bpf_uprobe_opts, opts, + .retprobe = type == BPF_PERF_EVENT_URETPROBE, + .bpf_cookie = PERF_EVENT_COOKIE, + ); struct bpf_link *link; int link_fd, err; - link = bpf_program__attach_uprobe(skel->progs.uprobe_run, - type == BPF_PERF_EVENT_URETPROBE, - 0, /* self pid */ - UPROBE_FILE, uprobe_offset); + link = bpf_program__attach_uprobe_opts(skel->progs.uprobe_run, + 0, /* self pid */ + UPROBE_FILE, uprobe_offset, + &opts); if (!ASSERT_OK_PTR(link, "attach_uprobe")) return; @@ -195,11 +250,11 @@ static void test_uprobe_fill_link_info(struct test_fill_link_info *skel, bpf_link__destroy(link); } -static int verify_kmulti_link_info(int fd, bool retprobe) +static int verify_kmulti_link_info(int fd, bool retprobe, bool has_cookies) { + __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT]; struct bpf_link_info info; __u32 len = sizeof(info); - __u64 addrs[KMULTI_CNT]; int flags, i, err; memset(&info, 0, sizeof(info)); @@ -221,18 +276,22 @@ again: if (!info.kprobe_multi.addrs) { info.kprobe_multi.addrs = ptr_to_u64(addrs); + info.kprobe_multi.cookies = ptr_to_u64(cookies); goto again; } - for (i = 0; i < KMULTI_CNT; i++) + for (i = 0; i < KMULTI_CNT; i++) { ASSERT_EQ(addrs[i], kmulti_addrs[i], "kmulti_addrs"); + ASSERT_EQ(cookies[i], has_cookies ? kmulti_cookies[i] : 0, + "kmulti_cookies_value"); + } return 0; } static void verify_kmulti_invalid_user_buffer(int fd) { + __u64 addrs[KMULTI_CNT], cookies[KMULTI_CNT]; struct bpf_link_info info; __u32 len = sizeof(info); - __u64 addrs[KMULTI_CNT]; int err, i; memset(&info, 0, sizeof(info)); @@ -266,7 +325,20 @@ static void verify_kmulti_invalid_user_buffer(int fd) info.kprobe_multi.count = KMULTI_CNT; info.kprobe_multi.addrs = 0x1; /* invalid addr */ err = bpf_link_get_info_by_fd(fd, &info, &len); - ASSERT_EQ(err, -EFAULT, "invalid_buff"); + ASSERT_EQ(err, -EFAULT, "invalid_buff_addrs"); + + info.kprobe_multi.count = KMULTI_CNT; + info.kprobe_multi.addrs = ptr_to_u64(addrs); + info.kprobe_multi.cookies = 0x1; /* invalid addr */ + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EFAULT, "invalid_buff_cookies"); + + /* cookies && !count */ + info.kprobe_multi.count = 0; + info.kprobe_multi.addrs = ptr_to_u64(NULL); + info.kprobe_multi.cookies = ptr_to_u64(cookies); + err = bpf_link_get_info_by_fd(fd, &info, &len); + ASSERT_EQ(err, -EINVAL, "invalid_cookies_count"); } static int symbols_cmp_r(const void *a, const void *b) @@ -278,13 +350,15 @@ static int symbols_cmp_r(const void *a, const void *b) } static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, - bool retprobe, bool invalid) + bool retprobe, bool cookies, + bool invalid) { LIBBPF_OPTS(bpf_kprobe_multi_opts, opts); struct bpf_link *link; int link_fd, err; opts.syms = kmulti_syms; + opts.cookies = cookies ? kmulti_cookies : NULL; opts.cnt = KMULTI_CNT; opts.retprobe = retprobe; link = bpf_program__attach_kprobe_multi_opts(skel->progs.kmulti_run, NULL, &opts); @@ -293,7 +367,7 @@ static void test_kprobe_multi_fill_link_info(struct test_fill_link_info *skel, link_fd = bpf_link__fd(link); if (!invalid) { - err = verify_kmulti_link_info(link_fd, retprobe); + err = verify_kmulti_link_info(link_fd, retprobe, cookies); ASSERT_OK(err, "verify_kmulti_link_info"); } else { verify_kmulti_invalid_user_buffer(link_fd); @@ -513,6 +587,8 @@ void test_fill_link_info(void) test_kprobe_fill_link_info(skel, BPF_PERF_EVENT_KPROBE, true); if (test__start_subtest("tracepoint_link_info")) test_tp_fill_link_info(skel); + if (test__start_subtest("event_link_info")) + test_event_fill_link_info(skel); uprobe_offset = get_uprobe_offset(&uprobe_func); if (test__start_subtest("uprobe_link_info")) @@ -523,12 +599,16 @@ void test_fill_link_info(void) qsort(kmulti_syms, KMULTI_CNT, sizeof(kmulti_syms[0]), symbols_cmp_r); for (i = 0; i < KMULTI_CNT; i++) kmulti_addrs[i] = ksym_get_addr(kmulti_syms[i]); - if (test__start_subtest("kprobe_multi_link_info")) - test_kprobe_multi_fill_link_info(skel, false, false); - if (test__start_subtest("kretprobe_multi_link_info")) - test_kprobe_multi_fill_link_info(skel, true, false); + if (test__start_subtest("kprobe_multi_link_info")) { + test_kprobe_multi_fill_link_info(skel, false, false, false); + test_kprobe_multi_fill_link_info(skel, false, true, false); + } + if (test__start_subtest("kretprobe_multi_link_info")) { + test_kprobe_multi_fill_link_info(skel, true, false, false); + test_kprobe_multi_fill_link_info(skel, true, true, false); + } if (test__start_subtest("kprobe_multi_invalid_ubuff")) - test_kprobe_multi_fill_link_info(skel, true, true); + test_kprobe_multi_fill_link_info(skel, true, true, true); if (test__start_subtest("uprobe_multi_link_info")) test_uprobe_multi_fill_link_info(skel, false, false); diff --git a/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c new file mode 100644 index 000000000000..15144943e88b --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/kptr_xchg_inline.c @@ -0,0 +1,51 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <test_progs.h> + +#include "linux/filter.h" +#include "kptr_xchg_inline.skel.h" + +void test_kptr_xchg_inline(void) +{ + struct kptr_xchg_inline *skel; + struct bpf_insn *insn = NULL; + struct bpf_insn exp; + unsigned int cnt; + int err; + +#if !(defined(__x86_64__) || defined(__aarch64__)) + test__skip(); + return; +#endif + + skel = kptr_xchg_inline__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_load")) + return; + + err = get_xlated_program(bpf_program__fd(skel->progs.kptr_xchg_inline), &insn, &cnt); + if (!ASSERT_OK(err, "prog insn")) + goto out; + + /* The original instructions are: + * r1 = map[id:xxx][0]+0 + * r2 = 0 + * call bpf_kptr_xchg#yyy + * + * call bpf_kptr_xchg#yyy will be inlined as: + * r0 = r2 + * r0 = atomic64_xchg((u64 *)(r1 +0), r0) + */ + if (!ASSERT_GT(cnt, 5, "insn cnt")) + goto out; + + exp = BPF_MOV64_REG(BPF_REG_0, BPF_REG_2); + if (!ASSERT_OK(memcmp(&insn[3], &exp, sizeof(exp)), "mov")) + goto out; + + exp = BPF_ATOMIC_OP(BPF_DW, BPF_XCHG, BPF_REG_1, BPF_REG_0, 0); + if (!ASSERT_OK(memcmp(&insn[4], &exp, sizeof(exp)), "xchg")) + goto out; +out: + free(insn); + kptr_xchg_inline__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c index 9f766ddd946a..4ed46ed58a7b 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_probes.c @@ -30,6 +30,8 @@ void test_libbpf_probe_prog_types(void) if (prog_type == BPF_PROG_TYPE_UNSPEC) continue; + if (strcmp(prog_type_name, "__MAX_BPF_PROG_TYPE") == 0) + continue; if (!test__start_subtest(prog_type_name)) continue; @@ -68,6 +70,8 @@ void test_libbpf_probe_map_types(void) if (map_type == BPF_MAP_TYPE_UNSPEC) continue; + if (strcmp(map_type_name, "__MAX_BPF_MAP_TYPE") == 0) + continue; if (!test__start_subtest(map_type_name)) continue; diff --git a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c index eb34d612d6f8..62ea855ec4d0 100644 --- a/tools/testing/selftests/bpf/prog_tests/libbpf_str.c +++ b/tools/testing/selftests/bpf/prog_tests/libbpf_str.c @@ -132,6 +132,9 @@ static void test_libbpf_bpf_map_type_str(void) const char *map_type_str; char buf[256]; + if (map_type == __MAX_BPF_MAP_TYPE) + continue; + map_type_name = btf__str_by_offset(btf, e->name_off); map_type_str = libbpf_bpf_map_type_str(map_type); ASSERT_OK_PTR(map_type_str, map_type_name); @@ -186,6 +189,9 @@ static void test_libbpf_bpf_prog_type_str(void) const char *prog_type_str; char buf[256]; + if (prog_type == __MAX_BPF_PROG_TYPE) + continue; + prog_type_name = btf__str_by_offset(btf, e->name_off); prog_type_str = libbpf_bpf_prog_type_str(prog_type); ASSERT_OK_PTR(prog_type_str, prog_type_name); diff --git a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c index 820d0bcfc474..eb74363f9f70 100644 --- a/tools/testing/selftests/bpf/prog_tests/reg_bounds.c +++ b/tools/testing/selftests/bpf/prog_tests/reg_bounds.c @@ -840,7 +840,7 @@ static int load_range_cmp_prog(struct range x, struct range y, enum op op, .log_level = 2, .log_buf = log_buf, .log_size = log_sz, - .prog_flags = BPF_F_TEST_REG_INVARIANTS, + .prog_flags = testing_prog_flags(), ); /* ; skip exit block below diff --git a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c index 518f143c5b0f..dbe06aeaa2b2 100644 --- a/tools/testing/selftests/bpf/prog_tests/tc_redirect.c +++ b/tools/testing/selftests/bpf/prog_tests/tc_redirect.c @@ -188,6 +188,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) { struct nstoken *nstoken = NULL; char src_fwd_addr[IFADDR_STR_LEN+1] = {}; + char src_addr[IFADDR_STR_LEN + 1] = {}; int err; if (result->dev_mode == MODE_VETH) { @@ -208,6 +209,9 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) if (get_ifaddr("src_fwd", src_fwd_addr)) goto fail; + if (get_ifaddr("src", src_addr)) + goto fail; + result->ifindex_src = if_nametoindex("src"); if (!ASSERT_GT(result->ifindex_src, 0, "ifindex_src")) goto fail; @@ -270,6 +274,13 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS(fail, "ip route add " IP4_DST "/32 dev dst_fwd scope global"); SYS(fail, "ip route add " IP6_DST "/128 dev dst_fwd scope global"); + if (result->dev_mode == MODE_VETH) { + SYS(fail, "ip neigh add " IP4_SRC " dev src_fwd lladdr %s", src_addr); + SYS(fail, "ip neigh add " IP6_SRC " dev src_fwd lladdr %s", src_addr); + SYS(fail, "ip neigh add " IP4_DST " dev dst_fwd lladdr %s", MAC_DST); + SYS(fail, "ip neigh add " IP6_DST " dev dst_fwd lladdr %s", MAC_DST); + } + close_netns(nstoken); /** setup in 'dst' namespace */ @@ -280,6 +291,7 @@ static int netns_setup_links_and_routes(struct netns_setup_result *result) SYS(fail, "ip addr add " IP4_DST "/32 dev dst"); SYS(fail, "ip addr add " IP6_DST "/128 dev dst nodad"); SYS(fail, "ip link set dev dst up"); + SYS(fail, "ip link set dev lo up"); SYS(fail, "ip route add " IP4_SRC "/32 dev dst scope global"); SYS(fail, "ip route add " IP4_NET "/16 dev dst scope global"); @@ -457,7 +469,7 @@ static int set_forwarding(bool enable) return 0; } -static void rcv_tstamp(int fd, const char *expected, size_t s) +static int __rcv_tstamp(int fd, const char *expected, size_t s, __u64 *tstamp) { struct __kernel_timespec pkt_ts = {}; char ctl[CMSG_SPACE(sizeof(pkt_ts))]; @@ -478,7 +490,7 @@ static void rcv_tstamp(int fd, const char *expected, size_t s) ret = recvmsg(fd, &msg, 0); if (!ASSERT_EQ(ret, s, "recvmsg")) - return; + return -1; ASSERT_STRNEQ(data, expected, s, "expected rcv data"); cmsg = CMSG_FIRSTHDR(&msg); @@ -487,6 +499,12 @@ static void rcv_tstamp(int fd, const char *expected, size_t s) memcpy(&pkt_ts, CMSG_DATA(cmsg), sizeof(pkt_ts)); pkt_ns = pkt_ts.tv_sec * NSEC_PER_SEC + pkt_ts.tv_nsec; + if (tstamp) { + /* caller will check the tstamp itself */ + *tstamp = pkt_ns; + return 0; + } + ASSERT_NEQ(pkt_ns, 0, "pkt rcv tstamp"); ret = clock_gettime(CLOCK_REALTIME, &now_ts); @@ -496,6 +514,60 @@ static void rcv_tstamp(int fd, const char *expected, size_t s) if (ASSERT_GE(now_ns, pkt_ns, "check rcv tstamp")) ASSERT_LT(now_ns - pkt_ns, 5 * NSEC_PER_SEC, "check rcv tstamp"); + return 0; +} + +static void rcv_tstamp(int fd, const char *expected, size_t s) +{ + __rcv_tstamp(fd, expected, s, NULL); +} + +static int wait_netstamp_needed_key(void) +{ + int opt = 1, srv_fd = -1, cli_fd = -1, nretries = 0, err, n; + char buf[] = "testing testing"; + struct nstoken *nstoken; + __u64 tstamp = 0; + + nstoken = open_netns(NS_DST); + if (!nstoken) + return -1; + + srv_fd = start_server(AF_INET6, SOCK_DGRAM, "::1", 0, 0); + if (!ASSERT_GE(srv_fd, 0, "start_server")) + goto done; + + err = setsockopt(srv_fd, SOL_SOCKET, SO_TIMESTAMPNS_NEW, + &opt, sizeof(opt)); + if (!ASSERT_OK(err, "setsockopt(SO_TIMESTAMPNS_NEW)")) + goto done; + + cli_fd = connect_to_fd(srv_fd, TIMEOUT_MILLIS); + if (!ASSERT_GE(cli_fd, 0, "connect_to_fd")) + goto done; + +again: + n = write(cli_fd, buf, sizeof(buf)); + if (!ASSERT_EQ(n, sizeof(buf), "send to server")) + goto done; + err = __rcv_tstamp(srv_fd, buf, sizeof(buf), &tstamp); + if (!ASSERT_OK(err, "__rcv_tstamp")) + goto done; + if (!tstamp && nretries++ < 5) { + sleep(1); + printf("netstamp_needed_key retry#%d\n", nretries); + goto again; + } + +done: + if (!tstamp && srv_fd != -1) { + close(srv_fd); + srv_fd = -1; + } + if (cli_fd != -1) + close(cli_fd); + close_netns(nstoken); + return srv_fd; } static void snd_tstamp(int fd, char *b, size_t s) @@ -832,11 +904,20 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) { struct test_tc_dtime *skel; struct nstoken *nstoken; - int err; + int hold_tstamp_fd, err; + + /* Hold a sk with the SOCK_TIMESTAMP set to ensure there + * is no delay in the kernel net_enable_timestamp(). + * This ensures the following tests must have + * non zero rcv tstamp in the recvmsg(). + */ + hold_tstamp_fd = wait_netstamp_needed_key(); + if (!ASSERT_GE(hold_tstamp_fd, 0, "wait_netstamp_needed_key")) + return; skel = test_tc_dtime__open(); if (!ASSERT_OK_PTR(skel, "test_tc_dtime__open")) - return; + goto done; skel->rodata->IFINDEX_SRC = setup_result->ifindex_src_fwd; skel->rodata->IFINDEX_DST = setup_result->ifindex_dst_fwd; @@ -881,6 +962,7 @@ static void test_tc_redirect_dtime(struct netns_setup_result *setup_result) done: test_tc_dtime__destroy(skel); + close(hold_tstamp_fd); } static void test_tc_redirect_neigh_fib(struct netns_setup_result *setup_result) diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c new file mode 100644 index 000000000000..eaf441dc7e79 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/tcp_custom_syncookie.c @@ -0,0 +1,150 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#define _GNU_SOURCE +#include <sched.h> +#include <stdlib.h> +#include <net/if.h> + +#include "test_progs.h" +#include "cgroup_helpers.h" +#include "network_helpers.h" +#include "test_tcp_custom_syncookie.skel.h" + +static struct test_tcp_custom_syncookie_case { + int family, type; + char addr[16]; + char name[10]; +} test_cases[] = { + { + .name = "IPv4 TCP", + .family = AF_INET, + .type = SOCK_STREAM, + .addr = "127.0.0.1", + }, + { + .name = "IPv6 TCP", + .family = AF_INET6, + .type = SOCK_STREAM, + .addr = "::1", + }, +}; + +static int setup_netns(void) +{ + if (!ASSERT_OK(unshare(CLONE_NEWNET), "create netns")) + return -1; + + if (!ASSERT_OK(system("ip link set dev lo up"), "ip")) + goto err; + + if (!ASSERT_OK(write_sysctl("/proc/sys/net/ipv4/tcp_ecn", "1"), + "write_sysctl")) + goto err; + + return 0; +err: + return -1; +} + +static int setup_tc(struct test_tcp_custom_syncookie *skel) +{ + LIBBPF_OPTS(bpf_tc_hook, qdisc_lo, .attach_point = BPF_TC_INGRESS); + LIBBPF_OPTS(bpf_tc_opts, tc_attach, + .prog_fd = bpf_program__fd(skel->progs.tcp_custom_syncookie)); + + qdisc_lo.ifindex = if_nametoindex("lo"); + if (!ASSERT_OK(bpf_tc_hook_create(&qdisc_lo), "qdisc add dev lo clsact")) + goto err; + + if (!ASSERT_OK(bpf_tc_attach(&qdisc_lo, &tc_attach), + "filter add dev lo ingress")) + goto err; + + return 0; +err: + return -1; +} + +#define msg "Hello World" +#define msglen 11 + +static void transfer_message(int sender, int receiver) +{ + char buf[msglen]; + int ret; + + ret = send(sender, msg, msglen, 0); + if (!ASSERT_EQ(ret, msglen, "send")) + return; + + memset(buf, 0, sizeof(buf)); + + ret = recv(receiver, buf, msglen, 0); + if (!ASSERT_EQ(ret, msglen, "recv")) + return; + + ret = strncmp(buf, msg, msglen); + if (!ASSERT_EQ(ret, 0, "strncmp")) + return; +} + +static void create_connection(struct test_tcp_custom_syncookie_case *test_case) +{ + int server, client, child; + + server = start_server(test_case->family, test_case->type, test_case->addr, 0, 0); + if (!ASSERT_NEQ(server, -1, "start_server")) + return; + + client = connect_to_fd(server, 0); + if (!ASSERT_NEQ(client, -1, "connect_to_fd")) + goto close_server; + + child = accept(server, NULL, 0); + if (!ASSERT_NEQ(child, -1, "accept")) + goto close_client; + + transfer_message(client, child); + transfer_message(child, client); + + close(child); +close_client: + close(client); +close_server: + close(server); +} + +void test_tcp_custom_syncookie(void) +{ + struct test_tcp_custom_syncookie *skel; + int i; + + if (setup_netns()) + return; + + skel = test_tcp_custom_syncookie__open_and_load(); + if (!ASSERT_OK_PTR(skel, "open_and_load")) + return; + + if (setup_tc(skel)) + goto destroy_skel; + + for (i = 0; i < ARRAY_SIZE(test_cases); i++) { + if (!test__start_subtest(test_cases[i].name)) + continue; + + skel->bss->handled_syn = false; + skel->bss->handled_ack = false; + + create_connection(&test_cases[i]); + + ASSERT_EQ(skel->bss->handled_syn, true, "SYN is not handled at tc."); + ASSERT_EQ(skel->bss->handled_ack, true, "ACK is not handled at tc"); + } + +destroy_skel: + system("tc qdisc del dev lo clsact"); + + test_tcp_custom_syncookie__destroy(skel); +} diff --git a/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c new file mode 100644 index 000000000000..8d833f0c7580 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/test_struct_ops_module.c @@ -0,0 +1,75 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <test_progs.h> +#include <time.h> + +#include "struct_ops_module.skel.h" + +static void check_map_info(struct bpf_map_info *info) +{ + struct bpf_btf_info btf_info; + char btf_name[256]; + u32 btf_info_len = sizeof(btf_info); + int err, fd; + + fd = bpf_btf_get_fd_by_id(info->btf_vmlinux_id); + if (!ASSERT_GE(fd, 0, "get_value_type_btf_obj_fd")) + return; + + memset(&btf_info, 0, sizeof(btf_info)); + btf_info.name = ptr_to_u64(btf_name); + btf_info.name_len = sizeof(btf_name); + err = bpf_btf_get_info_by_fd(fd, &btf_info, &btf_info_len); + if (!ASSERT_OK(err, "get_value_type_btf_obj_info")) + goto cleanup; + + if (!ASSERT_EQ(strcmp(btf_name, "bpf_testmod"), 0, "get_value_type_btf_obj_name")) + goto cleanup; + +cleanup: + close(fd); +} + +static void test_struct_ops_load(void) +{ + DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts); + struct struct_ops_module *skel; + struct bpf_map_info info = {}; + struct bpf_link *link; + int err; + u32 len; + + skel = struct_ops_module__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "struct_ops_module_open")) + return; + + err = struct_ops_module__load(skel); + if (!ASSERT_OK(err, "struct_ops_module_load")) + goto cleanup; + + len = sizeof(info); + err = bpf_map_get_info_by_fd(bpf_map__fd(skel->maps.testmod_1), &info, + &len); + if (!ASSERT_OK(err, "bpf_map_get_info_by_fd")) + goto cleanup; + + link = bpf_map__attach_struct_ops(skel->maps.testmod_1); + ASSERT_OK_PTR(link, "attach_test_mod_1"); + + /* test_2() will be called from bpf_dummy_reg() in bpf_testmod.c */ + ASSERT_EQ(skel->bss->test_2_result, 7, "test_2_result"); + + bpf_link__destroy(link); + + check_map_info(&info); + +cleanup: + struct_ops_module__destroy(skel); +} + +void serial_test_struct_ops_module(void) +{ + if (test__start_subtest("test_struct_ops_load")) + test_struct_ops_load(); +} + diff --git a/tools/testing/selftests/bpf/prog_tests/token.c b/tools/testing/selftests/bpf/prog_tests/token.c new file mode 100644 index 000000000000..fc4a175d8d76 --- /dev/null +++ b/tools/testing/selftests/bpf/prog_tests/token.c @@ -0,0 +1,1052 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ +#define _GNU_SOURCE +#include <test_progs.h> +#include <bpf/btf.h> +#include "cap_helpers.h" +#include <fcntl.h> +#include <sched.h> +#include <signal.h> +#include <unistd.h> +#include <linux/filter.h> +#include <linux/unistd.h> +#include <linux/mount.h> +#include <sys/socket.h> +#include <sys/stat.h> +#include <sys/syscall.h> +#include <sys/un.h> +#include "priv_map.skel.h" +#include "priv_prog.skel.h" +#include "dummy_st_ops_success.skel.h" +#include "token_lsm.skel.h" + +static inline int sys_mount(const char *dev_name, const char *dir_name, + const char *type, unsigned long flags, + const void *data) +{ + return syscall(__NR_mount, dev_name, dir_name, type, flags, data); +} + +static inline int sys_fsopen(const char *fsname, unsigned flags) +{ + return syscall(__NR_fsopen, fsname, flags); +} + +static inline int sys_fspick(int dfd, const char *path, unsigned flags) +{ + return syscall(__NR_fspick, dfd, path, flags); +} + +static inline int sys_fsconfig(int fs_fd, unsigned cmd, const char *key, const void *val, int aux) +{ + return syscall(__NR_fsconfig, fs_fd, cmd, key, val, aux); +} + +static inline int sys_fsmount(int fs_fd, unsigned flags, unsigned ms_flags) +{ + return syscall(__NR_fsmount, fs_fd, flags, ms_flags); +} + +static inline int sys_move_mount(int from_dfd, const char *from_path, + int to_dfd, const char *to_path, + unsigned flags) +{ + return syscall(__NR_move_mount, from_dfd, from_path, to_dfd, to_path, flags); +} + +static int drop_priv_caps(__u64 *old_caps) +{ + return cap_disable_effective((1ULL << CAP_BPF) | + (1ULL << CAP_PERFMON) | + (1ULL << CAP_NET_ADMIN) | + (1ULL << CAP_SYS_ADMIN), old_caps); +} + +static int restore_priv_caps(__u64 old_caps) +{ + return cap_enable_effective(old_caps, NULL); +} + +static int set_delegate_mask(int fs_fd, const char *key, __u64 mask, const char *mask_str) +{ + char buf[32]; + int err; + + if (!mask_str) { + if (mask == ~0ULL) { + mask_str = "any"; + } else { + snprintf(buf, sizeof(buf), "0x%llx", (unsigned long long)mask); + mask_str = buf; + } + } + + err = sys_fsconfig(fs_fd, FSCONFIG_SET_STRING, key, + mask_str, 0); + if (err < 0) + err = -errno; + return err; +} + +#define zclose(fd) do { if (fd >= 0) close(fd); fd = -1; } while (0) + +struct bpffs_opts { + __u64 cmds; + __u64 maps; + __u64 progs; + __u64 attachs; + const char *cmds_str; + const char *maps_str; + const char *progs_str; + const char *attachs_str; +}; + +static int create_bpffs_fd(void) +{ + int fs_fd; + + /* create VFS context */ + fs_fd = sys_fsopen("bpf", 0); + ASSERT_GE(fs_fd, 0, "fs_fd"); + + return fs_fd; +} + +static int materialize_bpffs_fd(int fs_fd, struct bpffs_opts *opts) +{ + int mnt_fd, err; + + /* set up token delegation mount options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", opts->cmds, opts->cmds_str); + if (!ASSERT_OK(err, "fs_cfg_cmds")) + return err; + err = set_delegate_mask(fs_fd, "delegate_maps", opts->maps, opts->maps_str); + if (!ASSERT_OK(err, "fs_cfg_maps")) + return err; + err = set_delegate_mask(fs_fd, "delegate_progs", opts->progs, opts->progs_str); + if (!ASSERT_OK(err, "fs_cfg_progs")) + return err; + err = set_delegate_mask(fs_fd, "delegate_attachs", opts->attachs, opts->attachs_str); + if (!ASSERT_OK(err, "fs_cfg_attachs")) + return err; + + /* instantiate FS object */ + err = sys_fsconfig(fs_fd, FSCONFIG_CMD_CREATE, NULL, NULL, 0); + if (err < 0) + return -errno; + + /* create O_PATH fd for detached mount */ + mnt_fd = sys_fsmount(fs_fd, 0, 0); + if (err < 0) + return -errno; + + return mnt_fd; +} + +/* send FD over Unix domain (AF_UNIX) socket */ +static int sendfd(int sockfd, int fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1] = { fd }, err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + cmsg = CMSG_FIRSTHDR(&msg); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + cmsg->cmsg_len = CMSG_LEN(sizeof(fds)); + memcpy(CMSG_DATA(cmsg), fds, sizeof(fds)); + + err = sendmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "sendmsg")) + return -EINVAL; + + return 0; +} + +/* receive FD over Unix domain (AF_UNIX) socket */ +static int recvfd(int sockfd, int *fd) +{ + struct msghdr msg = {}; + struct cmsghdr *cmsg; + int fds[1], err; + char iobuf[1]; + struct iovec io = { + .iov_base = iobuf, + .iov_len = sizeof(iobuf), + }; + union { + char buf[CMSG_SPACE(sizeof(fds))]; + struct cmsghdr align; + } u; + + msg.msg_iov = &io; + msg.msg_iovlen = 1; + msg.msg_control = u.buf; + msg.msg_controllen = sizeof(u.buf); + + err = recvmsg(sockfd, &msg, 0); + if (err < 0) + err = -errno; + if (!ASSERT_EQ(err, 1, "recvmsg")) + return -EINVAL; + + cmsg = CMSG_FIRSTHDR(&msg); + if (!ASSERT_OK_PTR(cmsg, "cmsg_null") || + !ASSERT_EQ(cmsg->cmsg_len, CMSG_LEN(sizeof(fds)), "cmsg_len") || + !ASSERT_EQ(cmsg->cmsg_level, SOL_SOCKET, "cmsg_level") || + !ASSERT_EQ(cmsg->cmsg_type, SCM_RIGHTS, "cmsg_type")) + return -EINVAL; + + memcpy(fds, CMSG_DATA(cmsg), sizeof(fds)); + *fd = fds[0]; + + return 0; +} + +static ssize_t write_nointr(int fd, const void *buf, size_t count) +{ + ssize_t ret; + + do { + ret = write(fd, buf, count); + } while (ret < 0 && errno == EINTR); + + return ret; +} + +static int write_file(const char *path, const void *buf, size_t count) +{ + int fd; + ssize_t ret; + + fd = open(path, O_WRONLY | O_CLOEXEC | O_NOCTTY | O_NOFOLLOW); + if (fd < 0) + return -1; + + ret = write_nointr(fd, buf, count); + close(fd); + if (ret < 0 || (size_t)ret != count) + return -1; + + return 0; +} + +static int create_and_enter_userns(void) +{ + uid_t uid; + gid_t gid; + char map[100]; + + uid = getuid(); + gid = getgid(); + + if (unshare(CLONE_NEWUSER)) + return -1; + + if (write_file("/proc/self/setgroups", "deny", sizeof("deny") - 1) && + errno != ENOENT) + return -1; + + snprintf(map, sizeof(map), "0 %d 1", uid); + if (write_file("/proc/self/uid_map", map, strlen(map))) + return -1; + + + snprintf(map, sizeof(map), "0 %d 1", gid); + if (write_file("/proc/self/gid_map", map, strlen(map))) + return -1; + + if (setgid(0)) + return -1; + + if (setuid(0)) + return -1; + + return 0; +} + +typedef int (*child_callback_fn)(int bpffs_fd, struct token_lsm *lsm_skel); + +static void child(int sock_fd, struct bpffs_opts *opts, child_callback_fn callback) +{ + int mnt_fd = -1, fs_fd = -1, err = 0, bpffs_fd = -1, token_fd = -1; + struct token_lsm *lsm_skel = NULL; + + /* load and attach LSM "policy" before we go into unpriv userns */ + lsm_skel = token_lsm__open_and_load(); + if (!ASSERT_OK_PTR(lsm_skel, "lsm_skel_load")) { + err = -EINVAL; + goto cleanup; + } + lsm_skel->bss->my_pid = getpid(); + err = token_lsm__attach(lsm_skel); + if (!ASSERT_OK(err, "lsm_skel_attach")) + goto cleanup; + + /* setup userns with root mappings */ + err = create_and_enter_userns(); + if (!ASSERT_OK(err, "create_and_enter_userns")) + goto cleanup; + + /* setup mountns to allow creating BPF FS (fsopen("bpf")) from unpriv process */ + err = unshare(CLONE_NEWNS); + if (!ASSERT_OK(err, "create_mountns")) + goto cleanup; + + err = sys_mount(NULL, "/", NULL, MS_REC | MS_PRIVATE, 0); + if (!ASSERT_OK(err, "remount_root")) + goto cleanup; + + fs_fd = create_bpffs_fd(); + if (!ASSERT_GE(fs_fd, 0, "create_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm"); + err = set_delegate_mask(fs_fd, "delegate_maps", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_maps_eperm"); + err = set_delegate_mask(fs_fd, "delegate_progs", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_progs_eperm"); + err = set_delegate_mask(fs_fd, "delegate_attachs", 0x1, NULL); + ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm"); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, fs_fd); + if (!ASSERT_OK(err, "send_fs_fd")) + goto cleanup; + zclose(fs_fd); + + /* avoid mucking around with mount namespaces and mounting at + * well-known path, just get detach-mounted BPF FS fd back from parent + */ + err = recvfd(sock_fd, &mnt_fd); + if (!ASSERT_OK(err, "recv_mnt_fd")) + goto cleanup; + + /* try to fspick() BPF FS and try to add some delegation options */ + fs_fd = sys_fspick(mnt_fd, "", FSPICK_EMPTY_PATH); + if (!ASSERT_GE(fs_fd, 0, "bpffs_fspick")) { + err = -EINVAL; + goto cleanup; + } + + /* ensure unprivileged child cannot reconfigure to set delegation options */ + err = set_delegate_mask(fs_fd, "delegate_cmds", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_cmd_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_maps", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_maps_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_progs", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_progs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + err = set_delegate_mask(fs_fd, "delegate_attachs", 0, "any"); + if (!ASSERT_EQ(err, -EPERM, "delegate_attachs_eperm_reconfig")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + bpffs_fd = openat(mnt_fd, ".", 0, O_RDWR); + if (!ASSERT_GE(bpffs_fd, 0, "bpffs_open")) { + err = -EINVAL; + goto cleanup; + } + + /* create BPF token FD and pass it to parent for some extra checks */ + token_fd = bpf_token_create(bpffs_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "child_token_create")) { + err = -EINVAL; + goto cleanup; + } + err = sendfd(sock_fd, token_fd); + if (!ASSERT_OK(err, "send_token_fd")) + goto cleanup; + zclose(token_fd); + + /* do custom test logic with customly set up BPF FS instance */ + err = callback(bpffs_fd, lsm_skel); + if (!ASSERT_OK(err, "test_callback")) + goto cleanup; + + err = 0; +cleanup: + zclose(sock_fd); + zclose(mnt_fd); + zclose(fs_fd); + zclose(bpffs_fd); + zclose(token_fd); + + lsm_skel->bss->my_pid = 0; + token_lsm__destroy(lsm_skel); + + exit(-err); +} + +static int wait_for_pid(pid_t pid) +{ + int status, ret; + +again: + ret = waitpid(pid, &status, 0); + if (ret == -1) { + if (errno == EINTR) + goto again; + + return -1; + } + + if (!WIFEXITED(status)) + return -1; + + return WEXITSTATUS(status); +} + +static void parent(int child_pid, struct bpffs_opts *bpffs_opts, int sock_fd) +{ + int fs_fd = -1, mnt_fd = -1, token_fd = -1, err; + + err = recvfd(sock_fd, &fs_fd); + if (!ASSERT_OK(err, "recv_bpffs_fd")) + goto cleanup; + + mnt_fd = materialize_bpffs_fd(fs_fd, bpffs_opts); + if (!ASSERT_GE(mnt_fd, 0, "materialize_bpffs_fd")) { + err = -EINVAL; + goto cleanup; + } + zclose(fs_fd); + + /* pass BPF FS context object to parent */ + err = sendfd(sock_fd, mnt_fd); + if (!ASSERT_OK(err, "send_mnt_fd")) + goto cleanup; + zclose(mnt_fd); + + /* receive BPF token FD back from child for some extra tests */ + err = recvfd(sock_fd, &token_fd); + if (!ASSERT_OK(err, "recv_token_fd")) + goto cleanup; + + err = wait_for_pid(child_pid); + ASSERT_OK(err, "waitpid_child"); + +cleanup: + zclose(sock_fd); + zclose(fs_fd); + zclose(mnt_fd); + zclose(token_fd); + + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static void subtest_userns(struct bpffs_opts *bpffs_opts, + child_callback_fn child_cb) +{ + int sock_fds[2] = { -1, -1 }; + int child_pid = 0, err; + + err = socketpair(AF_UNIX, SOCK_STREAM, 0, sock_fds); + if (!ASSERT_OK(err, "socketpair")) + goto cleanup; + + child_pid = fork(); + if (!ASSERT_GE(child_pid, 0, "fork")) + goto cleanup; + + if (child_pid == 0) { + zclose(sock_fds[0]); + return child(sock_fds[1], bpffs_opts, child_cb); + + } else { + zclose(sock_fds[1]); + return parent(child_pid, bpffs_opts, sock_fds[0]); + } + +cleanup: + zclose(sock_fds[0]); + zclose(sock_fds[1]); + if (child_pid > 0) + (void)kill(child_pid, SIGKILL); +} + +static int userns_map_create(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_map_create_opts, map_opts); + int err, token_fd = -1, map_fd = -1; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no token, no CAP_BPF -> fail */ + map_opts.map_flags = 0; + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* token without CAP_BPF -> fail */ + map_opts.map_flags = BPF_F_TOKEN_FD; + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_wo_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_w_token_wo_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* CAP_BPF without token -> fail */ + map_opts.map_flags = 0; + map_opts.token_fd = 0; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "wo_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_LT(map_fd, 0, "stack_map_wo_token_w_cap_bpf_should_fail")) { + err = -EINVAL; + goto cleanup; + } + + /* finally, namespaced CAP_BPF + token -> success */ + map_opts.map_flags = BPF_F_TOKEN_FD; + map_opts.token_fd = token_fd; + map_fd = bpf_map_create(BPF_MAP_TYPE_STACK, "w_token_w_bpf", 0, 8, 1, &map_opts); + if (!ASSERT_GT(map_fd, 0, "stack_map_w_token_w_cap_bpf")) { + err = -EINVAL; + goto cleanup; + } + +cleanup: + zclose(token_fd); + zclose(map_fd); + return err; +} + +static int userns_btf_load(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_btf_load_opts, btf_opts); + int err, token_fd = -1, btf_fd = -1; + const void *raw_btf_data; + struct btf *btf = NULL; + __u32 raw_btf_size; + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* while inside non-init userns, we need both a BPF token *and* + * CAP_BPF inside current userns to create privileged map; let's test + * that neither BPF token alone nor namespaced CAP_BPF is sufficient + */ + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* setup a trivial BTF data to load to the kernel */ + btf = btf__new_empty(); + if (!ASSERT_OK_PTR(btf, "empty_btf")) + goto cleanup; + + ASSERT_GT(btf__add_int(btf, "int", 4, 0), 0, "int_type"); + + raw_btf_data = btf__raw_data(btf, &raw_btf_size); + if (!ASSERT_OK_PTR(raw_btf_data, "raw_btf_data")) + goto cleanup; + + /* no token + no CAP_BPF -> failure */ + btf_opts.btf_flags = 0; + btf_opts.token_fd = 0; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "no_token_no_cap_should_fail")) + goto cleanup; + + /* token + no CAP_BPF -> failure */ + btf_opts.btf_flags = BPF_F_TOKEN_FD; + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_LT(btf_fd, 0, "token_no_cap_should_fail")) + goto cleanup; + + /* get back effective local CAP_BPF (and CAP_SYS_ADMIN) */ + err = restore_priv_caps(old_caps); + if (!ASSERT_OK(err, "restore_caps")) + goto cleanup; + + /* token + CAP_BPF -> success */ + btf_opts.btf_flags = BPF_F_TOKEN_FD; + btf_opts.token_fd = token_fd; + btf_fd = bpf_btf_load(raw_btf_data, raw_btf_size, &btf_opts); + if (!ASSERT_GT(btf_fd, 0, "token_and_cap_success")) + goto cleanup; + + err = 0; +cleanup: + btf__free(btf); + zclose(btf_fd); + zclose(token_fd); + return err; +} + +static int userns_prog_load(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_prog_load_opts, prog_opts); + int err, token_fd = -1, prog_fd = -1; + struct bpf_insn insns[] = { + /* bpf_jiffies64() requires CAP_BPF */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_jiffies64), + /* bpf_get_current_task() requires CAP_PERFMON */ + BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_get_current_task), + /* r0 = 0; exit; */ + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + size_t insn_cnt = ARRAY_SIZE(insns); + __u64 old_caps = 0; + + /* create BPF token from BPF FS mount */ + token_fd = bpf_token_create(mnt_fd, NULL); + if (!ASSERT_GT(token_fd, 0, "token_create")) { + err = -EINVAL; + goto cleanup; + } + + /* validate we can successfully load BPF program with token; this + * being XDP program (CAP_NET_ADMIN) using bpf_jiffies64() (CAP_BPF) + * and bpf_get_current_task() (CAP_PERFMON) helpers validates we have + * BPF token wired properly in a bunch of places in the kernel + */ + prog_opts.prog_flags = BPF_F_TOKEN_FD; + prog_opts.token_fd = token_fd; + prog_opts.expected_attach_type = BPF_XDP; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_GT(prog_fd, 0, "prog_fd")) { + err = -EPERM; + goto cleanup; + } + + /* no token + caps -> failure */ + prog_opts.prog_flags = 0; + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = drop_priv_caps(&old_caps); + if (!ASSERT_OK(err, "drop_caps")) + goto cleanup; + + /* no caps + token -> failure */ + prog_opts.prog_flags = BPF_F_TOKEN_FD; + prog_opts.token_fd = token_fd; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + /* no caps + no token -> definitely a failure */ + prog_opts.prog_flags = 0; + prog_opts.token_fd = 0; + prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, "token_prog", "GPL", + insns, insn_cnt, &prog_opts); + if (!ASSERT_EQ(prog_fd, -EPERM, "prog_fd_eperm")) { + err = -EPERM; + goto cleanup; + } + + err = 0; +cleanup: + zclose(prog_fd); + zclose(token_fd); + return err; +} + +static int userns_obj_priv_map(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_map *skel; + int err; + + skel = priv_map__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_map__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_map__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = priv_map__load(skel); + priv_map__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_prog(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct priv_prog *skel; + int err; + + skel = priv_prog__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + priv_prog__destroy(skel); + return -EINVAL; + } + + /* use bpf_token_path to provide BPF FS path */ + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + + /* provide BPF token, but reject bpf_token_capable() with LSM */ + lsm_skel->bss->reject_capable = true; + lsm_skel->bss->reject_cmd = false; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cap_open")) + return -EINVAL; + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_ERR(err, "obj_token_lsm_reject_cap_load")) + return -EINVAL; + + /* provide BPF token, but reject bpf_token_cmd() with LSM */ + lsm_skel->bss->reject_capable = false; + lsm_skel->bss->reject_cmd = true; + skel = priv_prog__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_lsm_reject_cmd_open")) + return -EINVAL; + err = priv_prog__load(skel); + priv_prog__destroy(skel); + if (!ASSERT_ERR(err, "obj_token_lsm_reject_cmd_load")) + return -EINVAL; + + return 0; +} + +/* this test is called with BPF FS that doesn't delegate BPF_BTF_LOAD command, + * which should cause struct_ops application to fail, as BTF won't be uploaded + * into the kernel, even if STRUCT_OPS programs themselves are allowed + */ +static int validate_struct_ops_load(int mnt_fd, bool expect_success) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + char buf[256]; + struct dummy_st_ops_success *skel; + int err; + + snprintf(buf, sizeof(buf), "/proc/self/fd/%d", mnt_fd); + opts.bpf_token_path = buf; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (expect_success) { + if (!ASSERT_OK(err, "obj_token_path_load")) + return -EINVAL; + } else /* expect failure */ { + if (!ASSERT_ERR(err, "obj_token_path_load")) + return -EINVAL; + } + + return 0; +} + +static int userns_obj_priv_btf_fail(int mnt_fd, struct token_lsm *lsm_skel) +{ + return validate_struct_ops_load(mnt_fd, false /* should fail */); +} + +static int userns_obj_priv_btf_success(int mnt_fd, struct token_lsm *lsm_skel) +{ + return validate_struct_ops_load(mnt_fd, true /* should succeed */); +} + +#define TOKEN_ENVVAR "LIBBPF_BPF_TOKEN_PATH" +#define TOKEN_BPFFS_CUSTOM "/bpf-token-fs" + +static int userns_obj_priv_implicit_token(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over /sys/fs/bpf so that libbpf can create BPF + * token automatically and implicitly + */ + err = sys_move_mount(mnt_fd, "", AT_FDCWD, "/sys/fs/bpf", MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + return -EINVAL; + + /* disable implicit BPF token creation by setting + * LIBBPF_BPF_TOKEN_PATH envvar to empty value, load should fail + */ + err = setenv(TOKEN_ENVVAR, "", 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + return -EINVAL; + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_token_envvar_disabled_load")) { + unsetenv(TOKEN_ENVVAR); + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + unsetenv(TOKEN_ENVVAR); + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from /sys/fs/bpf mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + return -EINVAL; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, should fail */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + return -EINVAL; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + return -EINVAL; + + return 0; +} + +static int userns_obj_priv_implicit_token_envvar(int mnt_fd, struct token_lsm *lsm_skel) +{ + LIBBPF_OPTS(bpf_object_open_opts, opts); + struct dummy_st_ops_success *skel; + int err; + + /* before we mount BPF FS with token delegation, struct_ops skeleton + * should fail to load + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load")) { + dummy_st_ops_success__destroy(skel); + return -EINVAL; + } + + /* mount custom BPF FS over custom location, so libbpf can't create + * BPF token implicitly, unless pointed to it through + * LIBBPF_BPF_TOKEN_PATH envvar + */ + rmdir(TOKEN_BPFFS_CUSTOM); + if (!ASSERT_OK(mkdir(TOKEN_BPFFS_CUSTOM, 0777), "mkdir_bpffs_custom")) + goto err_out; + err = sys_move_mount(mnt_fd, "", AT_FDCWD, TOKEN_BPFFS_CUSTOM, MOVE_MOUNT_F_EMPTY_PATH); + if (!ASSERT_OK(err, "move_mount_bpffs")) + goto err_out; + + /* even though we have BPF FS with delegation, it's not at default + * /sys/fs/bpf location, so we still fail to load until envvar is set up + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_ERR_PTR(skel, "obj_tokenless_load2")) { + dummy_st_ops_success__destroy(skel); + goto err_out; + } + + err = setenv(TOKEN_ENVVAR, TOKEN_BPFFS_CUSTOM, 1 /*overwrite*/); + if (!ASSERT_OK(err, "setenv_token_path")) + goto err_out; + + /* now the same struct_ops skeleton should succeed thanks to libppf + * creating BPF token from custom mount point + */ + skel = dummy_st_ops_success__open_and_load(); + if (!ASSERT_OK_PTR(skel, "obj_implicit_token_load")) + goto err_out; + + dummy_st_ops_success__destroy(skel); + + /* now disable implicit token through empty bpf_token_path, envvar + * will be ignored, should fail + */ + opts.bpf_token_path = ""; + skel = dummy_st_ops_success__open_opts(&opts); + if (!ASSERT_OK_PTR(skel, "obj_empty_token_path_open")) + goto err_out; + + err = dummy_st_ops_success__load(skel); + dummy_st_ops_success__destroy(skel); + if (!ASSERT_ERR(err, "obj_empty_token_path_load")) + goto err_out; + + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return 0; +err_out: + rmdir(TOKEN_BPFFS_CUSTOM); + unsetenv(TOKEN_ENVVAR); + return -EINVAL; +} + +#define bit(n) (1ULL << (n)) + +void test_token(void) +{ + if (test__start_subtest("map_token")) { + struct bpffs_opts opts = { + .cmds_str = "map_create", + .maps_str = "stack", + }; + + subtest_userns(&opts, userns_map_create); + } + if (test__start_subtest("btf_token")) { + struct bpffs_opts opts = { + .cmds = 1ULL << BPF_BTF_LOAD, + }; + + subtest_userns(&opts, userns_btf_load); + } + if (test__start_subtest("prog_token")) { + struct bpffs_opts opts = { + .cmds_str = "PROG_LOAD", + .progs_str = "XDP", + .attachs_str = "xdp", + }; + + subtest_userns(&opts, userns_prog_load); + } + if (test__start_subtest("obj_priv_map")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_MAP_CREATE), + .maps = bit(BPF_MAP_TYPE_QUEUE), + }; + + subtest_userns(&opts, userns_obj_priv_map); + } + if (test__start_subtest("obj_priv_prog")) { + struct bpffs_opts opts = { + .cmds = bit(BPF_PROG_LOAD), + .progs = bit(BPF_PROG_TYPE_KPROBE), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_prog); + } + if (test__start_subtest("obj_priv_btf_fail")) { + struct bpffs_opts opts = { + /* disallow BTF loading */ + .cmds = bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_fail); + } + if (test__start_subtest("obj_priv_btf_success")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_btf_success); + } + if (test__start_subtest("obj_priv_implicit_token")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token); + } + if (test__start_subtest("obj_priv_implicit_token_envvar")) { + struct bpffs_opts opts = { + /* allow BTF loading */ + .cmds = bit(BPF_BTF_LOAD) | bit(BPF_MAP_CREATE) | bit(BPF_PROG_LOAD), + .maps = bit(BPF_MAP_TYPE_STRUCT_OPS), + .progs = bit(BPF_PROG_TYPE_STRUCT_OPS), + .attachs = ~0ULL, + }; + + subtest_userns(&opts, userns_obj_priv_implicit_token_envvar); + } +} diff --git a/tools/testing/selftests/bpf/prog_tests/xdpwall.c b/tools/testing/selftests/bpf/prog_tests/xdpwall.c index f3927829a55a..4599154c8e9b 100644 --- a/tools/testing/selftests/bpf/prog_tests/xdpwall.c +++ b/tools/testing/selftests/bpf/prog_tests/xdpwall.c @@ -9,7 +9,7 @@ void test_xdpwall(void) struct xdpwall *skel; skel = xdpwall__open_and_load(); - ASSERT_OK_PTR(skel, "Does LLMV have https://reviews.llvm.org/D109073?"); + ASSERT_OK_PTR(skel, "Does LLVM have https://github.com/llvm/llvm-project/commit/ea72b0319d7b0f0c2fcf41d121afa5d031b319d5?"); xdpwall__destroy(skel); } diff --git a/tools/testing/selftests/bpf/progs/bpf_misc.h b/tools/testing/selftests/bpf/progs/bpf_misc.h index 2fd59970c43a..fb2f5513e29e 100644 --- a/tools/testing/selftests/bpf/progs/bpf_misc.h +++ b/tools/testing/selftests/bpf/progs/bpf_misc.h @@ -80,7 +80,7 @@ #define __imm(name) [name]"i"(name) #define __imm_const(name, expr) [name]"i"(expr) #define __imm_addr(name) [name]"i"(&name) -#define __imm_ptr(name) [name]"p"(&name) +#define __imm_ptr(name) [name]"r"(&name) #define __imm_insn(name, expr) [name]"i"(*(long *)&(expr)) /* Magic constants used with __retval() */ diff --git a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h index e8bd4b7b5ef7..7001965d1cc3 100644 --- a/tools/testing/selftests/bpf/progs/bpf_tracing_net.h +++ b/tools/testing/selftests/bpf/progs/bpf_tracing_net.h @@ -51,9 +51,25 @@ #define ICSK_TIME_LOSS_PROBE 5 #define ICSK_TIME_REO_TIMEOUT 6 +#define ETH_ALEN 6 #define ETH_HLEN 14 +#define ETH_P_IP 0x0800 #define ETH_P_IPV6 0x86DD +#define NEXTHDR_TCP 6 + +#define TCPOPT_NOP 1 +#define TCPOPT_EOL 0 +#define TCPOPT_MSS 2 +#define TCPOPT_WINDOW 3 +#define TCPOPT_TIMESTAMP 8 +#define TCPOPT_SACK_PERM 4 + +#define TCPOLEN_MSS 4 +#define TCPOLEN_WINDOW 3 +#define TCPOLEN_TIMESTAMP 10 +#define TCPOLEN_SACK_PERM 2 + #define CHECKSUM_NONE 0 #define CHECKSUM_PARTIAL 3 diff --git a/tools/testing/selftests/bpf/progs/iters.c b/tools/testing/selftests/bpf/progs/iters.c index fe971992e635..225f02dd66d0 100644 --- a/tools/testing/selftests/bpf/progs/iters.c +++ b/tools/testing/selftests/bpf/progs/iters.c @@ -78,8 +78,8 @@ int iter_err_unsafe_asm_loop(const void *ctx) "*(u32 *)(r1 + 0) = r6;" /* invalid */ : : [it]"r"(&it), - [small_arr]"p"(small_arr), - [zero]"p"(zero), + [small_arr]"r"(small_arr), + [zero]"r"(zero), __imm(bpf_iter_num_new), __imm(bpf_iter_num_next), __imm(bpf_iter_num_destroy) diff --git a/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c new file mode 100644 index 000000000000..2414ac20b6d5 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/kptr_xchg_inline.c @@ -0,0 +1,48 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (C) 2023. Huawei Technologies Co., Ltd */ +#include <linux/types.h> +#include <bpf/bpf_helpers.h> + +#include "bpf_experimental.h" +#include "bpf_misc.h" + +char _license[] SEC("license") = "GPL"; + +struct bin_data { + char blob[32]; +}; + +#define private(name) SEC(".bss." #name) __hidden __attribute__((aligned(8))) +private(kptr) struct bin_data __kptr * ptr; + +SEC("tc") +__naked int kptr_xchg_inline(void) +{ + asm volatile ( + "r1 = %[ptr] ll;" + "r2 = 0;" + "call %[bpf_kptr_xchg];" + "if r0 == 0 goto 1f;" + "r1 = r0;" + "r2 = 0;" + "call %[bpf_obj_drop_impl];" + "1:" + "r0 = 0;" + "exit;" + : + : __imm_addr(ptr), + __imm(bpf_kptr_xchg), + __imm(bpf_obj_drop_impl) + : __clobber_all + ); +} + +/* BTF FUNC records are not generated for kfuncs referenced + * from inline assembly. These records are necessary for + * libbpf to link the program. The function below is a hack + * to ensure that BTF FUNC records are generated. + */ +void __btf_root(void) +{ + bpf_obj_drop(NULL); +} diff --git a/tools/testing/selftests/bpf/progs/priv_map.c b/tools/testing/selftests/bpf/progs/priv_map.c new file mode 100644 index 000000000000..9085be50f03b --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_map.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +struct { + __uint(type, BPF_MAP_TYPE_QUEUE); + __uint(max_entries, 1); + __type(value, __u32); +} priv_map SEC(".maps"); diff --git a/tools/testing/selftests/bpf/progs/priv_prog.c b/tools/testing/selftests/bpf/progs/priv_prog.c new file mode 100644 index 000000000000..3c7b2b618c8a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/priv_prog.c @@ -0,0 +1,13 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2023 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> + +char _license[] SEC("license") = "GPL"; + +SEC("kprobe") +int kprobe_prog(void *ctx) +{ + return 1; +} diff --git a/tools/testing/selftests/bpf/progs/struct_ops_module.c b/tools/testing/selftests/bpf/progs/struct_ops_module.c new file mode 100644 index 000000000000..e44ac55195ca --- /dev/null +++ b/tools/testing/selftests/bpf/progs/struct_ops_module.c @@ -0,0 +1,30 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ +#include <vmlinux.h> +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> +#include "../bpf_testmod/bpf_testmod.h" + +char _license[] SEC("license") = "GPL"; + +int test_2_result = 0; + +SEC("struct_ops/test_1") +int BPF_PROG(test_1) +{ + return 0xdeadbeef; +} + +SEC("struct_ops/test_2") +int BPF_PROG(test_2, int a, int b) +{ + test_2_result = a + b; + return a + b; +} + +SEC(".struct_ops.link") +struct bpf_testmod_ops testmod_1 = { + .test_1 = (void *)test_1, + .test_2 = (void *)test_2, +}; + diff --git a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c index 22aba3f6e344..6fc8b9d66e34 100644 --- a/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c +++ b/tools/testing/selftests/bpf/progs/test_core_reloc_type_id.c @@ -80,7 +80,7 @@ int test_core_type_id(void *ctx) * to detect whether this test has to be executed, however strange * that might look like. * - * [0] https://reviews.llvm.org/D85174 + * [0] https://github.com/llvm/llvm-project/commit/00602ee7ef0bf6c68d690a2bd729c12b95c95c99 */ #if __has_builtin(__builtin_preserve_type_info) struct core_reloc_type_id_output *out = (void *)&data.out; diff --git a/tools/testing/selftests/bpf/progs/test_fill_link_info.c b/tools/testing/selftests/bpf/progs/test_fill_link_info.c index 69509f8bb680..6afa834756e9 100644 --- a/tools/testing/selftests/bpf/progs/test_fill_link_info.c +++ b/tools/testing/selftests/bpf/progs/test_fill_link_info.c @@ -33,6 +33,12 @@ int BPF_PROG(tp_run) return 0; } +SEC("perf_event") +int event_run(void *ctx) +{ + return 0; +} + SEC("kprobe.multi") int BPF_PROG(kmulti_run) { diff --git a/tools/testing/selftests/bpf/progs/test_map_in_map.c b/tools/testing/selftests/bpf/progs/test_map_in_map.c index f416032ba858..b295f9b721bf 100644 --- a/tools/testing/selftests/bpf/progs/test_map_in_map.c +++ b/tools/testing/selftests/bpf/progs/test_map_in_map.c @@ -21,6 +21,32 @@ struct { __type(value, __u32); } mim_hash SEC(".maps"); +/* The following three maps are used to test + * perf_event_array map can be an inner + * map of hash/array_of_maps. + */ +struct perf_event_array { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, __u32); + __type(value, __u32); +} inner_map0 SEC(".maps"); + +struct { + __uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_array_pe SEC(".maps") = { + .values = {&inner_map0}}; + +struct { + __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS); + __uint(max_entries, 1); + __type(key, __u32); + __array(values, struct perf_event_array); +} mim_hash_pe SEC(".maps") = { + .values = {&inner_map0}}; + SEC("xdp") int xdp_mimtest0(struct xdp_md *ctx) { diff --git a/tools/testing/selftests/bpf/progs/test_siphash.h b/tools/testing/selftests/bpf/progs/test_siphash.h new file mode 100644 index 000000000000..5d3a7ec36780 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_siphash.h @@ -0,0 +1,64 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#ifndef _TEST_SIPHASH_H +#define _TEST_SIPHASH_H + +/* include/linux/bitops.h */ +static inline u64 rol64(u64 word, unsigned int shift) +{ + return (word << (shift & 63)) | (word >> ((-shift) & 63)); +} + +/* include/linux/siphash.h */ +#define SIPHASH_PERMUTATION(a, b, c, d) ( \ + (a) += (b), (b) = rol64((b), 13), (b) ^= (a), (a) = rol64((a), 32), \ + (c) += (d), (d) = rol64((d), 16), (d) ^= (c), \ + (a) += (d), (d) = rol64((d), 21), (d) ^= (a), \ + (c) += (b), (b) = rol64((b), 17), (b) ^= (c), (c) = rol64((c), 32)) + +#define SIPHASH_CONST_0 0x736f6d6570736575ULL +#define SIPHASH_CONST_1 0x646f72616e646f6dULL +#define SIPHASH_CONST_2 0x6c7967656e657261ULL +#define SIPHASH_CONST_3 0x7465646279746573ULL + +/* lib/siphash.c */ +#define SIPROUND SIPHASH_PERMUTATION(v0, v1, v2, v3) + +#define PREAMBLE(len) \ + u64 v0 = SIPHASH_CONST_0; \ + u64 v1 = SIPHASH_CONST_1; \ + u64 v2 = SIPHASH_CONST_2; \ + u64 v3 = SIPHASH_CONST_3; \ + u64 b = ((u64)(len)) << 56; \ + v3 ^= key->key[1]; \ + v2 ^= key->key[0]; \ + v1 ^= key->key[1]; \ + v0 ^= key->key[0]; + +#define POSTAMBLE \ + v3 ^= b; \ + SIPROUND; \ + SIPROUND; \ + v0 ^= b; \ + v2 ^= 0xff; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + SIPROUND; \ + return (v0 ^ v1) ^ (v2 ^ v3); + +static inline u64 siphash_2u64(const u64 first, const u64 second, const siphash_key_t *key) +{ + PREAMBLE(16) + v3 ^= first; + SIPROUND; + SIPROUND; + v0 ^= first; + v3 ^= second; + SIPROUND; + SIPROUND; + v0 ^= second; + POSTAMBLE +} +#endif diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c new file mode 100644 index 000000000000..a5501b29979a --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.c @@ -0,0 +1,572 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#include "vmlinux.h" + +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_endian.h> +#include "bpf_tracing_net.h" +#include "bpf_kfuncs.h" +#include "test_siphash.h" +#include "test_tcp_custom_syncookie.h" + +/* Hash is calculated for each client and split into ISN and TS. + * + * MSB LSB + * ISN: | 31 ... 8 | 7 6 | 5 | 4 | 3 2 1 0 | + * | Hash_1 | MSS | ECN | SACK | WScale | + * + * TS: | 31 ... 8 | 7 ... 0 | + * | Random | Hash_2 | + */ +#define COOKIE_BITS 8 +#define COOKIE_MASK (((__u32)1 << COOKIE_BITS) - 1) + +enum { + /* 0xf is invalid thus means that SYN did not have WScale. */ + BPF_SYNCOOKIE_WSCALE_MASK = (1 << 4) - 1, + BPF_SYNCOOKIE_SACK = (1 << 4), + BPF_SYNCOOKIE_ECN = (1 << 5), +}; + +#define MSS_LOCAL_IPV4 65495 +#define MSS_LOCAL_IPV6 65476 + +const __u16 msstab4[] = { + 536, + 1300, + 1460, + MSS_LOCAL_IPV4, +}; + +const __u16 msstab6[] = { + 1280 - 60, /* IPV6_MIN_MTU - 60 */ + 1480 - 60, + 9000 - 60, + MSS_LOCAL_IPV6, +}; + +static siphash_key_t test_key_siphash = { + { 0x0706050403020100ULL, 0x0f0e0d0c0b0a0908ULL } +}; + +struct tcp_syncookie { + struct __sk_buff *skb; + void *data_end; + struct ethhdr *eth; + struct iphdr *ipv4; + struct ipv6hdr *ipv6; + struct tcphdr *tcp; + union { + char *ptr; + __be32 *ptr32; + }; + struct bpf_tcp_req_attrs attrs; + u32 cookie; + u64 first; +}; + +bool handled_syn, handled_ack; + +static int tcp_load_headers(struct tcp_syncookie *ctx) +{ + ctx->data_end = (void *)(long)ctx->skb->data_end; + ctx->eth = (struct ethhdr *)(long)ctx->skb->data; + + if (ctx->eth + 1 > ctx->data_end) + goto err; + + switch (bpf_ntohs(ctx->eth->h_proto)) { + case ETH_P_IP: + ctx->ipv4 = (struct iphdr *)(ctx->eth + 1); + + if (ctx->ipv4 + 1 > ctx->data_end) + goto err; + + if (ctx->ipv4->ihl != sizeof(*ctx->ipv4) / 4) + goto err; + + if (ctx->ipv4->version != 4) + goto err; + + if (ctx->ipv4->protocol != IPPROTO_TCP) + goto err; + + ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1); + break; + case ETH_P_IPV6: + ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1); + + if (ctx->ipv6 + 1 > ctx->data_end) + goto err; + + if (ctx->ipv6->version != 6) + goto err; + + if (ctx->ipv6->nexthdr != NEXTHDR_TCP) + goto err; + + ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1); + break; + default: + goto err; + } + + if (ctx->tcp + 1 > ctx->data_end) + goto err; + + return 0; +err: + return -1; +} + +static int tcp_reload_headers(struct tcp_syncookie *ctx) +{ + /* Without volatile, + * R3 32-bit pointer arithmetic prohibited + */ + volatile u64 data_len = ctx->skb->data_end - ctx->skb->data; + + if (ctx->tcp->doff < sizeof(*ctx->tcp) / 4) + goto err; + + /* Needed to calculate csum and parse TCP options. */ + if (bpf_skb_change_tail(ctx->skb, data_len + 60 - ctx->tcp->doff * 4, 0)) + goto err; + + ctx->data_end = (void *)(long)ctx->skb->data_end; + ctx->eth = (struct ethhdr *)(long)ctx->skb->data; + if (ctx->ipv4) { + ctx->ipv4 = (struct iphdr *)(ctx->eth + 1); + ctx->ipv6 = NULL; + ctx->tcp = (struct tcphdr *)(ctx->ipv4 + 1); + } else { + ctx->ipv4 = NULL; + ctx->ipv6 = (struct ipv6hdr *)(ctx->eth + 1); + ctx->tcp = (struct tcphdr *)(ctx->ipv6 + 1); + } + + if ((void *)ctx->tcp + 60 > ctx->data_end) + goto err; + + return 0; +err: + return -1; +} + +static __sum16 tcp_v4_csum(struct tcp_syncookie *ctx, __wsum csum) +{ + return csum_tcpudp_magic(ctx->ipv4->saddr, ctx->ipv4->daddr, + ctx->tcp->doff * 4, IPPROTO_TCP, csum); +} + +static __sum16 tcp_v6_csum(struct tcp_syncookie *ctx, __wsum csum) +{ + return csum_ipv6_magic(&ctx->ipv6->saddr, &ctx->ipv6->daddr, + ctx->tcp->doff * 4, IPPROTO_TCP, csum); +} + +static int tcp_validate_header(struct tcp_syncookie *ctx) +{ + s64 csum; + + if (tcp_reload_headers(ctx)) + goto err; + + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); + if (csum < 0) + goto err; + + if (ctx->ipv4) { + /* check tcp_v4_csum(csum) is 0 if not on lo. */ + + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, ctx->ipv4->ihl * 4, 0); + if (csum < 0) + goto err; + + if (csum_fold(csum) != 0) + goto err; + } else if (ctx->ipv6) { + /* check tcp_v6_csum(csum) is 0 if not on lo. */ + } + + return 0; +err: + return -1; +} + +static int tcp_parse_option(__u32 index, struct tcp_syncookie *ctx) +{ + char opcode, opsize; + + if (ctx->ptr + 1 > ctx->data_end) + goto stop; + + opcode = *ctx->ptr++; + + if (opcode == TCPOPT_EOL) + goto stop; + + if (opcode == TCPOPT_NOP) + goto next; + + if (ctx->ptr + 1 > ctx->data_end) + goto stop; + + opsize = *ctx->ptr++; + + if (opsize < 2) + goto stop; + + switch (opcode) { + case TCPOPT_MSS: + if (opsize == TCPOLEN_MSS && ctx->tcp->syn && + ctx->ptr + (TCPOLEN_MSS - 2) < ctx->data_end) + ctx->attrs.mss = get_unaligned_be16(ctx->ptr); + break; + case TCPOPT_WINDOW: + if (opsize == TCPOLEN_WINDOW && ctx->tcp->syn && + ctx->ptr + (TCPOLEN_WINDOW - 2) < ctx->data_end) { + ctx->attrs.wscale_ok = 1; + ctx->attrs.snd_wscale = *ctx->ptr; + } + break; + case TCPOPT_TIMESTAMP: + if (opsize == TCPOLEN_TIMESTAMP && + ctx->ptr + (TCPOLEN_TIMESTAMP - 2) < ctx->data_end) { + ctx->attrs.rcv_tsval = get_unaligned_be32(ctx->ptr); + ctx->attrs.rcv_tsecr = get_unaligned_be32(ctx->ptr + 4); + + if (ctx->tcp->syn && ctx->attrs.rcv_tsecr) + ctx->attrs.tstamp_ok = 0; + else + ctx->attrs.tstamp_ok = 1; + } + break; + case TCPOPT_SACK_PERM: + if (opsize == TCPOLEN_SACK_PERM && ctx->tcp->syn && + ctx->ptr + (TCPOLEN_SACK_PERM - 2) < ctx->data_end) + ctx->attrs.sack_ok = 1; + break; + } + + ctx->ptr += opsize - 2; +next: + return 0; +stop: + return 1; +} + +static void tcp_parse_options(struct tcp_syncookie *ctx) +{ + ctx->ptr = (char *)(ctx->tcp + 1); + + bpf_loop(40, tcp_parse_option, ctx, 0); +} + +static int tcp_validate_sysctl(struct tcp_syncookie *ctx) +{ + if ((ctx->ipv4 && ctx->attrs.mss != MSS_LOCAL_IPV4) || + (ctx->ipv6 && ctx->attrs.mss != MSS_LOCAL_IPV6)) + goto err; + + if (!ctx->attrs.wscale_ok || ctx->attrs.snd_wscale != 7) + goto err; + + if (!ctx->attrs.tstamp_ok) + goto err; + + if (!ctx->attrs.sack_ok) + goto err; + + if (!ctx->tcp->ece || !ctx->tcp->cwr) + goto err; + + return 0; +err: + return -1; +} + +static void tcp_prepare_cookie(struct tcp_syncookie *ctx) +{ + u32 seq = bpf_ntohl(ctx->tcp->seq); + u64 first = 0, second; + int mssind = 0; + u32 hash; + + if (ctx->ipv4) { + for (mssind = ARRAY_SIZE(msstab4) - 1; mssind; mssind--) + if (ctx->attrs.mss >= msstab4[mssind]) + break; + + ctx->attrs.mss = msstab4[mssind]; + + first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr; + } else if (ctx->ipv6) { + for (mssind = ARRAY_SIZE(msstab6) - 1; mssind; mssind--) + if (ctx->attrs.mss >= msstab6[mssind]) + break; + + ctx->attrs.mss = msstab6[mssind]; + + first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 | + ctx->ipv6->daddr.in6_u.u6_addr32[0]; + } + + second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest; + hash = siphash_2u64(first, second, &test_key_siphash); + + if (ctx->attrs.tstamp_ok) { + ctx->attrs.rcv_tsecr = bpf_get_prandom_u32(); + ctx->attrs.rcv_tsecr &= ~COOKIE_MASK; + ctx->attrs.rcv_tsecr |= hash & COOKIE_MASK; + } + + hash &= ~COOKIE_MASK; + hash |= mssind << 6; + + if (ctx->attrs.wscale_ok) + hash |= ctx->attrs.snd_wscale & BPF_SYNCOOKIE_WSCALE_MASK; + + if (ctx->attrs.sack_ok) + hash |= BPF_SYNCOOKIE_SACK; + + if (ctx->attrs.tstamp_ok && ctx->tcp->ece && ctx->tcp->cwr) + hash |= BPF_SYNCOOKIE_ECN; + + ctx->cookie = hash; +} + +static void tcp_write_options(struct tcp_syncookie *ctx) +{ + ctx->ptr32 = (__be32 *)(ctx->tcp + 1); + + *ctx->ptr32++ = bpf_htonl(TCPOPT_MSS << 24 | TCPOLEN_MSS << 16 | + ctx->attrs.mss); + + if (ctx->attrs.wscale_ok) + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_WINDOW << 16 | + TCPOLEN_WINDOW << 8 | + ctx->attrs.snd_wscale); + + if (ctx->attrs.tstamp_ok) { + if (ctx->attrs.sack_ok) + *ctx->ptr32++ = bpf_htonl(TCPOPT_SACK_PERM << 24 | + TCPOLEN_SACK_PERM << 16 | + TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP); + else + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | + TCPOPT_TIMESTAMP << 8 | + TCPOLEN_TIMESTAMP); + + *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsecr); + *ctx->ptr32++ = bpf_htonl(ctx->attrs.rcv_tsval); + } else if (ctx->attrs.sack_ok) { + *ctx->ptr32++ = bpf_htonl(TCPOPT_NOP << 24 | + TCPOPT_NOP << 16 | + TCPOPT_SACK_PERM << 8 | + TCPOLEN_SACK_PERM); + } +} + +static int tcp_handle_syn(struct tcp_syncookie *ctx) +{ + s64 csum; + + if (tcp_validate_header(ctx)) + goto err; + + tcp_parse_options(ctx); + + if (tcp_validate_sysctl(ctx)) + goto err; + + tcp_prepare_cookie(ctx); + tcp_write_options(ctx); + + swap(ctx->tcp->source, ctx->tcp->dest); + ctx->tcp->check = 0; + ctx->tcp->ack_seq = bpf_htonl(bpf_ntohl(ctx->tcp->seq) + 1); + ctx->tcp->seq = bpf_htonl(ctx->cookie); + ctx->tcp->doff = ((long)ctx->ptr32 - (long)ctx->tcp) >> 2; + ctx->tcp->ack = 1; + if (!ctx->attrs.tstamp_ok || !ctx->tcp->ece || !ctx->tcp->cwr) + ctx->tcp->ece = 0; + ctx->tcp->cwr = 0; + + csum = bpf_csum_diff(0, 0, (void *)ctx->tcp, ctx->tcp->doff * 4, 0); + if (csum < 0) + goto err; + + if (ctx->ipv4) { + swap(ctx->ipv4->saddr, ctx->ipv4->daddr); + ctx->tcp->check = tcp_v4_csum(ctx, csum); + + ctx->ipv4->check = 0; + ctx->ipv4->tos = 0; + ctx->ipv4->tot_len = bpf_htons((long)ctx->ptr32 - (long)ctx->ipv4); + ctx->ipv4->id = 0; + ctx->ipv4->ttl = 64; + + csum = bpf_csum_diff(0, 0, (void *)ctx->ipv4, sizeof(*ctx->ipv4), 0); + if (csum < 0) + goto err; + + ctx->ipv4->check = csum_fold(csum); + } else if (ctx->ipv6) { + swap(ctx->ipv6->saddr, ctx->ipv6->daddr); + ctx->tcp->check = tcp_v6_csum(ctx, csum); + + *(__be32 *)ctx->ipv6 = bpf_htonl(0x60000000); + ctx->ipv6->payload_len = bpf_htons((long)ctx->ptr32 - (long)ctx->tcp); + ctx->ipv6->hop_limit = 64; + } + + swap_array(ctx->eth->h_source, ctx->eth->h_dest); + + if (bpf_skb_change_tail(ctx->skb, (long)ctx->ptr32 - (long)ctx->eth, 0)) + goto err; + + return bpf_redirect(ctx->skb->ifindex, 0); +err: + return TC_ACT_SHOT; +} + +static int tcp_validate_cookie(struct tcp_syncookie *ctx) +{ + u32 cookie = bpf_ntohl(ctx->tcp->ack_seq) - 1; + u32 seq = bpf_ntohl(ctx->tcp->seq) - 1; + u64 first = 0, second; + int mssind; + u32 hash; + + if (ctx->ipv4) + first = (u64)ctx->ipv4->saddr << 32 | ctx->ipv4->daddr; + else if (ctx->ipv6) + first = (u64)ctx->ipv6->saddr.in6_u.u6_addr8[0] << 32 | + ctx->ipv6->daddr.in6_u.u6_addr32[0]; + + second = (u64)seq << 32 | ctx->tcp->source << 16 | ctx->tcp->dest; + hash = siphash_2u64(first, second, &test_key_siphash); + + if (ctx->attrs.tstamp_ok) + hash -= ctx->attrs.rcv_tsecr & COOKIE_MASK; + else + hash &= ~COOKIE_MASK; + + hash -= cookie & ~COOKIE_MASK; + if (hash) + goto err; + + mssind = (cookie & (3 << 6)) >> 6; + if (ctx->ipv4) { + if (mssind > ARRAY_SIZE(msstab4)) + goto err; + + ctx->attrs.mss = msstab4[mssind]; + } else { + if (mssind > ARRAY_SIZE(msstab6)) + goto err; + + ctx->attrs.mss = msstab6[mssind]; + } + + ctx->attrs.snd_wscale = cookie & BPF_SYNCOOKIE_WSCALE_MASK; + ctx->attrs.rcv_wscale = ctx->attrs.snd_wscale; + ctx->attrs.wscale_ok = ctx->attrs.snd_wscale == BPF_SYNCOOKIE_WSCALE_MASK; + ctx->attrs.sack_ok = cookie & BPF_SYNCOOKIE_SACK; + ctx->attrs.ecn_ok = cookie & BPF_SYNCOOKIE_ECN; + + return 0; +err: + return -1; +} + +static int tcp_handle_ack(struct tcp_syncookie *ctx) +{ + struct bpf_sock_tuple tuple; + struct bpf_sock *skc; + int ret = TC_ACT_OK; + struct sock *sk; + u32 tuple_size; + + if (ctx->ipv4) { + tuple.ipv4.saddr = ctx->ipv4->saddr; + tuple.ipv4.daddr = ctx->ipv4->daddr; + tuple.ipv4.sport = ctx->tcp->source; + tuple.ipv4.dport = ctx->tcp->dest; + tuple_size = sizeof(tuple.ipv4); + } else if (ctx->ipv6) { + __builtin_memcpy(tuple.ipv6.saddr, &ctx->ipv6->saddr, sizeof(tuple.ipv6.saddr)); + __builtin_memcpy(tuple.ipv6.daddr, &ctx->ipv6->daddr, sizeof(tuple.ipv6.daddr)); + tuple.ipv6.sport = ctx->tcp->source; + tuple.ipv6.dport = ctx->tcp->dest; + tuple_size = sizeof(tuple.ipv6); + } else { + goto out; + } + + skc = bpf_skc_lookup_tcp(ctx->skb, &tuple, tuple_size, -1, 0); + if (!skc) + goto out; + + if (skc->state != TCP_LISTEN) + goto release; + + sk = (struct sock *)bpf_skc_to_tcp_sock(skc); + if (!sk) + goto err; + + if (tcp_validate_header(ctx)) + goto err; + + tcp_parse_options(ctx); + + if (tcp_validate_cookie(ctx)) + goto err; + + ret = bpf_sk_assign_tcp_reqsk(ctx->skb, sk, &ctx->attrs, sizeof(ctx->attrs)); + if (ret < 0) + goto err; + +release: + bpf_sk_release(skc); +out: + return ret; + +err: + ret = TC_ACT_SHOT; + goto release; +} + +SEC("tc") +int tcp_custom_syncookie(struct __sk_buff *skb) +{ + struct tcp_syncookie ctx = { + .skb = skb, + }; + + if (tcp_load_headers(&ctx)) + return TC_ACT_OK; + + if (ctx.tcp->rst) + return TC_ACT_OK; + + if (ctx.tcp->syn) { + if (ctx.tcp->ack) + return TC_ACT_OK; + + handled_syn = true; + + return tcp_handle_syn(&ctx); + } + + handled_ack = true; + + return tcp_handle_ack(&ctx); +} + +char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h new file mode 100644 index 000000000000..29a6a53cf229 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/test_tcp_custom_syncookie.h @@ -0,0 +1,140 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright Amazon.com Inc. or its affiliates. */ + +#ifndef _TEST_TCP_SYNCOOKIE_H +#define _TEST_TCP_SYNCOOKIE_H + +#define __packed __attribute__((__packed__)) +#define __force + +#define ARRAY_SIZE(arr) (sizeof(arr) / sizeof((arr)[0])) + +#define swap(a, b) \ + do { \ + typeof(a) __tmp = (a); \ + (a) = (b); \ + (b) = __tmp; \ + } while (0) + +#define swap_array(a, b) \ + do { \ + typeof(a) __tmp[sizeof(a)]; \ + __builtin_memcpy(__tmp, a, sizeof(a)); \ + __builtin_memcpy(a, b, sizeof(a)); \ + __builtin_memcpy(b, __tmp, sizeof(a)); \ + } while (0) + +/* asm-generic/unaligned.h */ +#define __get_unaligned_t(type, ptr) ({ \ + const struct { type x; } __packed * __pptr = (typeof(__pptr))(ptr); \ + __pptr->x; \ +}) + +#define get_unaligned(ptr) __get_unaligned_t(typeof(*(ptr)), (ptr)) + +static inline u16 get_unaligned_be16(const void *p) +{ + return bpf_ntohs(__get_unaligned_t(__be16, p)); +} + +static inline u32 get_unaligned_be32(const void *p) +{ + return bpf_ntohl(__get_unaligned_t(__be32, p)); +} + +/* lib/checksum.c */ +static inline u32 from64to32(u64 x) +{ + /* add up 32-bit and 32-bit for 32+c bit */ + x = (x & 0xffffffff) + (x >> 32); + /* add up carry.. */ + x = (x & 0xffffffff) + (x >> 32); + return (u32)x; +} + +static inline __wsum csum_tcpudp_nofold(__be32 saddr, __be32 daddr, + __u32 len, __u8 proto, __wsum sum) +{ + unsigned long long s = (__force u32)sum; + + s += (__force u32)saddr; + s += (__force u32)daddr; +#ifdef __BIG_ENDIAN + s += proto + len; +#else + s += (proto + len) << 8; +#endif + return (__force __wsum)from64to32(s); +} + +/* asm-generic/checksum.h */ +static inline __sum16 csum_fold(__wsum csum) +{ + u32 sum = (__force u32)csum; + + sum = (sum & 0xffff) + (sum >> 16); + sum = (sum & 0xffff) + (sum >> 16); + return (__force __sum16)~sum; +} + +static inline __sum16 csum_tcpudp_magic(__be32 saddr, __be32 daddr, __u32 len, + __u8 proto, __wsum sum) +{ + return csum_fold(csum_tcpudp_nofold(saddr, daddr, len, proto, sum)); +} + +/* net/ipv6/ip6_checksum.c */ +static inline __sum16 csum_ipv6_magic(const struct in6_addr *saddr, + const struct in6_addr *daddr, + __u32 len, __u8 proto, __wsum csum) +{ + int carry; + __u32 ulen; + __u32 uproto; + __u32 sum = (__force u32)csum; + + sum += (__force u32)saddr->in6_u.u6_addr32[0]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[0]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[1]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[1]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[2]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[2]); + sum += carry; + + sum += (__force u32)saddr->in6_u.u6_addr32[3]; + carry = (sum < (__force u32)saddr->in6_u.u6_addr32[3]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[0]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[0]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[1]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[1]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[2]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[2]); + sum += carry; + + sum += (__force u32)daddr->in6_u.u6_addr32[3]; + carry = (sum < (__force u32)daddr->in6_u.u6_addr32[3]); + sum += carry; + + ulen = (__force u32)bpf_htonl((__u32)len); + sum += ulen; + carry = (sum < ulen); + sum += carry; + + uproto = (__force u32)bpf_htonl(proto); + sum += uproto; + carry = (sum < uproto); + sum += carry; + + return csum_fold((__force __wsum)sum); +} +#endif diff --git a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c index cf7ed8cbb1fe..a3f3f43fc195 100644 --- a/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c +++ b/tools/testing/selftests/bpf/progs/test_tcpbpf_kern.c @@ -59,7 +59,7 @@ int bpf_testcb(struct bpf_sock_ops *skops) asm volatile ( "%[op] = *(u32 *)(%[skops] +96)" - : [op] "+r"(op) + : [op] "=r"(op) : [skops] "r"(skops) :); diff --git a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c index 78c368e71797..67a77944ef29 100644 --- a/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c +++ b/tools/testing/selftests/bpf/progs/test_xdp_dynptr.c @@ -18,11 +18,11 @@ #include "test_iptunnel_common.h" #include "bpf_kfuncs.h" -const size_t tcphdr_sz = sizeof(struct tcphdr); -const size_t udphdr_sz = sizeof(struct udphdr); -const size_t ethhdr_sz = sizeof(struct ethhdr); -const size_t iphdr_sz = sizeof(struct iphdr); -const size_t ipv6hdr_sz = sizeof(struct ipv6hdr); +#define tcphdr_sz sizeof(struct tcphdr) +#define udphdr_sz sizeof(struct udphdr) +#define ethhdr_sz sizeof(struct ethhdr) +#define iphdr_sz sizeof(struct iphdr) +#define ipv6hdr_sz sizeof(struct ipv6hdr) struct { __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); diff --git a/tools/testing/selftests/bpf/progs/token_lsm.c b/tools/testing/selftests/bpf/progs/token_lsm.c new file mode 100644 index 000000000000..e4d59b6ba743 --- /dev/null +++ b/tools/testing/selftests/bpf/progs/token_lsm.c @@ -0,0 +1,32 @@ +// SPDX-License-Identifier: GPL-2.0 +/* Copyright (c) 2024 Meta Platforms, Inc. and affiliates. */ + +#include "vmlinux.h" +#include <bpf/bpf_helpers.h> +#include <bpf/bpf_tracing.h> + +char _license[] SEC("license") = "GPL"; + +int my_pid; +bool reject_capable; +bool reject_cmd; + +SEC("lsm/bpf_token_capable") +int BPF_PROG(token_capable, struct bpf_token *token, int cap) +{ + if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + if (reject_capable) + return -1; + return 0; +} + +SEC("lsm/bpf_token_cmd") +int BPF_PROG(token_cmd, struct bpf_token *token, enum bpf_cmd cmd) +{ + if (my_pid == 0 || my_pid != (bpf_get_current_pid_tgid() >> 32)) + return 0; + if (reject_cmd) + return -1; + return 0; +} diff --git a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c index be95570ab382..28b602ac9cbe 100644 --- a/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c +++ b/tools/testing/selftests/bpf/progs/verifier_direct_packet_access.c @@ -568,7 +568,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("direct packet access: test23 (x += pkt_ptr, 4)") -__failure __msg("invalid access to packet, off=0 size=8, R5(id=2,off=0,r=0)") +__failure __msg("invalid access to packet, off=0 size=8, R5(id=3,off=0,r=0)") __flag(BPF_F_ANY_ALIGNMENT) __naked void test23_x_pkt_ptr_4(void) { diff --git a/tools/testing/selftests/bpf/progs/verifier_loops1.c b/tools/testing/selftests/bpf/progs/verifier_loops1.c index 71735dbf33d4..e07b43b78fd2 100644 --- a/tools/testing/selftests/bpf/progs/verifier_loops1.c +++ b/tools/testing/selftests/bpf/progs/verifier_loops1.c @@ -259,4 +259,28 @@ l0_%=: r2 += r1; \ " ::: __clobber_all); } +SEC("xdp") +__success +__naked void not_an_inifinite_loop(void) +{ + asm volatile (" \ + call %[bpf_get_prandom_u32]; \ + r0 &= 0xff; \ + *(u64 *)(r10 - 8) = r0; \ + r0 = 0; \ +loop_%=: \ + r0 = *(u64 *)(r10 - 8); \ + if r0 > 10 goto exit_%=; \ + r0 += 1; \ + *(u64 *)(r10 - 8) = r0; \ + r0 = 0; \ + goto loop_%=; \ +exit_%=: \ + r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c index 39fe3372e0e0..7013a9694163 100644 --- a/tools/testing/selftests/bpf/progs/verifier_spill_fill.c +++ b/tools/testing/selftests/bpf/progs/verifier_spill_fill.c @@ -243,7 +243,7 @@ l0_%=: r0 = 0; \ SEC("tc") __description("Spill u32 const scalars. Refill as u64. Offset to skb->data") -__failure __msg("invalid access to packet") +__failure __msg("math between pkt pointer and register with unbounded min value is not allowed") __naked void u64_offset_to_skb_data(void) { asm volatile (" \ @@ -253,13 +253,11 @@ __naked void u64_offset_to_skb_data(void) w7 = 20; \ *(u32*)(r10 - 4) = r6; \ *(u32*)(r10 - 8) = r7; \ - r4 = *(u16*)(r10 - 8); \ + r4 = *(u64*)(r10 - 8); \ r0 = r2; \ - /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4=umax=65535 */\ + /* r0 += r4 R0=pkt R2=pkt R3=pkt_end R4= */ \ r0 += r4; \ - /* if (r0 > r3) R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=umax=65535 */\ if r0 > r3 goto l0_%=; \ - /* r0 = *(u32 *)r2 R0=pkt,umax=65535 R2=pkt R3=pkt_end R4=20 */\ r0 = *(u32*)(r2 + 0); \ l0_%=: r0 = 0; \ exit; \ @@ -495,14 +493,14 @@ char single_byte_buf[1] SEC(".data.single_byte_buf"); SEC("raw_tp") __log_level(2) __success -/* make sure fp-8 is all STACK_ZERO */ -__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=00000000") +/* fp-8 is spilled IMPRECISE value zero (represented by a zero value fake reg) */ +__msg("2: (7a) *(u64 *)(r10 -8) = 0 ; R10=fp0 fp-8_w=0") /* but fp-16 is spilled IMPRECISE zero const reg */ __msg("4: (7b) *(u64 *)(r10 -16) = r0 ; R0_w=0 R10=fp0 fp-16_w=0") -/* validate that assigning R2 from STACK_ZERO doesn't mark register +/* validate that assigning R2 from STACK_SPILL with zero value doesn't mark register * precise immediately; if necessary, it will be marked precise later */ -__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=00000000") +__msg("6: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8_w=0") /* similarly, when R2 is assigned from spilled register, it is initially * imprecise, but will be marked precise later once it is used in precise context */ @@ -520,14 +518,14 @@ __msg("mark_precise: frame0: regs=r0 stack= before 3: (b7) r0 = 0") __naked void partial_stack_load_preserves_zeros(void) { asm volatile ( - /* fp-8 is all STACK_ZERO */ + /* fp-8 is value zero (represented by a zero value fake reg) */ ".8byte %[fp8_st_zero];" /* LLVM-18+: *(u64 *)(r10 -8) = 0; */ /* fp-16 is const zero register */ "r0 = 0;" "*(u64 *)(r10 -16) = r0;" - /* load single U8 from non-aligned STACK_ZERO slot */ + /* load single U8 from non-aligned spilled value zero slot */ "r1 = %[single_byte_buf];" "r2 = *(u8 *)(r10 -1);" "r1 += r2;" @@ -539,7 +537,7 @@ __naked void partial_stack_load_preserves_zeros(void) "r1 += r2;" "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ - /* load single U16 from non-aligned STACK_ZERO slot */ + /* load single U16 from non-aligned spilled value zero slot */ "r1 = %[single_byte_buf];" "r2 = *(u16 *)(r10 -2);" "r1 += r2;" @@ -551,7 +549,7 @@ __naked void partial_stack_load_preserves_zeros(void) "r1 += r2;" "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ - /* load single U32 from non-aligned STACK_ZERO slot */ + /* load single U32 from non-aligned spilled value zero slot */ "r1 = %[single_byte_buf];" "r2 = *(u32 *)(r10 -4);" "r1 += r2;" @@ -583,6 +581,47 @@ __naked void partial_stack_load_preserves_zeros(void) : __clobber_common); } +SEC("raw_tp") +__log_level(2) +__success +/* fp-4 is STACK_ZERO */ +__msg("2: (62) *(u32 *)(r10 -4) = 0 ; R10=fp0 fp-8=0000????") +__msg("4: (71) r2 = *(u8 *)(r10 -1) ; R2_w=0 R10=fp0 fp-8=0000????") +__msg("5: (0f) r1 += r2") +__msg("mark_precise: frame0: last_idx 5 first_idx 0 subseq_idx -1") +__msg("mark_precise: frame0: regs=r2 stack= before 4: (71) r2 = *(u8 *)(r10 -1)") +__naked void partial_stack_load_preserves_partial_zeros(void) +{ + asm volatile ( + /* fp-4 is value zero */ + ".8byte %[fp4_st_zero];" /* LLVM-18+: *(u32 *)(r10 -4) = 0; */ + + /* load single U8 from non-aligned stack zero slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u8 *)(r10 -1);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U16 from non-aligned stack zero slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u16 *)(r10 -2);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + /* load single U32 from non-aligned stack zero slot */ + "r1 = %[single_byte_buf];" + "r2 = *(u32 *)(r10 -4);" + "r1 += r2;" + "*(u8 *)(r1 + 0) = r2;" /* this should be fine */ + + "r0 = 0;" + "exit;" + : + : __imm_ptr(single_byte_buf), + __imm_insn(fp4_st_zero, BPF_ST_MEM(BPF_W, BPF_REG_FP, -4, 0)) + : __clobber_common); +} + char two_byte_buf[2] SEC(".data.two_byte_buf"); SEC("raw_tp") @@ -737,4 +776,168 @@ __naked void stack_load_preserves_const_precision_subreg(void) : __clobber_common); } +SEC("xdp") +__description("32-bit spilled reg range should be tracked") +__success __retval(0) +__naked void spill_32bit_range_track(void) +{ + asm volatile(" \ + call %[bpf_ktime_get_ns]; \ + /* Make r0 bounded. */ \ + r0 &= 65535; \ + /* Assign an ID to r0. */ \ + r1 = r0; \ + /* 32-bit spill r0 to stack. */ \ + *(u32*)(r10 - 8) = r0; \ + /* Boundary check on r0. */ \ + if r0 < 1 goto l0_%=; \ + /* 32-bit fill r1 from stack. */ \ + r1 = *(u32*)(r10 - 8); \ + /* r1 == r0 => r1 >= 1 always. */ \ + if r1 >= 1 goto l0_%=; \ + /* Dead branch: the verifier should prune it. \ + * Do an invalid memory access if the verifier \ + * follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_ktime_get_ns) + : __clobber_all); +} + +SEC("xdp") +__description("64-bit spill of 64-bit reg should assign ID") +__success __retval(0) +__naked void spill_64bit_of_64bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x80000000; \ + r0 <<= 32; \ + /* 64-bit spill r0 to stack - should assign an ID. */\ + *(u64*)(r10 - 8) = r0; \ + /* 64-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u64*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("32-bit spill of 32-bit reg should assign ID") +__success __retval(0) +__naked void spill_32bit_of_32bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + w0 &= 0x80000000; \ + /* 32-bit spill r0 to stack - should assign an ID. */\ + *(u32*)(r10 - 8) = r0; \ + /* 32-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u32*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("16-bit spill of 16-bit reg should assign ID") +__success __retval(0) +__naked void spill_16bit_of_16bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x8000; \ + /* 16-bit spill r0 to stack - should assign an ID. */\ + *(u16*)(r10 - 8) = r0; \ + /* 16-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u16*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + +SEC("xdp") +__description("8-bit spill of 8-bit reg should assign ID") +__success __retval(0) +__naked void spill_8bit_of_8bit_ok(void) +{ + asm volatile (" \ + /* Roll one bit to make the register inexact. */\ + call %[bpf_get_prandom_u32]; \ + r0 &= 0x80; \ + /* 8-bit spill r0 to stack - should assign an ID. */\ + *(u8*)(r10 - 8) = r0; \ + /* 8-bit fill r1 from stack - should preserve the ID. */\ + r1 = *(u8*)(r10 - 8); \ + /* Compare r1 with another register to trigger find_equal_scalars.\ + * Having one random bit is important here, otherwise the verifier cuts\ + * the corners. \ + */ \ + r2 = 0; \ + if r1 != r2 goto l0_%=; \ + /* The result of this comparison is predefined. */\ + if r0 == r2 goto l0_%=; \ + /* Dead branch: the verifier should prune it. Do an invalid memory\ + * access if the verifier follows it. \ + */ \ + r0 = *(u64*)(r9 + 0); \ + exit; \ +l0_%=: r0 = 0; \ + exit; \ +" : + : __imm(bpf_get_prandom_u32) + : __clobber_all); +} + char _license[] SEC("license") = "GPL"; diff --git a/tools/testing/selftests/bpf/test_loader.c b/tools/testing/selftests/bpf/test_loader.c index f01391021218..ba57601c2a4d 100644 --- a/tools/testing/selftests/bpf/test_loader.c +++ b/tools/testing/selftests/bpf/test_loader.c @@ -181,7 +181,7 @@ static int parse_test_spec(struct test_loader *tester, memset(spec, 0, sizeof(*spec)); spec->prog_name = bpf_program__name(prog); - spec->prog_flags = BPF_F_TEST_REG_INVARIANTS; /* by default be strict */ + spec->prog_flags = testing_prog_flags(); btf = bpf_object__btf(obj); if (!btf) { @@ -688,7 +688,7 @@ static void process_subtest(struct test_loader *tester, ++nr_progs; specs = calloc(nr_progs, sizeof(struct test_spec)); - if (!ASSERT_OK_PTR(specs, "Can't alloc specs array")) + if (!ASSERT_OK_PTR(specs, "specs_alloc")) return; i = 0; diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c index 767e0693df10..dfbab214f4d1 100644 --- a/tools/testing/selftests/bpf/test_maps.c +++ b/tools/testing/selftests/bpf/test_maps.c @@ -1190,7 +1190,11 @@ static void test_map_in_map(void) goto out_map_in_map; } - bpf_object__load(obj); + err = bpf_object__load(obj); + if (err) { + printf("Failed to load test prog\n"); + goto out_map_in_map; + } map = bpf_object__find_map_by_name(obj, "mim_array"); if (!map) { diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c index 1b9387890148..808550986f30 100644 --- a/tools/testing/selftests/bpf/test_progs.c +++ b/tools/testing/selftests/bpf/test_progs.c @@ -547,24 +547,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name) return bpf_map__fd(map); } -static bool is_jit_enabled(void) -{ - const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; - bool enabled = false; - int sysctl_fd; - - sysctl_fd = open(jit_sysctl, 0, O_RDONLY); - if (sysctl_fd != -1) { - char tmpc; - - if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) - enabled = (tmpc != '0'); - close(sysctl_fd); - } - - return enabled; -} - int compare_map_keys(int map1_fd, int map2_fd) { __u32 key, next_key; diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c index b0068a9d2cfe..80c42583f597 100644 --- a/tools/testing/selftests/bpf/test_sock_addr.c +++ b/tools/testing/selftests/bpf/test_sock_addr.c @@ -19,6 +19,7 @@ #include <bpf/libbpf.h> #include "cgroup_helpers.h" +#include "testing_helpers.h" #include "bpf_util.h" #ifndef ENOTSUPP @@ -679,7 +680,7 @@ static int load_path(const struct sock_addr_test *test, const char *path) bpf_program__set_type(prog, BPF_PROG_TYPE_CGROUP_SOCK_ADDR); bpf_program__set_expected_attach_type(prog, test->expected_attach_type); - bpf_program__set_flags(prog, BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS); + bpf_program__set_flags(prog, testing_prog_flags()); err = bpf_object__load(obj); if (err) { diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c index f36e41435be7..df04bda1c927 100644 --- a/tools/testing/selftests/bpf/test_verifier.c +++ b/tools/testing/selftests/bpf/test_verifier.c @@ -67,6 +67,7 @@ #define F_NEEDS_EFFICIENT_UNALIGNED_ACCESS (1 << 0) #define F_LOAD_WITH_STRICT_ALIGNMENT (1 << 1) +#define F_NEEDS_JIT_ENABLED (1 << 2) /* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */ #define ADMIN_CAPS (1ULL << CAP_NET_ADMIN | \ @@ -74,6 +75,7 @@ 1ULL << CAP_BPF) #define UNPRIV_SYSCTL "kernel/unprivileged_bpf_disabled" static bool unpriv_disabled = false; +static bool jit_disabled; static int skips; static bool verbose = false; static int verif_log_level = 0; @@ -1341,48 +1343,6 @@ static bool cmp_str_seq(const char *log, const char *exp) return true; } -static struct bpf_insn *get_xlated_program(int fd_prog, int *cnt) -{ - __u32 buf_element_size = sizeof(struct bpf_insn); - struct bpf_prog_info info = {}; - __u32 info_len = sizeof(info); - __u32 xlated_prog_len; - struct bpf_insn *buf; - - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("bpf_prog_get_info_by_fd failed"); - return NULL; - } - - xlated_prog_len = info.xlated_prog_len; - if (xlated_prog_len % buf_element_size) { - printf("Program length %d is not multiple of %d\n", - xlated_prog_len, buf_element_size); - return NULL; - } - - *cnt = xlated_prog_len / buf_element_size; - buf = calloc(*cnt, buf_element_size); - if (!buf) { - perror("can't allocate xlated program buffer"); - return NULL; - } - - bzero(&info, sizeof(info)); - info.xlated_prog_len = xlated_prog_len; - info.xlated_prog_insns = (__u64)(unsigned long)buf; - if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { - perror("second bpf_prog_get_info_by_fd failed"); - goto out_free_buf; - } - - return buf; - -out_free_buf: - free(buf); - return NULL; -} - static bool is_null_insn(struct bpf_insn *insn) { struct bpf_insn null_insn = {}; @@ -1505,7 +1465,7 @@ static void print_insn(struct bpf_insn *buf, int cnt) static bool check_xlated_program(struct bpf_test *test, int fd_prog) { struct bpf_insn *buf; - int cnt; + unsigned int cnt; bool result = true; bool check_expected = !is_null_insn(test->expected_insns); bool check_unexpected = !is_null_insn(test->unexpected_insns); @@ -1513,8 +1473,7 @@ static bool check_xlated_program(struct bpf_test *test, int fd_prog) if (!check_expected && !check_unexpected) goto out; - buf = get_xlated_program(fd_prog, &cnt); - if (!buf) { + if (get_xlated_program(fd_prog, &buf, &cnt)) { printf("FAIL: can't get xlated program\n"); result = false; goto out; @@ -1567,6 +1526,13 @@ static void do_test_single(struct bpf_test *test, bool unpriv, __u32 pflags; int i, err; + if ((test->flags & F_NEEDS_JIT_ENABLED) && jit_disabled) { + printf("SKIP (requires BPF JIT)\n"); + skips++; + sched_yield(); + return; + } + fd_prog = -1; for (i = 0; i < MAX_NR_MAPS; i++) map_fds[i] = -1; @@ -1588,7 +1554,7 @@ static void do_test_single(struct bpf_test *test, bool unpriv, if (fixup_skips != skips) return; - pflags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; + pflags = testing_prog_flags(); if (test->flags & F_LOAD_WITH_STRICT_ALIGNMENT) pflags |= BPF_F_STRICT_ALIGNMENT; if (test->flags & F_NEEDS_EFFICIENT_UNALIGNED_ACCESS) @@ -1887,6 +1853,8 @@ int main(int argc, char **argv) return EXIT_FAILURE; } + jit_disabled = !is_jit_enabled(); + /* Use libbpf 1.0 API mode */ libbpf_set_strict_mode(LIBBPF_STRICT_ALL); diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c index d2458c1b1671..a59e56d804ee 100644 --- a/tools/testing/selftests/bpf/testing_helpers.c +++ b/tools/testing/selftests/bpf/testing_helpers.c @@ -252,6 +252,34 @@ __u32 link_info_prog_id(const struct bpf_link *link, struct bpf_link_info *info) int extra_prog_load_log_flags = 0; +int testing_prog_flags(void) +{ + static int cached_flags = -1; + static int prog_flags[] = { BPF_F_TEST_RND_HI32, BPF_F_TEST_REG_INVARIANTS }; + static struct bpf_insn insns[] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN(), + }; + int insn_cnt = ARRAY_SIZE(insns), i, fd, flags = 0; + LIBBPF_OPTS(bpf_prog_load_opts, opts); + + if (cached_flags >= 0) + return cached_flags; + + for (i = 0; i < ARRAY_SIZE(prog_flags); i++) { + opts.prog_flags = prog_flags[i]; + fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "flag-test", "GPL", + insns, insn_cnt, &opts); + if (fd >= 0) { + flags |= prog_flags[i]; + close(fd); + } + } + + cached_flags = flags; + return cached_flags; +} + int bpf_prog_test_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd) { @@ -276,7 +304,7 @@ int bpf_prog_test_load(const char *file, enum bpf_prog_type type, if (type != BPF_PROG_TYPE_UNSPEC && bpf_program__type(prog) != type) bpf_program__set_type(prog, type); - flags = bpf_program__flags(prog) | BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS; + flags = bpf_program__flags(prog) | testing_prog_flags(); bpf_program__set_flags(prog, flags); err = bpf_object__load(obj); @@ -299,7 +327,7 @@ int bpf_test_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, { LIBBPF_OPTS(bpf_prog_load_opts, opts, .kern_version = kern_version, - .prog_flags = BPF_F_TEST_RND_HI32 | BPF_F_TEST_REG_INVARIANTS, + .prog_flags = testing_prog_flags(), .log_level = extra_prog_load_log_flags, .log_buf = log_buf, .log_size = log_buf_sz, @@ -387,3 +415,63 @@ int kern_sync_rcu(void) { return syscall(__NR_membarrier, MEMBARRIER_CMD_SHARED, 0, 0); } + +int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt) +{ + __u32 buf_element_size = sizeof(struct bpf_insn); + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 xlated_prog_len; + + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("bpf_prog_get_info_by_fd failed"); + return -1; + } + + xlated_prog_len = info.xlated_prog_len; + if (xlated_prog_len % buf_element_size) { + printf("Program length %u is not multiple of %u\n", + xlated_prog_len, buf_element_size); + return -1; + } + + *cnt = xlated_prog_len / buf_element_size; + *buf = calloc(*cnt, buf_element_size); + if (!buf) { + perror("can't allocate xlated program buffer"); + return -ENOMEM; + } + + bzero(&info, sizeof(info)); + info.xlated_prog_len = xlated_prog_len; + info.xlated_prog_insns = (__u64)(unsigned long)*buf; + if (bpf_prog_get_info_by_fd(fd_prog, &info, &info_len)) { + perror("second bpf_prog_get_info_by_fd failed"); + goto out_free_buf; + } + + return 0; + +out_free_buf: + free(*buf); + *buf = NULL; + return -1; +} + +bool is_jit_enabled(void) +{ + const char *jit_sysctl = "/proc/sys/net/core/bpf_jit_enable"; + bool enabled = false; + int sysctl_fd; + + sysctl_fd = open(jit_sysctl, O_RDONLY); + if (sysctl_fd != -1) { + char tmpc; + + if (read(sysctl_fd, &tmpc, sizeof(tmpc)) == 1) + enabled = (tmpc != '0'); + close(sysctl_fd); + } + + return enabled; +} diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h index 35284faff4f2..d14de81727e6 100644 --- a/tools/testing/selftests/bpf/testing_helpers.h +++ b/tools/testing/selftests/bpf/testing_helpers.h @@ -46,4 +46,12 @@ static inline __u64 get_time_ns(void) return (u64)t.tv_sec * 1000000000 + t.tv_nsec; } +struct bpf_insn; +/* Request BPF program instructions after all rewrites are applied, + * e.g. verifier.c:convert_ctx_access() is done. + */ +int get_xlated_program(int fd_prog, struct bpf_insn **buf, __u32 *cnt); +int testing_prog_flags(void); +bool is_jit_enabled(void); + #endif /* __TESTING_HELPERS_H */ diff --git a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c index a535d41dc20d..59125b22ae39 100644 --- a/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c +++ b/tools/testing/selftests/bpf/verifier/bpf_loop_inline.c @@ -57,6 +57,7 @@ .expected_insns = { PSEUDO_CALL_INSN() }, .unexpected_insns = { HELPER_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { { 0, MAIN_TYPE }, { 12, CALLBACK_TYPE } }, @@ -90,6 +91,7 @@ .expected_insns = { HELPER_CALL_INSN() }, .unexpected_insns = { PSEUDO_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, @@ -127,6 +129,7 @@ .expected_insns = { HELPER_CALL_INSN() }, .unexpected_insns = { PSEUDO_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { @@ -165,6 +168,7 @@ .expected_insns = { PSEUDO_CALL_INSN() }, .unexpected_insns = { HELPER_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .runs = 0, .func_info = { @@ -235,6 +239,7 @@ }, .unexpected_insns = { HELPER_CALL_INSN() }, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .result = ACCEPT, .func_info = { { 0, MAIN_TYPE }, @@ -252,6 +257,7 @@ .unexpected_insns = { HELPER_CALL_INSN() }, .result = ACCEPT, .prog_type = BPF_PROG_TYPE_TRACEPOINT, + .flags = F_NEEDS_JIT_ENABLED, .func_info = { { 0, MAIN_TYPE }, { 16, CALLBACK_TYPE } }, .func_info_cnt = 2, BTF_TYPES diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c index 8a2ff81d8350..0a9293a57211 100644 --- a/tools/testing/selftests/bpf/verifier/precise.c +++ b/tools/testing/selftests/bpf/verifier/precise.c @@ -183,10 +183,10 @@ .prog_type = BPF_PROG_TYPE_XDP, .flags = BPF_F_TEST_STATE_FREQ, .errstr = "mark_precise: frame0: last_idx 7 first_idx 7\ - mark_precise: frame0: parent state regs=r4 stack=:\ + mark_precise: frame0: parent state regs=r4 stack=-8:\ mark_precise: frame0: last_idx 6 first_idx 4\ - mark_precise: frame0: regs=r4 stack= before 6: (b7) r0 = -1\ - mark_precise: frame0: regs=r4 stack= before 5: (79) r4 = *(u64 *)(r10 -8)\ + mark_precise: frame0: regs=r4 stack=-8 before 6: (b7) r0 = -1\ + mark_precise: frame0: regs=r4 stack=-8 before 5: (79) r4 = *(u64 *)(r10 -8)\ mark_precise: frame0: regs= stack=-8 before 4: (7b) *(u64 *)(r3 -8) = r0\ mark_precise: frame0: parent state regs=r0 stack=:\ mark_precise: frame0: last_idx 3 first_idx 3\ diff --git a/tools/testing/selftests/drivers/net/bonding/Makefile b/tools/testing/selftests/drivers/net/bonding/Makefile index 8a72bb7de70f..03a089165d3f 100644 --- a/tools/testing/selftests/drivers/net/bonding/Makefile +++ b/tools/testing/selftests/drivers/net/bonding/Makefile @@ -15,7 +15,10 @@ TEST_PROGS := \ TEST_FILES := \ lag_lib.sh \ bond_topo_2d1c.sh \ - bond_topo_3d1c.sh \ - net_forwarding_lib.sh + bond_topo_3d1c.sh + +TEST_INCLUDES := \ + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh index 6358df5752f9..1ec7f59db7f4 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-break-lacpdu-tx.sh @@ -20,21 +20,21 @@ # +------+ +------+ # # We use veths instead of physical interfaces +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh set -e -tmp=$(mktemp -q dump.XXXXXX) cleanup() { ip link del fab-br0 >/dev/null 2>&1 || : ip link del fbond >/dev/null 2>&1 || : ip link del veth1-bond >/dev/null 2>&1 || : ip link del veth2-bond >/dev/null 2>&1 || : - modprobe -r bonding >/dev/null 2>&1 || : - rm -f -- ${tmp} } trap cleanup 0 1 2 cleanup -sleep 1 # create the bridge ip link add fab-br0 address 52:54:00:3B:7C:A6 mtu 1500 type bridge \ @@ -67,13 +67,12 @@ ip link set fab-br0 up ip link set fbond up ip addr add dev fab-br0 10.0.0.3 -tcpdump -n -i veth1-end -e ether proto 0x8809 >${tmp} 2>&1 & -sleep 15 -pkill tcpdump >/dev/null 2>&1 rc=0 -num=$(grep "packets captured" ${tmp} | awk '{print $1}') -if test "$num" -gt 0; then - echo "PASS, captured ${num}" +tc qdisc add dev veth1-end clsact +tc filter add dev veth1-end ingress protocol 0x8809 pref 1 handle 101 flower skip_hw action pass +if slowwait_for_counter 15 2 \ + tc_rule_handle_stats_get "dev veth1-end ingress" 101 ".packets" "" &> /dev/null; then + echo "PASS, captured 2" else echo "FAIL" rc=1 diff --git a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh index 862e947e17c7..8293dbc7c18f 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-eth-type-change.sh @@ -11,7 +11,7 @@ ALL_TESTS=" REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh bond_check_flags() { diff --git a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh index 89af402fabbe..78d3e0fe6604 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond-lladdr-target.sh @@ -17,6 +17,11 @@ # +----------------+ # # We use veths instead of physical interfaces +REQUIRE_MZ=no +NUM_NETIFS=0 +lib_dir=$(dirname "$0") +source "$lib_dir"/../../../net/forwarding/lib.sh + sw="sw-$(mktemp -u XXXXXX)" host="ns-$(mktemp -u XXXXXX)" @@ -26,6 +31,16 @@ cleanup() ip netns del $host } +wait_lladdr_dad() +{ + $@ | grep fe80 | grep -qv tentative +} + +wait_bond_up() +{ + $@ | grep -q 'state UP' +} + trap cleanup 0 1 2 ip netns add $sw @@ -37,8 +52,8 @@ ip -n $host link add veth1 type veth peer name veth1 netns $sw ip -n $sw link add br0 type bridge ip -n $sw link set br0 up sw_lladdr=$(ip -n $sw addr show br0 | awk '/fe80/{print $2}' | cut -d'/' -f1) -# sleep some time to make sure bridge lladdr pass DAD -sleep 2 +# wait some time to make sure bridge lladdr pass DAD +slowwait 2 wait_lladdr_dad ip -n $sw addr show br0 ip -n $host link add bond0 type bond mode 1 ns_ip6_target ${sw_lladdr} \ arp_validate 3 arp_interval 1000 @@ -53,7 +68,7 @@ ip -n $sw link set veth1 master br0 ip -n $sw link set veth0 up ip -n $sw link set veth1 up -sleep 5 +slowwait 5 wait_bond_up ip -n $host link show bond0 rc=0 if ip -n $host link show bond0 | grep -q LOWER_UP; then diff --git a/tools/testing/selftests/drivers/net/bonding/bond_options.sh b/tools/testing/selftests/drivers/net/bonding/bond_options.sh index d508486cc0bd..6fd0cff3e1e9 100755 --- a/tools/testing/selftests/drivers/net/bonding/bond_options.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_options.sh @@ -45,15 +45,23 @@ skip_ns() } active_slave="" +active_slave_changed() +{ + local old_active_slave=$1 + local new_active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" \ + ".[].linkinfo.info_data.active_slave") + test "$old_active_slave" != "$new_active_slave" +} + check_active_slave() { local target_active_slave=$1 + slowwait 2 active_slave_changed $active_slave active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") test "$active_slave" = "$target_active_slave" check_err $? "Current active slave is $active_slave but not $target_active_slave" } - # Test bonding prio option prio_test() { @@ -84,13 +92,13 @@ prio_test() # active slave should be the higher prio slave ip -n ${s_ns} link set $active_slave down - bond_check_connection "fail over" check_active_slave eth2 + bond_check_connection "fail over" # when only 1 slave is up ip -n ${s_ns} link set $active_slave down - bond_check_connection "only 1 slave up" check_active_slave eth0 + bond_check_connection "only 1 slave up" # when a higher prio slave change to up ip -n ${s_ns} link set eth2 up @@ -140,8 +148,8 @@ prio_test() check_active_slave "eth1" ip -n ${s_ns} link set $active_slave down - bond_check_connection "change slave prio" check_active_slave "eth0" + bond_check_connection "change slave prio" fi } @@ -199,6 +207,15 @@ prio() prio_ns "active-backup" } +wait_mii_up() +{ + for i in $(seq 0 2); do + mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") + [ ${mii_status} != "UP" ] && return 1 + done + return 0 +} + arp_validate_test() { local param="$1" @@ -211,7 +228,7 @@ arp_validate_test() [ $RET -ne 0 ] && log_test "arp_validate" "$retmsg" # wait for a while to make sure the mii status stable - sleep 5 + slowwait 5 wait_mii_up for i in $(seq 0 2); do mii_status=$(cmd_jq "ip -n ${s_ns} -j -d link show eth$i" ".[].linkinfo.info_slave_data.mii_status") if [ ${mii_status} != "UP" ]; then @@ -276,10 +293,13 @@ garp_test() active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") ip -n ${s_ns} link set ${active_slave} down - exp_num=$(echo "${param}" | cut -f6 -d ' ') - sleep $((exp_num + 2)) + # wait for active link change + slowwait 2 active_slave_changed $active_slave + exp_num=$(echo "${param}" | cut -f6 -d ' ') active_slave=$(cmd_jq "ip -n ${s_ns} -d -j link show bond0" ".[].linkinfo.info_data.active_slave") + slowwait_for_counter $((exp_num + 5)) $exp_num \ + tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}" # check result real_num=$(tc_rule_handle_stats_get "dev s${active_slave#eth} ingress" 101 ".packets" "-n ${g_ns}") @@ -296,8 +316,8 @@ garp_test() num_grat_arp() { local val - for val in 10 20 30 50; do - garp_test "mode active-backup miimon 100 num_grat_arp $val peer_notify_delay 1000" + for val in 10 20 30; do + garp_test "mode active-backup miimon 10 num_grat_arp $val peer_notify_delay 100" log_test "num_grat_arp" "active-backup miimon num_grat_arp $val" done } diff --git a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh index a509ef949dcf..195ef83cfbf1 100644 --- a/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh +++ b/tools/testing/selftests/drivers/net/bonding/bond_topo_2d1c.sh @@ -28,7 +28,7 @@ REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source ${lib_dir}/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh s_ns="s-$(mktemp -u XXXXXX)" c_ns="c-$(mktemp -u XXXXXX)" @@ -73,7 +73,6 @@ server_create() ip -n ${s_ns} link set bond0 up ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 - sleep 2 } # Reset bond with new mode and options @@ -96,7 +95,8 @@ bond_reset() ip -n ${s_ns} link set bond0 up ip -n ${s_ns} addr add ${s_ip4}/24 dev bond0 ip -n ${s_ns} addr add ${s_ip6}/24 dev bond0 - sleep 2 + # Wait for IPv6 address ready as it needs DAD + slowwait 2 ip netns exec ${s_ns} ping6 ${c_ip6} -c 1 -W 0.1 &> /dev/null } server_destroy() @@ -150,7 +150,7 @@ bond_check_connection() { local msg=${1:-"check connection"} - sleep 2 + slowwait 2 ip netns exec ${s_ns} ping ${c_ip4} -c 1 -W 0.1 &> /dev/null ip netns exec ${s_ns} ping ${c_ip4} -c5 -i 0.1 &>/dev/null check_err $? "${msg}: ping failed" ip netns exec ${s_ns} ping6 ${c_ip6} -c5 -i 0.1 &>/dev/null diff --git a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh index 5cfe7d8ebc25..e6fa24eded5b 100755 --- a/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh +++ b/tools/testing/selftests/drivers/net/bonding/dev_addr_lists.sh @@ -14,7 +14,7 @@ ALL_TESTS=" REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh source "$lib_dir"/lag_lib.sh diff --git a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh index dbdd736a41d3..bf9bcd1b5ec0 100644 --- a/tools/testing/selftests/drivers/net/bonding/lag_lib.sh +++ b/tools/testing/selftests/drivers/net/bonding/lag_lib.sh @@ -107,13 +107,12 @@ lag_setup2x2() NAMESPACES="${namespaces}" } -# cleanup all lag related namespaces and remove the bonding module +# cleanup all lag related namespaces lag_cleanup() { for n in ${NAMESPACES}; do ip netns delete ${n} >/dev/null 2>&1 || true done - modprobe -r bonding } SWITCH="lag_node1" @@ -159,7 +158,7 @@ test_bond_recovery() create_bond $@ # verify connectivity - ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null check_err $? "No connectivity" # force the links of the bond down @@ -169,7 +168,7 @@ test_bond_recovery() ip netns exec ${SWITCH} ip link set eth1 down # re-verify connectivity - ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 >/dev/null 2>&1 + slowwait 2 ip netns exec ${CLIENT} ping ${SWITCHIP} -c 2 -W 0.1 &> /dev/null local rc=$? check_err $rc "Bond failed to recover" diff --git a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh index b76bf5030952..9d26ab4cad0b 100755 --- a/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh +++ b/tools/testing/selftests/drivers/net/bonding/mode-1-recovery-updelay.sh @@ -23,7 +23,7 @@ REQUIRE_MZ=no REQUIRE_JQ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh source "$lib_dir"/lag_lib.sh cleanup() diff --git a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh index 8c2619002147..2d275b3e47dd 100755 --- a/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh +++ b/tools/testing/selftests/drivers/net/bonding/mode-2-recovery-updelay.sh @@ -23,7 +23,7 @@ REQUIRE_MZ=no REQUIRE_JQ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh source "$lib_dir"/lag_lib.sh cleanup() diff --git a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh deleted file mode 120000 index 39c96828c5ef..000000000000 --- a/tools/testing/selftests/drivers/net/bonding/net_forwarding_lib.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/lib.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/Makefile b/tools/testing/selftests/drivers/net/dsa/Makefile index c393e7b73805..cd6817fe5be6 100644 --- a/tools/testing/selftests/drivers/net/dsa/Makefile +++ b/tools/testing/selftests/drivers/net/dsa/Makefile @@ -11,8 +11,22 @@ TEST_PROGS = bridge_locked_port.sh \ tc_actions.sh \ test_bridge_fdb_stress.sh -TEST_PROGS_EXTENDED := lib.sh tc_common.sh +TEST_FILES := \ + run_net_forwarding_test.sh \ + forwarding.config -TEST_FILES := forwarding.config +TEST_INCLUDES := \ + ../../../net/forwarding/bridge_locked_port.sh \ + ../../../net/forwarding/bridge_mdb.sh \ + ../../../net/forwarding/bridge_mld.sh \ + ../../../net/forwarding/bridge_vlan_aware.sh \ + ../../../net/forwarding/bridge_vlan_mcast.sh \ + ../../../net/forwarding/bridge_vlan_unaware.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/forwarding/local_termination.sh \ + ../../../net/forwarding/no_forwarding.sh \ + ../../../net/forwarding/tc_actions.sh \ + ../../../net/forwarding/tc_common.sh \ + ../../../net/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh index f5eb940c4c7c..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_locked_port.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_locked_port.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh index 76492da525f7..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mdb.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_mdb.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh index 81a7e0df0474..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_mld.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_mld.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh index 9831ed74376a..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_aware.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_vlan_aware.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh index 7f3c3f0bf719..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_mcast.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_vlan_mcast.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh index bf1a57e6bde1..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh +++ b/tools/testing/selftests/drivers/net/dsa/bridge_vlan_unaware.sh @@ -1 +1 @@ -../../../net/forwarding/bridge_vlan_unaware.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/lib.sh b/tools/testing/selftests/drivers/net/dsa/lib.sh deleted file mode 120000 index 39c96828c5ef..000000000000 --- a/tools/testing/selftests/drivers/net/dsa/lib.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/lib.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/local_termination.sh b/tools/testing/selftests/drivers/net/dsa/local_termination.sh index c08166f84501..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/local_termination.sh +++ b/tools/testing/selftests/drivers/net/dsa/local_termination.sh @@ -1 +1 @@ -../../../net/forwarding/local_termination.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh index b9757466bc97..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh +++ b/tools/testing/selftests/drivers/net/dsa/no_forwarding.sh @@ -1 +1 @@ -../../../net/forwarding/no_forwarding.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh new file mode 100755 index 000000000000..4106c0a102ea --- /dev/null +++ b/tools/testing/selftests/drivers/net/dsa/run_net_forwarding_test.sh @@ -0,0 +1,9 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0 + +libdir=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")") +testname=$(basename "${BASH_SOURCE[0]}") + +source "$libdir"/forwarding.config +cd "$libdir"/../../../net/forwarding/ || exit 1 +source "./$testname" "$@" diff --git a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh index 306213d9430e..d16a65e7595d 120000 --- a/tools/testing/selftests/drivers/net/dsa/tc_actions.sh +++ b/tools/testing/selftests/drivers/net/dsa/tc_actions.sh @@ -1 +1 @@ -../../../net/forwarding/tc_actions.sh
\ No newline at end of file +run_net_forwarding_test.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/tc_common.sh b/tools/testing/selftests/drivers/net/dsa/tc_common.sh deleted file mode 120000 index bc3465bdc36b..000000000000 --- a/tools/testing/selftests/drivers/net/dsa/tc_common.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/tc_common.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh index 92acab83fbe2..74682151d04d 100755 --- a/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh +++ b/tools/testing/selftests/drivers/net/dsa/test_bridge_fdb_stress.sh @@ -19,7 +19,7 @@ REQUIRE_JQ="no" REQUIRE_MZ="no" NETIF_CREATE="no" lib_dir=$(dirname "$0") -source "$lib_dir"/lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh cleanup() { echo "Cleaning up" diff --git a/tools/testing/selftests/drivers/net/netdevsim/Makefile b/tools/testing/selftests/drivers/net/netdevsim/Makefile new file mode 100644 index 000000000000..7a29a05bea8b --- /dev/null +++ b/tools/testing/selftests/drivers/net/netdevsim/Makefile @@ -0,0 +1,17 @@ +# SPDX-License-Identifier: GPL-2.0+ OR MIT + +TEST_PROGS = devlink.sh \ + devlink_in_netns.sh \ + devlink_trap.sh \ + ethtool-coalesce.sh \ + ethtool-fec.sh \ + ethtool-pause.sh \ + ethtool-ring.sh \ + fib.sh \ + hw_stats_l3.sh \ + nexthop.sh \ + psample.sh \ + tc-mq-visibility.sh \ + udp_tunnel_nic.sh \ + +include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh index f98435c502f6..384cfa3d38a6 100755 --- a/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh +++ b/tools/testing/selftests/drivers/net/netdevsim/udp_tunnel_nic.sh @@ -270,7 +270,7 @@ for port in 0 1; do echo 1 > $NSIM_DEV_SYS/new_port fi NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="new NIC device created" exp0=( 0 0 0 0 ) @@ -284,8 +284,8 @@ for port in 0 1; do msg="VxLAN v4 devices go down" exp0=( 0 0 0 0 ) - ifconfig vxlan1 down - ifconfig vxlan0 down + ip link set dev vxlan1 down + ip link set dev vxlan0 down check_tables msg="VxLAN v6 devices" @@ -293,7 +293,7 @@ for port in 0 1; do new_vxlan vxlanA 4789 $NSIM_NETDEV 6 for ifc in vxlan0 vxlan1; do - ifconfig $ifc up + ip link set dev $ifc up done new_vxlan vxlanB 4789 $NSIM_NETDEV 6 @@ -307,14 +307,14 @@ for port in 0 1; do new_geneve gnv0 6081 msg="NIC device goes down" - ifconfig $NSIM_NETDEV down + ip link set dev $NSIM_NETDEV down if [ $port -eq 1 ]; then exp0=( 0 0 0 0 ) exp1=( 0 0 0 0 ) fi check_tables msg="NIC device goes up again" - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up exp0=( `mke 4789 1` `mke 4790 1` 0 0 ) exp1=( `mke 6081 2` 0 0 0 ) check_tables @@ -433,7 +433,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up overflow_table0 "overflow NIC table" overflow_table1 "overflow NIC table" @@ -491,7 +491,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up overflow_table0 "overflow NIC table" overflow_table1 "overflow NIC table" @@ -548,7 +548,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up overflow_table0 "destroy NIC" overflow_table1 "destroy NIC" @@ -578,7 +578,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="create VxLANs v6" new_vxlan vxlanA0 10000 $NSIM_NETDEV 6 @@ -639,7 +639,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up echo 110 > $NSIM_DEV_DFS/ports/$port/udp_ports_inject_error @@ -695,7 +695,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="create VxLANs v6" exp0=( `mke 10000 1` 0 0 0 ) @@ -755,7 +755,7 @@ for port in 0 1; do echo $port > $NSIM_DEV_SYS/new_port NSIM_NETDEV=`get_netdev_name old_netdevs` - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up msg="create VxLANs v6" exp0=( `mke 10000 1` 0 0 0 ) @@ -768,7 +768,7 @@ for port in 0 1; do check_tables msg="NIC device goes down" - ifconfig $NSIM_NETDEV down + ip link set dev $NSIM_NETDEV down if [ $port -eq 1 ]; then exp0=( 0 0 0 0 ) exp1=( 0 0 0 0 ) @@ -779,7 +779,7 @@ for port in 0 1; do check_tables msg="NIC device goes up again" - ifconfig $NSIM_NETDEV up + ip link set dev $NSIM_NETDEV up exp0=( `mke 10000 1` 0 0 0 ) check_tables @@ -827,12 +827,12 @@ new_vxlan vxlan1 4789 $NSIM_NETDEV2 msg="VxLAN v4 devices go down" exp0=( 0 0 0 0 ) -ifconfig vxlan1 down -ifconfig vxlan0 down +ip link set dev vxlan1 down +ip link set dev vxlan0 down check_tables for ifc in vxlan0 vxlan1; do - ifconfig $ifc up + ip link set dev $ifc up done msg="VxLAN v6 device" @@ -844,11 +844,11 @@ exp1=( `mke 6081 2` 0 0 0 ) new_geneve gnv0 6081 msg="NIC device goes down" -ifconfig $NSIM_NETDEV down +ip link set dev $NSIM_NETDEV down check_tables msg="NIC device goes up again" -ifconfig $NSIM_NETDEV up +ip link set dev $NSIM_NETDEV up check_tables for i in `seq 2`; do diff --git a/tools/testing/selftests/drivers/net/team/Makefile b/tools/testing/selftests/drivers/net/team/Makefile index 6a86e61e8bfe..2d5a76d99181 100644 --- a/tools/testing/selftests/drivers/net/team/Makefile +++ b/tools/testing/selftests/drivers/net/team/Makefile @@ -3,8 +3,9 @@ TEST_PROGS := dev_addr_lists.sh -TEST_FILES := \ - lag_lib.sh \ - net_forwarding_lib.sh +TEST_INCLUDES := \ + ../bonding/lag_lib.sh \ + ../../../net/forwarding/lib.sh \ + ../../../net/lib.sh include ../../../lib.mk diff --git a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh index 33913112d5ca..b1ec7755b783 100755 --- a/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh +++ b/tools/testing/selftests/drivers/net/team/dev_addr_lists.sh @@ -11,9 +11,9 @@ ALL_TESTS=" REQUIRE_MZ=no NUM_NETIFS=0 lib_dir=$(dirname "$0") -source "$lib_dir"/net_forwarding_lib.sh +source "$lib_dir"/../../../net/forwarding/lib.sh -source "$lib_dir"/lag_lib.sh +source "$lib_dir"/../bonding/lag_lib.sh destroy() diff --git a/tools/testing/selftests/drivers/net/team/lag_lib.sh b/tools/testing/selftests/drivers/net/team/lag_lib.sh deleted file mode 120000 index e1347a10afde..000000000000 --- a/tools/testing/selftests/drivers/net/team/lag_lib.sh +++ /dev/null @@ -1 +0,0 @@ -../bonding/lag_lib.sh
\ No newline at end of file diff --git a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh b/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh deleted file mode 120000 index 39c96828c5ef..000000000000 --- a/tools/testing/selftests/drivers/net/team/net_forwarding_lib.sh +++ /dev/null @@ -1 +0,0 @@ -../../../net/forwarding/lib.sh
\ No newline at end of file diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk index aa646e0661f3..087fee22dd53 100644 --- a/tools/testing/selftests/lib.mk +++ b/tools/testing/selftests/lib.mk @@ -69,11 +69,29 @@ define RUN_TESTS run_many $(1) endef +define INSTALL_INCLUDES + $(if $(TEST_INCLUDES), \ + relative_files=""; \ + for entry in $(TEST_INCLUDES); do \ + entry_dir=$$(readlink -e "$$(dirname "$$entry")"); \ + entry_name=$$(basename "$$entry"); \ + relative_dir=$${entry_dir#"$$SRC_PATH"/}; \ + if [ "$$relative_dir" = "$$entry_dir" ]; then \ + echo "Error: TEST_INCLUDES entry \"$$entry\" not located inside selftests directory ($$SRC_PATH)" >&2; \ + exit 1; \ + fi; \ + relative_files="$$relative_files $$relative_dir/$$entry_name"; \ + done; \ + cd $(SRC_PATH) && rsync -aR $$relative_files $(OBJ_PATH)/ \ + ) +endef + run_tests: all ifdef building_out_of_srctree @if [ "X$(TEST_PROGS)$(TEST_PROGS_EXTENDED)$(TEST_FILES)" != "X" ]; then \ rsync -aq --copy-unsafe-links $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) $(OUTPUT); \ fi + @$(INSTALL_INCLUDES) @if [ "X$(TEST_PROGS)" != "X" ]; then \ $(call RUN_TESTS, $(TEST_GEN_PROGS) $(TEST_CUSTOM_PROGS) \ $(addprefix $(OUTPUT)/,$(TEST_PROGS))) ; \ @@ -103,6 +121,7 @@ endef install: all ifdef INSTALL_PATH $(INSTALL_RULE) + $(INSTALL_INCLUDES) else $(error Error: set INSTALL_PATH to use install) endif diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 211753756bde..7b6918d5f4af 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -97,6 +97,8 @@ TEST_PROGS += vlan_hw_filter.sh TEST_FILES := settings TEST_FILES += in_netns.sh lib.sh net_helper.sh setup_loopback.sh setup_veth.sh +TEST_INCLUDES := forwarding/lib.sh + include ../lib.mk $(OUTPUT)/reuseport_bpf_numa: LDLIBS += -lnuma diff --git a/tools/testing/selftests/net/fcnal-test.sh b/tools/testing/selftests/net/fcnal-test.sh index 0d4f252427e2..386ebd829df5 100755 --- a/tools/testing/selftests/net/fcnal-test.sh +++ b/tools/testing/selftests/net/fcnal-test.sh @@ -38,6 +38,9 @@ # server / client nomenclature relative to ns-A source lib.sh + +PATH=$PWD:$PWD/tools/testing/selftests/net:$PATH + VERBOSE=0 NSA_DEV=eth1 @@ -97,6 +100,7 @@ log_test() local rc=$1 local expected=$2 local msg="$3" + local ans [ "${VERBOSE}" = "1" ] && echo @@ -106,19 +110,20 @@ log_test() else nfail=$((nfail+1)) printf "TEST: %-70s [FAIL]\n" "${msg}" + echo " expected rc $expected; actual rc $rc" if [ "${PAUSE_ON_FAIL}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 + read ans + [ "$ans" = "q" ] && exit 1 fi fi if [ "${PAUSE}" = "yes" ]; then echo echo "hit enter to continue, 'q' to quit" - read a - [ "$a" = "q" ] && exit 1 + read ans + [ "$ans" = "q" ] && exit 1 fi kill_procs @@ -187,6 +192,15 @@ kill_procs() sleep 1 } +set_ping_group() +{ + if [ "$VERBOSE" = "1" ]; then + echo "COMMAND: ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647'" + fi + + ${NSA_CMD} sysctl -q -w net.ipv4.ping_group_range='0 2147483647' +} + do_run_cmd() { local cmd="$*" @@ -835,14 +849,14 @@ ipv4_ping() set_sysctl net.ipv4.raw_l3mdev_accept=1 2>/dev/null ipv4_ping_novrf setup - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_ping_novrf log_subsection "With VRF" setup "yes" ipv4_ping_vrf setup "yes" - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_ping_vrf } @@ -2053,12 +2067,12 @@ ipv4_addr_bind() log_subsection "No VRF" setup - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_addr_bind_novrf log_subsection "With VRF" setup "yes" - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv4_addr_bind_vrf } @@ -2521,14 +2535,14 @@ ipv6_ping() setup ipv6_ping_novrf setup - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv6_ping_novrf log_subsection "With VRF" setup "yes" ipv6_ping_vrf setup "yes" - set_sysctl net.ipv4.ping_group_range='0 2147483647' 2>/dev/null + set_ping_group ipv6_ping_vrf } diff --git a/tools/testing/selftests/net/fib_tests.sh b/tools/testing/selftests/net/fib_tests.sh index b3ecccbbfcd2..3ec1050e47a2 100755 --- a/tools/testing/selftests/net/fib_tests.sh +++ b/tools/testing/selftests/net/fib_tests.sh @@ -743,6 +743,43 @@ fib_notify_test() cleanup &> /dev/null } +# Create a new dummy_10 to remove all associated routes. +reset_dummy_10() +{ + $IP link del dev dummy_10 + + $IP link add dummy_10 type dummy + $IP link set dev dummy_10 up + $IP -6 address add 2001:10::1/64 dev dummy_10 +} + +check_rt_num() +{ + local expected=$1 + local num=$2 + + if [ $num -ne $expected ]; then + echo "FAIL: Expected $expected routes, got $num" + ret=1 + else + ret=0 + fi +} + +check_rt_num_clean() +{ + local expected=$1 + local num=$2 + + if [ $num -ne $expected ]; then + log_test 1 0 "expected $expected routes, got $num" + set +e + cleanup &> /dev/null + return 1 + fi + return 0 +} + fib6_gc_test() { setup @@ -751,7 +788,7 @@ fib6_gc_test() echo "Fib6 garbage collection test" set -e - EXPIRE=3 + EXPIRE=5 # Check expiration of routes every $EXPIRE seconds (GC) $NS_EXEC sysctl -wq net.ipv6.route.gc_interval=$EXPIRE @@ -763,44 +800,114 @@ fib6_gc_test() $NS_EXEC sysctl -wq net.ipv6.route.flush=1 # Temporary routes - for i in $(seq 1 1000); do + for i in $(seq 1 5); do # Expire route after $EXPIRE seconds $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2)) - N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l) - if [ $N_EXP_SLEEP -ne 0 ]; then - echo "FAIL: expected 0 routes with expires, got $N_EXP_SLEEP" - ret=1 - else - ret=0 - fi + sleep $(($EXPIRE * 2 + 1)) + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection" + + reset_dummy_10 # Permanent routes - for i in $(seq 1 5000); do + for i in $(seq 1 5); do $IP -6 route add 2001:30::$i \ via 2001:10::2 dev dummy_10 done # Temporary routes - for i in $(seq 1 1000); do + for i in $(seq 1 5); do # Expire route after $EXPIRE seconds $IP -6 route add 2001:20::$i \ via 2001:10::2 dev dummy_10 expires $EXPIRE done - sleep $(($EXPIRE * 2)) - N_EXP_SLEEP=$($IP -6 route list |grep expires|wc -l) - if [ $N_EXP_SLEEP -ne 0 ]; then - echo "FAIL: expected 0 routes with expires," \ - "got $N_EXP_SLEEP (5000 permanent routes)" - ret=1 - else - ret=0 + sleep $(($EXPIRE * 2 + 1)) + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (with permanent routes)" + + reset_dummy_10 + + # Permanent routes + for i in $(seq 1 5); do + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 + done + # Replace with temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route replace 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + check_rt_num_clean 5 $($IP -6 route list |grep expires|wc -l) || return + # Wait for GC + sleep $(($EXPIRE * 2 + 1)) + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (replace with expires)" + + reset_dummy_10 + + # Temporary routes + for i in $(seq 1 5); do + # Expire route after $EXPIRE seconds + $IP -6 route add 2001:20::$i \ + via 2001:10::2 dev dummy_10 expires $EXPIRE + done + # Replace with permanent routes + for i in $(seq 1 5); do + $IP -6 route replace 2001:20::$i \ + via 2001:10::2 dev dummy_10 + done + check_rt_num_clean 0 $($IP -6 route list |grep expires|wc -l) || return + + # Wait for GC + sleep $(($EXPIRE * 2 + 1)) + + check_rt_num 5 $($IP -6 route list |grep -v expires|grep 2001:20::|wc -l) + log_test $ret 0 "ipv6 route garbage collection (replace with permanent)" + + # ra6 is required for the next test. (ipv6toolkit) + if [ ! -x "$(command -v ra6)" ]; then + echo "SKIP: ra6 not found." + set +e + cleanup &> /dev/null + return fi - set +e + # Delete dummy_10 and remove all routes + $IP link del dev dummy_10 - log_test $ret 0 "ipv6 route garbage collection" + # Create a pair of veth devices to send a RA message from one + # device to another. + $IP link add veth1 type veth peer name veth2 + $IP link set dev veth1 up + $IP link set dev veth2 up + $IP -6 address add 2001:10::1/64 dev veth1 nodad + $IP -6 address add 2001:10::2/64 dev veth2 nodad + + # Make veth1 ready to receive RA messages. + $NS_EXEC sysctl -wq net.ipv6.conf.veth1.accept_ra=2 + + # Send a RA message with a route from veth2 to veth1. + $NS_EXEC ra6 -i veth2 -d 2001:10::1 -t $EXPIRE + + # Wait for the RA message. + sleep 1 + + # systemd may mess up the test. You syould make sure that + # systemd-networkd.service and systemd-networkd.socket are stopped. + check_rt_num_clean 1 $($IP -6 route list|grep expires|wc -l) || return + + # Wait for GC + sleep $(($EXPIRE * 2 + 1)) + + $NS_EXEC sysctl -wq net.ipv6.route.flush=1 + check_rt_num 0 $($IP -6 route list |grep expires|wc -l) + log_test $ret 0 "ipv6 route garbage collection (RA message)" + + set +e cleanup &> /dev/null } diff --git a/tools/testing/selftests/net/forwarding/Makefile b/tools/testing/selftests/net/forwarding/Makefile index 4de92632f483..cdefc9a5ec34 100644 --- a/tools/testing/selftests/net/forwarding/Makefile +++ b/tools/testing/selftests/net/forwarding/Makefile @@ -129,4 +129,7 @@ TEST_FILES := devlink_lib.sh \ sch_tbf_etsprio.sh \ tc_common.sh +TEST_INCLUDES := \ + ../lib.sh + include ../../lib.mk diff --git a/tools/testing/selftests/net/forwarding/config b/tools/testing/selftests/net/forwarding/config index 697994a9278b..8d7a1a004b7c 100644 --- a/tools/testing/selftests/net/forwarding/config +++ b/tools/testing/selftests/net/forwarding/config @@ -6,14 +6,49 @@ CONFIG_IPV6_MULTIPLE_TABLES=y CONFIG_NET_VRF=m CONFIG_BPF_SYSCALL=y CONFIG_CGROUP_BPF=y +CONFIG_DUMMY=m +CONFIG_IPV6=y +CONFIG_IPV6_GRE=m +CONFIG_IPV6_MROUTE=y +CONFIG_IPV6_MROUTE_MULTIPLE_TABLES=y +CONFIG_IPV6_PIMSM_V2=y +CONFIG_IP_MROUTE=y +CONFIG_IP_MROUTE_MULTIPLE_TABLES=y +CONFIG_IP_PIMSM_V1=y +CONFIG_IP_PIMSM_V2=y +CONFIG_MACVLAN=m CONFIG_NET_ACT_CT=m CONFIG_NET_ACT_MIRRED=m CONFIG_NET_ACT_MPLS=m +CONFIG_NET_ACT_PEDIT=m +CONFIG_NET_ACT_POLICE=m +CONFIG_NET_ACT_SAMPLE=m +CONFIG_NET_ACT_SKBEDIT=m +CONFIG_NET_ACT_TUNNEL_KEY=m CONFIG_NET_ACT_VLAN=m CONFIG_NET_CLS_FLOWER=m CONFIG_NET_CLS_MATCHALL=m +CONFIG_NET_CLS_BASIC=m +CONFIG_NET_EMATCH=y +CONFIG_NET_EMATCH_META=m +CONFIG_NET_IPGRE=m +CONFIG_NET_IPGRE_DEMUX=m +CONFIG_NET_IPIP=m +CONFIG_NET_SCH_ETS=m CONFIG_NET_SCH_INGRESS=m CONFIG_NET_ACT_GACT=m +CONFIG_NET_SCH_PRIO=m +CONFIG_NET_SCH_RED=m +CONFIG_NET_SCH_TBF=m +CONFIG_NET_TC_SKB_EXT=y +CONFIG_NET_TEAM=y +CONFIG_NET_TEAM_MODE_LOADBALANCE=y +CONFIG_NETFILTER=y +CONFIG_NF_CONNTRACK=m +CONFIG_NF_FLOW_TABLE=m +CONFIG_NF_TABLES=m CONFIG_VETH=m CONFIG_NAMESPACES=y CONFIG_NET_NS=y +CONFIG_VXLAN=m +CONFIG_XFRM_USER=m diff --git a/tools/testing/selftests/net/forwarding/lib.sh b/tools/testing/selftests/net/forwarding/lib.sh index 8a61464ab6eb..db3688f52888 100644 --- a/tools/testing/selftests/net/forwarding/lib.sh +++ b/tools/testing/selftests/net/forwarding/lib.sh @@ -29,23 +29,20 @@ STABLE_MAC_ADDRS=${STABLE_MAC_ADDRS:=no} TCPDUMP_EXTRA_FLAGS=${TCPDUMP_EXTRA_FLAGS:=} TROUTE6=${TROUTE6:=traceroute6} -relative_path="${BASH_SOURCE%/*}" -if [[ "$relative_path" == "${BASH_SOURCE}" ]]; then - relative_path="." -fi +net_forwarding_dir=$(dirname "$(readlink -e "${BASH_SOURCE[0]}")") -if [[ -f $relative_path/forwarding.config ]]; then - source "$relative_path/forwarding.config" +if [[ -f $net_forwarding_dir/forwarding.config ]]; then + source "$net_forwarding_dir/forwarding.config" fi -# Kselftest framework requirement - SKIP code is 4. -ksft_skip=4 +source "$net_forwarding_dir/../lib.sh" -busywait() +# timeout in seconds +slowwait() { local timeout=$1; shift - local start_time="$(date -u +%s%3N)" + local start_time="$(date -u +%s)" while true do local out @@ -56,11 +53,13 @@ busywait() return 0 fi - local current_time="$(date -u +%s%3N)" + local current_time="$(date -u +%s)" if ((current_time - start_time > timeout)); then echo -n "$out" return 1 fi + + sleep 0.1 done } @@ -505,6 +504,15 @@ busywait_for_counter() busywait "$timeout" until_counter_is ">= $((base + delta))" "$@" } +slowwait_for_counter() +{ + local timeout=$1; shift + local delta=$1; shift + + local base=$("$@") + slowwait "$timeout" until_counter_is ">= $((base + delta))" "$@" +} + setup_wait_dev() { local dev=$1; shift diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh index fac486178ef7..0c36546e131e 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_lib.sh @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0 -source "$relative_path/mirror_lib.sh" +source "$net_forwarding_dir/mirror_lib.sh" quick_test_span_gre_dir_ips() { diff --git a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh index 39c03e2867f4..6e615fffa4ef 100644 --- a/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh +++ b/tools/testing/selftests/net/forwarding/mirror_gre_topo_lib.sh @@ -33,7 +33,7 @@ # | | # +-------------------------------------------------------------------------+ -source "$relative_path/mirror_topo_lib.sh" +source "$net_forwarding_dir/mirror_topo_lib.sh" mirror_gre_topo_h3_create() { diff --git a/tools/testing/selftests/net/fq_band_pktlimit.sh b/tools/testing/selftests/net/fq_band_pktlimit.sh index 24b77bdf41ff..977070ed42b3 100755 --- a/tools/testing/selftests/net/fq_band_pktlimit.sh +++ b/tools/testing/selftests/net/fq_band_pktlimit.sh @@ -8,7 +8,7 @@ # 3. send 20 pkts on band A: verify that 0 are queued, 20 dropped # 4. send 20 pkts on band B: verify that 10 are queued, 10 dropped # -# Send packets with a 100ms delay to ensure that previously sent +# Send packets with a delay to ensure that previously sent # packets are still queued when later ones are sent. # Use SO_TXTIME for this. @@ -29,19 +29,21 @@ ip -6 addr add fdaa::1/128 dev dummy0 ip -6 route add fdaa::/64 dev dummy0 tc qdisc replace dev dummy0 root handle 1: fq quantum 1514 initial_quantum 1514 limit 10 -./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +DELAY=400000 + +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 OUT1="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" -./cmsg_sender -6 -p u -d 100000 -n 20 fdaa::2 8000 +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 fdaa::2 8000 OUT2="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" -./cmsg_sender -6 -p u -d 100000 -n 20 -P 7 fdaa::2 8000 +./cmsg_sender -6 -p u -d "${DELAY}" -n 20 -P 7 fdaa::2 8000 OUT3="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" # Initial stats will report zero sent, as all packets are still -# queued in FQ. Sleep for the delay period (100ms) and see that +# queued in FQ. Sleep for at least the delay period and see that # twenty are now sent. -sleep 0.1 +sleep 0.6 OUT4="$(tc -s qdisc show dev dummy0 | grep '^\ Sent')" # Log the output after the test diff --git a/tools/testing/selftests/net/openvswitch/openvswitch.sh b/tools/testing/selftests/net/openvswitch/openvswitch.sh index 36e40256ab92..5cae53543849 100755 --- a/tools/testing/selftests/net/openvswitch/openvswitch.sh +++ b/tools/testing/selftests/net/openvswitch/openvswitch.sh @@ -17,6 +17,7 @@ tests=" ct_connect_v4 ip4-ct-xon: Basic ipv4 tcp connection using ct connect_v4 ip4-xon: Basic ipv4 ping between two NS nat_connect_v4 ip4-nat-xon: Basic ipv4 tcp connection via NAT + nat_related_v4 ip4-nat-related: ICMP related matches work with SNAT netlink_checks ovsnl: validate netlink attrs and settings upcall_interfaces ovs: test the upcall interfaces drop_reason drop: test drop reasons are emitted" @@ -473,6 +474,67 @@ test_nat_connect_v4 () { return 0 } +# nat_related_v4 test +# - client->server ip packets go via SNAT +# - client solicits ICMP destination unreachable packet from server +# - undo NAT for ICMP reply and test dst ip has been updated +test_nat_related_v4 () { + which nc >/dev/null 2>/dev/null || return $ksft_skip + + sbx_add "test_nat_related_v4" || return $? + + ovs_add_dp "test_nat_related_v4" natrelated4 || return 1 + info "create namespaces" + for ns in client server; do + ovs_add_netns_and_veths "test_nat_related_v4" "natrelated4" "$ns" \ + "${ns:0:1}0" "${ns:0:1}1" || return 1 + done + + ip netns exec client ip addr add 172.31.110.10/24 dev c1 + ip netns exec client ip link set c1 up + ip netns exec server ip addr add 172.31.110.20/24 dev s1 + ip netns exec server ip link set s1 up + + ip netns exec server ip route add 192.168.0.20/32 via 172.31.110.10 + + # Allow ARP + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "in_port(1),eth(),eth_type(0x0806),arp()" "2" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "in_port(2),eth(),eth_type(0x0806),arp()" "1" || return 1 + + # Allow IP traffic from client->server, rewrite source IP with SNAT to 192.168.0.20 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "ct_state(-trk),in_port(1),eth(),eth_type(0x0800),ipv4(dst=172.31.110.20)" \ + "ct(commit,nat(src=192.168.0.20)),recirc(0x1)" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x1),ct_state(+trk-inv),in_port(1),eth(),eth_type(0x0800),ipv4()" \ + "2" || return 1 + + # Allow related ICMP responses back from server and undo NAT to restore original IP + # Drop any ICMP related packets where dst ip hasn't been restored back to original IP + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "ct_state(-trk),in_port(2),eth(),eth_type(0x0800),ipv4()" \ + "ct(commit,nat),recirc(0x2)" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(src=172.31.110.20,dst=172.31.110.10,proto=1),icmp()" \ + "1" || return 1 + ovs_add_flow "test_nat_related_v4" natrelated4 \ + "recirc_id(0x2),ct_state(+rel+trk),in_port(2),eth(),eth_type(0x0800),ipv4(dst=192.168.0.20,proto=1),icmp()" \ + "drop" || return 1 + + # Solicit destination unreachable response from server + ovs_sbx "test_nat_related_v4" ip netns exec client \ + bash -c "echo a | nc -u -w 1 172.31.110.20 10000" + + # Check to make sure no packets matched the drop rule with incorrect dst ip + python3 "$ovs_base/ovs-dpctl.py" dump-flows natrelated4 \ + | grep "drop" | grep "packets:0" >/dev/null || return 1 + + info "done..." + return 0 +} + # netlink_validation # - Create a dp # - check no warning with "old version" simulation diff --git a/tools/testing/selftests/net/so_txtime.c b/tools/testing/selftests/net/so_txtime.c index 2672ac0b6d1f..8457b7ccbc09 100644 --- a/tools/testing/selftests/net/so_txtime.c +++ b/tools/testing/selftests/net/so_txtime.c @@ -134,8 +134,11 @@ static void do_recv_one(int fdr, struct timed_send *ts) if (rbuf[0] != ts->data) error(1, 0, "payload mismatch. expected %c", ts->data); - if (llabs(tstop - texpect) > cfg_variance_us) - error(1, 0, "exceeds variance (%d us)", cfg_variance_us); + if (llabs(tstop - texpect) > cfg_variance_us) { + fprintf(stderr, "exceeds variance (%d us)\n", cfg_variance_us); + if (!getenv("KSFT_MACHINE_SLOW")) + exit(1); + } } static void do_recv_verify_empty(int fdr) diff --git a/tools/testing/selftests/net/txtimestamp.c b/tools/testing/selftests/net/txtimestamp.c index 10f2fde3686b..ec60a16c9307 100644 --- a/tools/testing/selftests/net/txtimestamp.c +++ b/tools/testing/selftests/net/txtimestamp.c @@ -163,7 +163,8 @@ static void validate_timestamp(struct timespec *cur, int min_delay) if (cur64 < start64 + min_delay || cur64 > start64 + max_delay) { fprintf(stderr, "ERROR: %" PRId64 " us expected between %d and %d\n", cur64 - start64, min_delay, max_delay); - test_failed = true; + if (!getenv("KSFT_MACHINE_SLOW")) + test_failed = true; } } diff --git a/tools/testing/selftests/net/txtimestamp.sh b/tools/testing/selftests/net/txtimestamp.sh index 31637769f59f..25baca4b148e 100755 --- a/tools/testing/selftests/net/txtimestamp.sh +++ b/tools/testing/selftests/net/txtimestamp.sh @@ -8,13 +8,13 @@ set -e setup() { # set 1ms delay on lo egress - tc qdisc add dev lo root netem delay 1ms + tc qdisc add dev lo root netem delay 10ms # set 2ms delay on ifb0 egress modprobe ifb ip link add ifb_netem0 type ifb ip link set dev ifb_netem0 up - tc qdisc add dev ifb_netem0 root netem delay 2ms + tc qdisc add dev ifb_netem0 root netem delay 20ms # redirect lo ingress through ifb0 egress tc qdisc add dev lo handle ffff: ingress @@ -24,9 +24,11 @@ setup() { } run_test_v4v6() { - # SND will be delayed 1000us - # ACK will be delayed 6000us: 1 + 2 ms round-trip - local -r args="$@ -v 1000 -V 6000" + # SND will be delayed 10ms + # ACK will be delayed 60ms: 10 + 20 ms round-trip + # allow +/- tolerance of 8ms + # wait for ACK to be queued + local -r args="$@ -v 10000 -V 60000 -t 8000 -S 80000" ./txtimestamp ${args} -4 -L 127.0.0.1 ./txtimestamp ${args} -6 -L ::1 diff --git a/tools/testing/selftests/net/udpgso.c b/tools/testing/selftests/net/udpgso.c index 7badaf215de2..1d975bf52af3 100644 --- a/tools/testing/selftests/net/udpgso.c +++ b/tools/testing/selftests/net/udpgso.c @@ -56,7 +56,6 @@ static bool cfg_do_msgmore; static bool cfg_do_setsockopt; static int cfg_specific_test_id = -1; -static const char cfg_ifname[] = "lo"; static unsigned short cfg_port = 9000; static char buf[ETH_MAX_MTU]; @@ -69,8 +68,13 @@ struct testcase { int r_len_last; /* recv(): size of last non-mss dgram, if any */ }; -const struct in6_addr addr6 = IN6ADDR_LOOPBACK_INIT; -const struct in_addr addr4 = { .s_addr = __constant_htonl(INADDR_LOOPBACK + 2) }; +const struct in6_addr addr6 = { + { { 0xfd, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1 } }, /* fd00::1 */ +}; + +const struct in_addr addr4 = { + __constant_htonl(0x0a000001), /* 10.0.0.1 */ +}; struct testcase testcases_v4[] = { { @@ -274,48 +278,6 @@ struct testcase testcases_v6[] = { } }; -static unsigned int get_device_mtu(int fd, const char *ifname) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCGIFMTU, &ifr)) - error(1, errno, "ioctl get mtu"); - - return ifr.ifr_mtu; -} - -static void __set_device_mtu(int fd, const char *ifname, unsigned int mtu) -{ - struct ifreq ifr; - - memset(&ifr, 0, sizeof(ifr)); - - ifr.ifr_mtu = mtu; - strcpy(ifr.ifr_name, ifname); - - if (ioctl(fd, SIOCSIFMTU, &ifr)) - error(1, errno, "ioctl set mtu"); -} - -static void set_device_mtu(int fd, int mtu) -{ - int val; - - val = get_device_mtu(fd, cfg_ifname); - fprintf(stderr, "device mtu (orig): %u\n", val); - - __set_device_mtu(fd, cfg_ifname, mtu); - val = get_device_mtu(fd, cfg_ifname); - if (val != mtu) - error(1, 0, "unable to set device mtu to %u\n", val); - - fprintf(stderr, "device mtu (test): %u\n", val); -} - static void set_pmtu_discover(int fd, bool is_ipv4) { int level, name, val; @@ -354,81 +316,6 @@ static unsigned int get_path_mtu(int fd, bool is_ipv4) return mtu; } -/* very wordy version of system("ip route add dev lo mtu 1500 127.0.0.3/32") */ -static void set_route_mtu(int mtu, bool is_ipv4) -{ - struct sockaddr_nl nladdr = { .nl_family = AF_NETLINK }; - struct nlmsghdr *nh; - struct rtattr *rta; - struct rtmsg *rt; - char data[NLMSG_ALIGN(sizeof(*nh)) + - NLMSG_ALIGN(sizeof(*rt)) + - NLMSG_ALIGN(RTA_LENGTH(sizeof(addr6))) + - NLMSG_ALIGN(RTA_LENGTH(sizeof(int))) + - NLMSG_ALIGN(RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)))]; - int fd, ret, alen, off = 0; - - alen = is_ipv4 ? sizeof(addr4) : sizeof(addr6); - - fd = socket(AF_NETLINK, SOCK_RAW, NETLINK_ROUTE); - if (fd == -1) - error(1, errno, "socket netlink"); - - memset(data, 0, sizeof(data)); - - nh = (void *)data; - nh->nlmsg_type = RTM_NEWROUTE; - nh->nlmsg_flags = NLM_F_REQUEST | NLM_F_CREATE; - off += NLMSG_ALIGN(sizeof(*nh)); - - rt = (void *)(data + off); - rt->rtm_family = is_ipv4 ? AF_INET : AF_INET6; - rt->rtm_table = RT_TABLE_MAIN; - rt->rtm_dst_len = alen << 3; - rt->rtm_protocol = RTPROT_BOOT; - rt->rtm_scope = RT_SCOPE_UNIVERSE; - rt->rtm_type = RTN_UNICAST; - off += NLMSG_ALIGN(sizeof(*rt)); - - rta = (void *)(data + off); - rta->rta_type = RTA_DST; - rta->rta_len = RTA_LENGTH(alen); - if (is_ipv4) - memcpy(RTA_DATA(rta), &addr4, alen); - else - memcpy(RTA_DATA(rta), &addr6, alen); - off += NLMSG_ALIGN(rta->rta_len); - - rta = (void *)(data + off); - rta->rta_type = RTA_OIF; - rta->rta_len = RTA_LENGTH(sizeof(int)); - *((int *)(RTA_DATA(rta))) = 1; //if_nametoindex("lo"); - off += NLMSG_ALIGN(rta->rta_len); - - /* MTU is a subtype in a metrics type */ - rta = (void *)(data + off); - rta->rta_type = RTA_METRICS; - rta->rta_len = RTA_LENGTH(0) + RTA_LENGTH(sizeof(int)); - off += NLMSG_ALIGN(rta->rta_len); - - /* now fill MTU subtype. Note that it fits within above rta_len */ - rta = (void *)(((char *) rta) + RTA_LENGTH(0)); - rta->rta_type = RTAX_MTU; - rta->rta_len = RTA_LENGTH(sizeof(int)); - *((int *)(RTA_DATA(rta))) = mtu; - - nh->nlmsg_len = off; - - ret = sendto(fd, data, off, 0, (void *)&nladdr, sizeof(nladdr)); - if (ret != off) - error(1, errno, "send netlink: %uB != %uB\n", ret, off); - - if (close(fd)) - error(1, errno, "close netlink"); - - fprintf(stderr, "route mtu (test): %u\n", mtu); -} - static bool __send_one(int fd, struct msghdr *msg, int flags) { int ret; @@ -591,15 +478,10 @@ static void run_test(struct sockaddr *addr, socklen_t alen) /* Do not fragment these datagrams: only succeed if GSO works */ set_pmtu_discover(fdt, addr->sa_family == AF_INET); - if (cfg_do_connectionless) { - set_device_mtu(fdt, CONST_MTU_TEST); + if (cfg_do_connectionless) run_all(fdt, fdr, addr, alen); - } if (cfg_do_connected) { - set_device_mtu(fdt, CONST_MTU_TEST + 100); - set_route_mtu(CONST_MTU_TEST, addr->sa_family == AF_INET); - if (connect(fdt, addr, alen)) error(1, errno, "connect"); diff --git a/tools/testing/selftests/net/udpgso.sh b/tools/testing/selftests/net/udpgso.sh index fec24f584fe9..6c63178086b0 100755 --- a/tools/testing/selftests/net/udpgso.sh +++ b/tools/testing/selftests/net/udpgso.sh @@ -3,27 +3,56 @@ # # Run a series of udpgso regression tests +set -o errexit +set -o nounset + +setup_loopback() { + ip addr add dev lo 10.0.0.1/32 + ip addr add dev lo fd00::1/128 nodad noprefixroute +} + +test_dev_mtu() { + setup_loopback + # Reduce loopback MTU + ip link set dev lo mtu 1500 +} + +test_route_mtu() { + setup_loopback + # Remove default local routes + ip route del local 10.0.0.1/32 table local dev lo + ip route del local fd00::1/128 table local dev lo + # Install local routes with reduced MTU + ip route add local 10.0.0.1/32 table local dev lo mtu 1500 + ip route add local fd00::1/128 table local dev lo mtu 1500 +} + +if [ "$#" -gt 0 ]; then + "$1" + shift 2 # pop "test_*" arg and "--" delimiter + exec "$@" +fi + echo "ipv4 cmsg" -./in_netns.sh ./udpgso -4 -C +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C echo "ipv4 setsockopt" -./in_netns.sh ./udpgso -4 -C -s +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -s echo "ipv6 cmsg" -./in_netns.sh ./udpgso -6 -C +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C echo "ipv6 setsockopt" -./in_netns.sh ./udpgso -6 -C -s +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -s echo "ipv4 connected" -./in_netns.sh ./udpgso -4 -c +./in_netns.sh "$0" test_route_mtu -- ./udpgso -4 -c -# blocked on 2nd loopback address -# echo "ipv6 connected" -# ./in_netns.sh ./udpgso -6 -c +echo "ipv6 connected" +./in_netns.sh "$0" test_route_mtu -- ./udpgso -6 -c echo "ipv4 msg_more" -./in_netns.sh ./udpgso -4 -C -m +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -4 -C -m echo "ipv6 msg_more" -./in_netns.sh ./udpgso -6 -C -m +./in_netns.sh "$0" test_dev_mtu -- ./udpgso -6 -C -m diff --git a/tools/testing/selftests/tc-testing/config b/tools/testing/selftests/tc-testing/config index c60acba951c2..db176fe7d0c3 100644 --- a/tools/testing/selftests/tc-testing/config +++ b/tools/testing/selftests/tc-testing/config @@ -8,6 +8,7 @@ CONFIG_VETH=y # # Core Netfilter Configuration # +CONFIG_NETFILTER=y CONFIG_NETFILTER_ADVANCED=y CONFIG_NF_CONNTRACK=m CONFIG_NF_CONNTRACK_MARK=y diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json index b53d12909962..795cf1ce8af0 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json +++ b/tools/testing/selftests/tc-testing/tc-tests/actions/mirred.json @@ -649,5 +649,401 @@ "teardown": [ "$TC actions flush action mirred" ] + }, + { + "id": "456d", + "name": "Add mirred mirror to egress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 egress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress mirror index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "mirror", + "direction": "egress", + "to_blockid": 21, + "control_action": { + "type": "pipe" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 egress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "2358", + "name": "Add mirred mirror to ingress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress mirror index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "mirror", + "direction": "ingress", + "to_blockid": 21, + "control_action": { + "type": "pipe" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "fdb1", + "name": "Add mirred redirect to egress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred egress redirect index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "to_blockid": 21, + "control_action": { + "type": "stolen" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "20cc", + "name": "Add mirred redirect to ingress block action", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "ingress", + "to_blockid": 21, + "control_action": { + "type": "stolen" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "e739", + "name": "Try to add mirred action with both dev and block", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1 blockid 21 dev $DEV1", + "expExitCode": "255", + "verifyCmd": "$TC -j actions list action mirred", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "2f47", + "name": "Try to add mirred action without specifying neither dev nor block", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ] + ], + "cmdUnderTest": "$TC actions add action mirred ingress redirect index 1", + "expExitCode": "255", + "verifyCmd": "$TC -j actions list action mirred", + "matchJSON": [], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "3188", + "name": "Replace mirred redirect to dev action with redirect to block", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ], + [ + "$TC actions add action mirred ingress redirect index 1 dev $DEV1", + 0 + ] + ], + "cmdUnderTest": "$TC actions replace action mirred egress redirect index 1 blockid 21", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "redirect", + "direction": "egress", + "to_blockid": 21, + "control_action": { + "type": "stolen" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] + }, + { + "id": "83cc", + "name": "Replace mirred redirect to block action with mirror to dev", + "category": [ + "actions", + "mirred" + ], + "plugins": { + "requires": "nsPlugin" + }, + "setup": [ + [ + "$TC actions flush action mirred", + 0, + 1, + 255 + ], + [ + "$TC qdisc add dev $DEV1 ingress_block 21 clsact", + 0 + ], + [ + "$TC actions add action mirred egress redirect index 1 blockid 21", + 0 + ] + ], + "cmdUnderTest": "$TC actions replace action mirred ingress mirror index 1 dev lo", + "expExitCode": "0", + "verifyCmd": "$TC -j actions get action mirred index 1", + "matchJSON": [ + { + "total acts": 0 + }, + { + "actions": [ + { + "order": 1, + "kind": "mirred", + "mirred_action": "mirror", + "direction": "ingress", + "to_dev": "lo", + "control_action": { + "type": "pipe" + }, + "index": 1, + "ref": 1, + "bind": 0, + "not_in_hw": true + } + ] + } + ], + "matchCount": "1", + "teardown": [ + "$TC qdisc del dev $DEV1 ingress_block 21 clsact", + "$TC actions flush action mirred" + ] } ] diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json index be293e7c6d18..3a537b2ec4c9 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/fq.json @@ -77,7 +77,7 @@ "cmdUnderTest": "$TC qdisc add dev $DUMMY handle 1: root fq quantum 9000", "expExitCode": "0", "verifyCmd": "$TC qdisc show dev $DUMMY", - "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p buckets.*orphan_mask 1023 quantum 9000b", + "matchPattern": "qdisc fq 1: root refcnt [0-9]+ limit 10000p flow_limit 100p.*quantum 9000b", "matchCount": "1", "teardown": [ "$TC qdisc del dev $DUMMY handle 1: root" diff --git a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json index 2d603ef2e375..12da0a939e3e 100644 --- a/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json +++ b/tools/testing/selftests/tc-testing/tc-tests/qdiscs/taprio.json @@ -167,6 +167,7 @@ "plugins": { "requires": "nsPlugin" }, + "dependsOn": "echo '' | jq", "setup": [ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 clockid CLOCK_TAI", @@ -192,6 +193,7 @@ "plugins": { "requires": "nsPlugin" }, + "dependsOn": "echo '' | jq", "setup": [ "echo \"1 1 8\" > /sys/bus/netdevsim/new_device", "$TC qdisc replace dev $ETH handle 8001: parent root stab overhead 24 taprio num_tc 8 map 0 1 2 3 4 5 6 7 queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 base-time 0 sched-entry S ff 20000000 flags 0x2", diff --git a/tools/testing/selftests/tc-testing/tdc.py b/tools/testing/selftests/tc-testing/tdc.py index caeacc691587..ee349187636f 100755 --- a/tools/testing/selftests/tc-testing/tdc.py +++ b/tools/testing/selftests/tc-testing/tdc.py @@ -541,7 +541,7 @@ def test_runner(pm, args, filtered_tests): message = pmtf.message output = pmtf.output res = TestResult(tidx['id'], tidx['name']) - res.set_result(ResultState.skip) + res.set_result(ResultState.fail) res.set_errormsg(pmtf.message) res.set_failmsg(pmtf.output) tsr.add_resultdata(res) diff --git a/tools/testing/selftests/tc-testing/tdc.sh b/tools/testing/selftests/tc-testing/tdc.sh index c53ede8b730d..cddff1772e10 100755 --- a/tools/testing/selftests/tc-testing/tdc.sh +++ b/tools/testing/selftests/tc-testing/tdc.sh @@ -63,5 +63,4 @@ try_modprobe sch_hfsc try_modprobe sch_hhf try_modprobe sch_htb try_modprobe sch_teql -./tdc.py -J`nproc` -c actions -./tdc.py -J`nproc` -c qdisc +./tdc.py -J`nproc` diff --git a/tools/testing/vsock/util.c b/tools/testing/vsock/util.c index ae2b33c21c45..554b290fefdc 100644 --- a/tools/testing/vsock/util.c +++ b/tools/testing/vsock/util.c @@ -33,8 +33,7 @@ void init_signals(void) signal(SIGPIPE, SIG_IGN); } -/* Parse a CID in string representation */ -unsigned int parse_cid(const char *str) +static unsigned int parse_uint(const char *str, const char *err_str) { char *endptr = NULL; unsigned long n; @@ -42,12 +41,24 @@ unsigned int parse_cid(const char *str) errno = 0; n = strtoul(str, &endptr, 10); if (errno || *endptr != '\0') { - fprintf(stderr, "malformed CID \"%s\"\n", str); + fprintf(stderr, "malformed %s \"%s\"\n", err_str, str); exit(EXIT_FAILURE); } return n; } +/* Parse a CID in string representation */ +unsigned int parse_cid(const char *str) +{ + return parse_uint(str, "CID"); +} + +/* Parse a port in string representation */ +unsigned int parse_port(const char *str) +{ + return parse_uint(str, "port"); +} + /* Wait for the remote to close the connection */ void vsock_wait_remote_close(int fd) { diff --git a/tools/testing/vsock/util.h b/tools/testing/vsock/util.h index 03c88d0cb861..e95e62485959 100644 --- a/tools/testing/vsock/util.h +++ b/tools/testing/vsock/util.h @@ -12,10 +12,13 @@ enum test_mode { TEST_MODE_SERVER }; +#define DEFAULT_PEER_PORT 1234 + /* Test runner options */ struct test_opts { enum test_mode mode; unsigned int peer_cid; + unsigned int peer_port; }; /* A test case definition. Test functions must print failures to stderr and @@ -35,6 +38,7 @@ struct test_case { void init_signals(void); unsigned int parse_cid(const char *str); +unsigned int parse_port(const char *str); int vsock_stream_connect(unsigned int cid, unsigned int port); int vsock_bind_connect(unsigned int cid, unsigned int port, unsigned int bind_port, int type); diff --git a/tools/testing/vsock/vsock_diag_test.c b/tools/testing/vsock/vsock_diag_test.c index fa927ad16f8a..081e045f4696 100644 --- a/tools/testing/vsock/vsock_diag_test.c +++ b/tools/testing/vsock/vsock_diag_test.c @@ -39,6 +39,8 @@ static const char *sock_type_str(int type) return "DGRAM"; case SOCK_STREAM: return "STREAM"; + case SOCK_SEQPACKET: + return "SEQPACKET"; default: return "INVALID TYPE"; } @@ -342,7 +344,7 @@ static void test_listen_socket_server(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = VMADDR_CID_ANY, }, }; @@ -378,7 +380,7 @@ static void test_connect_client(const struct test_opts *opts) LIST_HEAD(sockets); struct vsock_stat *st; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -403,7 +405,7 @@ static void test_connect_server(const struct test_opts *opts) LIST_HEAD(sockets); int client_fd; - client_fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + client_fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (client_fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -462,6 +464,11 @@ static const struct option longopts[] = { .val = 'p', }, { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, + { .name = "list", .has_arg = no_argument, .val = 'l', @@ -481,7 +488,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n" + fprintf(stderr, "Usage: vsock_diag_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n" "\n" " Server: vsock_diag_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_diag_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -503,9 +510,11 @@ static void usage(void) " --control-port <port> Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid <cid> CID of the other side\n" + " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" " --skip <test_id> Test ID to skip;\n" - " use multiple --skip options to skip more tests\n" + " use multiple --skip options to skip more tests\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -517,6 +526,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; init_signals(); @@ -544,6 +554,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; diff --git a/tools/testing/vsock/vsock_test.c b/tools/testing/vsock/vsock_test.c index 66246d81d654..f851f8961247 100644 --- a/tools/testing/vsock/vsock_test.c +++ b/tools/testing/vsock/vsock_test.c @@ -34,7 +34,7 @@ static void test_stream_connection_reset(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = opts->peer_cid, }, }; @@ -70,7 +70,7 @@ static void test_stream_bind_only_client(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = opts->peer_cid, }, }; @@ -112,7 +112,7 @@ static void test_stream_bind_only_server(const struct test_opts *opts) } addr = { .svm = { .svm_family = AF_VSOCK, - .svm_port = 1234, + .svm_port = opts->peer_port, .svm_cid = VMADDR_CID_ANY, }, }; @@ -138,7 +138,7 @@ static void test_stream_client_close_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -152,7 +152,7 @@ static void test_stream_client_close_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -173,7 +173,7 @@ static void test_stream_server_close_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -194,7 +194,7 @@ static void test_stream_server_close_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -215,7 +215,7 @@ static void test_stream_multiconn_client(const struct test_opts *opts) int i; for (i = 0; i < MULTICONN_NFDS; i++) { - fds[i] = vsock_stream_connect(opts->peer_cid, 1234); + fds[i] = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fds[i] < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -239,7 +239,7 @@ static void test_stream_multiconn_server(const struct test_opts *opts) int i; for (i = 0; i < MULTICONN_NFDS; i++) { - fds[i] = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fds[i] = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fds[i] < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -267,9 +267,9 @@ static void test_msg_peek_client(const struct test_opts *opts, int i; if (seqpacket) - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); else - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); @@ -295,9 +295,9 @@ static void test_msg_peek_server(const struct test_opts *opts, int fd; if (seqpacket) - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); else - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); @@ -363,7 +363,7 @@ static void test_seqpacket_msg_bounds_client(const struct test_opts *opts) int msg_count; int fd; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -434,7 +434,7 @@ static void test_seqpacket_msg_bounds_server(const struct test_opts *opts) struct msghdr msg = {0}; struct iovec iov = {0}; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -505,7 +505,7 @@ static void test_seqpacket_msg_trunc_client(const struct test_opts *opts) int fd; char buf[MESSAGE_TRUNC_SZ]; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -524,7 +524,7 @@ static void test_seqpacket_msg_trunc_server(const struct test_opts *opts) struct msghdr msg = {0}; struct iovec iov = {0}; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -575,7 +575,7 @@ static void test_seqpacket_timeout_client(const struct test_opts *opts) time_t read_enter_ns; time_t read_overhead_ns; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -620,7 +620,7 @@ static void test_seqpacket_timeout_server(const struct test_opts *opts) { int fd; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -639,7 +639,7 @@ static void test_seqpacket_bigmsg_client(const struct test_opts *opts) len = sizeof(sock_buf_size); - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -671,7 +671,7 @@ static void test_seqpacket_bigmsg_server(const struct test_opts *opts) { int fd; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -692,7 +692,7 @@ static void test_seqpacket_invalid_rec_buffer_client(const struct test_opts *opt unsigned char *buf2; int buf_size = getpagesize() * 3; - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -732,7 +732,7 @@ static void test_seqpacket_invalid_rec_buffer_server(const struct test_opts *opt int flags = MAP_PRIVATE | MAP_ANONYMOUS; int i; - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -808,7 +808,7 @@ static void test_stream_poll_rcvlowat_server(const struct test_opts *opts) int fd; int i; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -839,7 +839,7 @@ static void test_stream_poll_rcvlowat_client(const struct test_opts *opts) short poll_flags; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -906,9 +906,9 @@ static void test_inv_buf_client(const struct test_opts *opts, bool stream) int fd; if (stream) - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); else - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); @@ -941,9 +941,9 @@ static void test_inv_buf_server(const struct test_opts *opts, bool stream) int fd; if (stream) - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); else - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); @@ -986,7 +986,7 @@ static void test_stream_virtio_skb_merge_client(const struct test_opts *opts) { int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1015,7 +1015,7 @@ static void test_stream_virtio_skb_merge_server(const struct test_opts *opts) unsigned char buf[64]; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1108,7 +1108,7 @@ static void test_stream_shutwr_client(const struct test_opts *opts) sigaction(SIGPIPE, &act, NULL); - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1130,7 +1130,7 @@ static void test_stream_shutwr_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1151,7 +1151,7 @@ static void test_stream_shutrd_client(const struct test_opts *opts) sigaction(SIGPIPE, &act, NULL); - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1170,7 +1170,7 @@ static void test_stream_shutrd_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1193,7 +1193,7 @@ static void test_double_bind_connect_server(const struct test_opts *opts) struct sockaddr_vm sa_client; socklen_t socklen_client = sizeof(sa_client); - listen_fd = vsock_stream_listen(VMADDR_CID_ANY, 1234); + listen_fd = vsock_stream_listen(VMADDR_CID_ANY, opts->peer_port); for (i = 0; i < 2; i++) { control_writeln("LISTENING"); @@ -1226,7 +1226,13 @@ static void test_double_bind_connect_client(const struct test_opts *opts) /* Wait until server is ready to accept a new connection */ control_expectln("LISTENING"); - client_fd = vsock_bind_connect(opts->peer_cid, 1234, 4321, SOCK_STREAM); + /* We use 'peer_port + 1' as "some" port for the 'bind()' + * call. It is safe for overflow, but must be considered, + * when running multiple test applications simultaneously + * where 'peer-port' argument differs by 1. + */ + client_fd = vsock_bind_connect(opts->peer_cid, opts->peer_port, + opts->peer_port + 1, SOCK_STREAM); close(client_fd); } @@ -1246,7 +1252,7 @@ static void test_stream_rcvlowat_def_cred_upd_client(const struct test_opts *opt void *buf; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -1282,7 +1288,7 @@ static void test_stream_credit_update_test(const struct test_opts *opts, void *buf; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -1543,6 +1549,11 @@ static const struct option longopts[] = { .val = 'p', }, { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, + { .name = "list", .has_arg = no_argument, .val = 'l', @@ -1562,7 +1573,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--list] [--skip=<test_id>]\n" + fprintf(stderr, "Usage: vsock_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>] [--list] [--skip=<test_id>]\n" "\n" " Server: vsock_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -1577,6 +1588,9 @@ static void usage(void) "connect to.\n" "\n" "The CID of the other side must be given with --peer-cid=<cid>.\n" + "During the test, two AF_VSOCK ports will be used: the port\n" + "specified with --peer-port=<port> (or the default port)\n" + "and the next one.\n" "\n" "Options:\n" " --help This help message\n" @@ -1584,9 +1598,11 @@ static void usage(void) " --control-port <port> Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid <cid> CID of the other side\n" + " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n" " --list List of tests that will be executed\n" " --skip <test_id> Test ID to skip;\n" - " use multiple --skip options to skip more tests\n" + " use multiple --skip options to skip more tests\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -1598,6 +1614,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; srand(time(NULL)); @@ -1626,6 +1643,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; diff --git a/tools/testing/vsock/vsock_test_zerocopy.c b/tools/testing/vsock/vsock_test_zerocopy.c index a16ff76484e6..04c376b6937f 100644 --- a/tools/testing/vsock/vsock_test_zerocopy.c +++ b/tools/testing/vsock/vsock_test_zerocopy.c @@ -152,9 +152,9 @@ static void test_client(const struct test_opts *opts, int fd; if (sock_seqpacket) - fd = vsock_seqpacket_connect(opts->peer_cid, 1234); + fd = vsock_seqpacket_connect(opts->peer_cid, opts->peer_port); else - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); @@ -248,9 +248,9 @@ static void test_server(const struct test_opts *opts, int fd; if (sock_seqpacket) - fd = vsock_seqpacket_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_seqpacket_accept(VMADDR_CID_ANY, opts->peer_port, NULL); else - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); @@ -323,7 +323,7 @@ void test_stream_msgzcopy_empty_errq_client(const struct test_opts *opts) ssize_t res; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -347,7 +347,7 @@ void test_stream_msgzcopy_empty_errq_server(const struct test_opts *opts) { int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); diff --git a/tools/testing/vsock/vsock_uring_test.c b/tools/testing/vsock/vsock_uring_test.c index d976d35f0ba9..6c3e6f70c457 100644 --- a/tools/testing/vsock/vsock_uring_test.c +++ b/tools/testing/vsock/vsock_uring_test.c @@ -66,7 +66,7 @@ static void vsock_io_uring_client(const struct test_opts *opts, struct msghdr msg; int fd; - fd = vsock_stream_connect(opts->peer_cid, 1234); + fd = vsock_stream_connect(opts->peer_cid, opts->peer_port); if (fd < 0) { perror("connect"); exit(EXIT_FAILURE); @@ -120,7 +120,7 @@ static void vsock_io_uring_server(const struct test_opts *opts, void *data; int fd; - fd = vsock_stream_accept(VMADDR_CID_ANY, 1234, NULL); + fd = vsock_stream_accept(VMADDR_CID_ANY, opts->peer_port, NULL); if (fd < 0) { perror("accept"); exit(EXIT_FAILURE); @@ -248,6 +248,11 @@ static const struct option longopts[] = { .val = 'p', }, { + .name = "peer-port", + .has_arg = required_argument, + .val = 'q', + }, + { .name = "help", .has_arg = no_argument, .val = '?', @@ -257,7 +262,7 @@ static const struct option longopts[] = { static void usage(void) { - fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid>\n" + fprintf(stderr, "Usage: vsock_uring_test [--help] [--control-host=<host>] --control-port=<port> --mode=client|server --peer-cid=<cid> [--peer-port=<port>]\n" "\n" " Server: vsock_uring_test --control-port=1234 --mode=server --peer-cid=3\n" " Client: vsock_uring_test --control-host=192.168.0.1 --control-port=1234 --mode=client --peer-cid=2\n" @@ -271,6 +276,8 @@ static void usage(void) " --control-port <port> Server port to listen on/connect to\n" " --mode client|server Server or client mode\n" " --peer-cid <cid> CID of the other side\n" + " --peer-port <port> AF_VSOCK port used for the test [default: %d]\n", + DEFAULT_PEER_PORT ); exit(EXIT_FAILURE); } @@ -282,6 +289,7 @@ int main(int argc, char **argv) struct test_opts opts = { .mode = TEST_MODE_UNSET, .peer_cid = VMADDR_CID_ANY, + .peer_port = DEFAULT_PEER_PORT, }; init_signals(); @@ -309,6 +317,9 @@ int main(int argc, char **argv) case 'p': opts.peer_cid = parse_cid(optarg); break; + case 'q': + opts.peer_port = parse_port(optarg); + break; case 'P': control_port = optarg; break; |