Diffstat (limited to 'tools/lib')
38 files changed, 3048 insertions, 1496 deletions
| diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index b393b5e82380..f947b61b2107 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -84,11 +84,13 @@ else  endif  # Append required CFLAGS +override CFLAGS += -std=gnu89  override CFLAGS += $(EXTRA_WARNINGS) -Wno-switch-enum  override CFLAGS += -Werror -Wall  override CFLAGS += $(INCLUDES)  override CFLAGS += -fvisibility=hidden  override CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 +override CFLAGS += $(CLANG_CROSS_FLAGS)  # flags specific for shared library  SHLIB_FLAGS := -DSHARED -fPIC @@ -161,7 +163,7 @@ $(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h  $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)  $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) $(VERSION_SCRIPT) -	$(QUIET_LINK)$(CC) $(LDFLAGS) \ +	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) \  		--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \  		-Wl,--version-script=$(VERSION_SCRIPT) $< -lelf -lz -o $@  	@ln -sf $(@F) $(OUTPUT)libbpf.so diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 725701235fd8..550b4cbb6c99 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -28,6 +28,9 @@  #include <asm/unistd.h>  #include <errno.h>  #include <linux/bpf.h> +#include <linux/filter.h> +#include <limits.h> +#include <sys/resource.h>  #include "bpf.h"  #include "libbpf.h"  #include "libbpf_internal.h" @@ -49,6 +52,12 @@  #  define __NR_bpf 351  # elif defined(__arc__)  #  define __NR_bpf 280 +# elif defined(__mips__) && defined(_ABIO32) +#  define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +#  define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +#  define __NR_bpf 5315  # else  #  error __NR_bpf not defined. libbpf does not support your arch.  # endif @@ -74,158 +83,208 @@ static inline int sys_bpf_fd(enum bpf_cmd cmd, union bpf_attr *attr,  	return ensure_good_fd(fd);  } -static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +#define PROG_LOAD_ATTEMPTS 5 + +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size, int attempts)  { -	int retries = 5;  	int fd;  	do {  		fd = sys_bpf_fd(BPF_PROG_LOAD, attr, size); -	} while (fd < 0 && errno == EAGAIN && retries-- > 0); +	} while (fd < 0 && errno == EAGAIN && --attempts > 0);  	return fd;  } -int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr) +/* Probe whether kernel switched from memlock-based (RLIMIT_MEMLOCK) to + * memcg-based memory accounting for BPF maps and progs. This was done in [0]. + * We use the support for bpf_ktime_get_coarse_ns() helper, which was added in + * the same 5.11 Linux release ([1]), to detect memcg-based accounting for BPF. 
+ * + *   [0] https://lore.kernel.org/bpf/[email protected]/ + *   [1] d05512618056 ("bpf: Add bpf_ktime_get_coarse_ns helper") + */ +int probe_memcg_account(void) +{ +	const size_t prog_load_attr_sz = offsetofend(union bpf_attr, attach_btf_obj_fd); +	struct bpf_insn insns[] = { +		BPF_EMIT_CALL(BPF_FUNC_ktime_get_coarse_ns), +		BPF_EXIT_INSN(), +	}; +	size_t insn_cnt = sizeof(insns) / sizeof(insns[0]); +	union bpf_attr attr; +	int prog_fd; + +	/* attempt loading freplace trying to use custom BTF */ +	memset(&attr, 0, prog_load_attr_sz); +	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; +	attr.insns = ptr_to_u64(insns); +	attr.insn_cnt = insn_cnt; +	attr.license = ptr_to_u64("GPL"); + +	prog_fd = sys_bpf_fd(BPF_PROG_LOAD, &attr, prog_load_attr_sz); +	if (prog_fd >= 0) { +		close(prog_fd); +		return 1; +	} +	return 0; +} + +static bool memlock_bumped; +static rlim_t memlock_rlim = RLIM_INFINITY; + +int libbpf_set_memlock_rlim(size_t memlock_bytes) +{ +	if (memlock_bumped) +		return libbpf_err(-EBUSY); + +	memlock_rlim = memlock_bytes; +	return 0; +} + +int bump_rlimit_memlock(void) +{ +	struct rlimit rlim; + +	/* this the default in libbpf 1.0, but for now user has to opt-in explicitly */ +	if (!(libbpf_mode & LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK)) +		return 0; + +	/* if kernel supports memcg-based accounting, skip bumping RLIMIT_MEMLOCK */ +	if (memlock_bumped || kernel_supports(NULL, FEAT_MEMCG_ACCOUNT)) +		return 0; + +	memlock_bumped = true; + +	/* zero memlock_rlim_max disables auto-bumping RLIMIT_MEMLOCK */ +	if (memlock_rlim == 0) +		return 0; + +	rlim.rlim_cur = rlim.rlim_max = memlock_rlim; +	if (setrlimit(RLIMIT_MEMLOCK, &rlim)) +		return -errno; + +	return 0; +} + +int bpf_map_create(enum bpf_map_type map_type, +		   const char *map_name, +		   __u32 key_size, +		   __u32 value_size, +		   __u32 max_entries, +		   const struct bpf_map_create_opts *opts)  { +	const size_t attr_sz = offsetofend(union bpf_attr, map_extra);  	union bpf_attr attr;  	int fd; -	memset(&attr, '\0', sizeof(attr)); - -	attr.map_type = create_attr->map_type; -	attr.key_size = create_attr->key_size; -	attr.value_size = create_attr->value_size; -	attr.max_entries = create_attr->max_entries; -	attr.map_flags = create_attr->map_flags; -	if (create_attr->name) -		memcpy(attr.map_name, create_attr->name, -		       min(strlen(create_attr->name), BPF_OBJ_NAME_LEN - 1)); -	attr.numa_node = create_attr->numa_node; -	attr.btf_fd = create_attr->btf_fd; -	attr.btf_key_type_id = create_attr->btf_key_type_id; -	attr.btf_value_type_id = create_attr->btf_value_type_id; -	attr.map_ifindex = create_attr->map_ifindex; -	if (attr.map_type == BPF_MAP_TYPE_STRUCT_OPS) -		attr.btf_vmlinux_value_type_id = -			create_attr->btf_vmlinux_value_type_id; -	else -		attr.inner_map_fd = create_attr->inner_map_fd; -	attr.map_extra = create_attr->map_extra; +	bump_rlimit_memlock(); + +	memset(&attr, 0, attr_sz); + +	if (!OPTS_VALID(opts, bpf_map_create_opts)) +		return libbpf_err(-EINVAL); + +	attr.map_type = map_type; +	if (map_name) +		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name)); +	attr.key_size = key_size; +	attr.value_size = value_size; +	attr.max_entries = max_entries; -	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); +	attr.btf_fd = OPTS_GET(opts, btf_fd, 0); +	attr.btf_key_type_id = OPTS_GET(opts, btf_key_type_id, 0); +	attr.btf_value_type_id = OPTS_GET(opts, btf_value_type_id, 0); +	attr.btf_vmlinux_value_type_id = OPTS_GET(opts, btf_vmlinux_value_type_id, 0); + +	attr.inner_map_fd = OPTS_GET(opts, inner_map_fd, 
0); +	attr.map_flags = OPTS_GET(opts, map_flags, 0); +	attr.map_extra = OPTS_GET(opts, map_extra, 0); +	attr.numa_node = OPTS_GET(opts, numa_node, 0); +	attr.map_ifindex = OPTS_GET(opts, map_ifindex, 0); + +	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, attr_sz);  	return libbpf_err_errno(fd);  }  int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr)  { -	struct bpf_create_map_params p = {}; +	LIBBPF_OPTS(bpf_map_create_opts, p); -	p.map_type = create_attr->map_type; -	p.key_size = create_attr->key_size; -	p.value_size = create_attr->value_size; -	p.max_entries = create_attr->max_entries;  	p.map_flags = create_attr->map_flags; -	p.name = create_attr->name;  	p.numa_node = create_attr->numa_node;  	p.btf_fd = create_attr->btf_fd;  	p.btf_key_type_id = create_attr->btf_key_type_id;  	p.btf_value_type_id = create_attr->btf_value_type_id;  	p.map_ifindex = create_attr->map_ifindex; -	if (p.map_type == BPF_MAP_TYPE_STRUCT_OPS) -		p.btf_vmlinux_value_type_id = -			create_attr->btf_vmlinux_value_type_id; +	if (create_attr->map_type == BPF_MAP_TYPE_STRUCT_OPS) +		p.btf_vmlinux_value_type_id = create_attr->btf_vmlinux_value_type_id;  	else  		p.inner_map_fd = create_attr->inner_map_fd; -	return libbpf__bpf_create_map_xattr(&p); +	return bpf_map_create(create_attr->map_type, create_attr->name, +			      create_attr->key_size, create_attr->value_size, +			      create_attr->max_entries, &p);  }  int bpf_create_map_node(enum bpf_map_type map_type, const char *name,  			int key_size, int value_size, int max_entries,  			__u32 map_flags, int node)  { -	struct bpf_create_map_attr map_attr = {}; - -	map_attr.name = name; -	map_attr.map_type = map_type; -	map_attr.map_flags = map_flags; -	map_attr.key_size = key_size; -	map_attr.value_size = value_size; -	map_attr.max_entries = max_entries; +	LIBBPF_OPTS(bpf_map_create_opts, opts); + +	opts.map_flags = map_flags;  	if (node >= 0) { -		map_attr.numa_node = node; -		map_attr.map_flags |= BPF_F_NUMA_NODE; +		opts.numa_node = node; +		opts.map_flags |= BPF_F_NUMA_NODE;  	} -	return bpf_create_map_xattr(&map_attr); +	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);  }  int bpf_create_map(enum bpf_map_type map_type, int key_size,  		   int value_size, int max_entries, __u32 map_flags)  { -	struct bpf_create_map_attr map_attr = {}; +	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); -	map_attr.map_type = map_type; -	map_attr.map_flags = map_flags; -	map_attr.key_size = key_size; -	map_attr.value_size = value_size; -	map_attr.max_entries = max_entries; - -	return bpf_create_map_xattr(&map_attr); +	return bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts);  }  int bpf_create_map_name(enum bpf_map_type map_type, const char *name,  			int key_size, int value_size, int max_entries,  			__u32 map_flags)  { -	struct bpf_create_map_attr map_attr = {}; - -	map_attr.name = name; -	map_attr.map_type = map_type; -	map_attr.map_flags = map_flags; -	map_attr.key_size = key_size; -	map_attr.value_size = value_size; -	map_attr.max_entries = max_entries; +	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = map_flags); -	return bpf_create_map_xattr(&map_attr); +	return bpf_map_create(map_type, name, key_size, value_size, max_entries, &opts);  }  int bpf_create_map_in_map_node(enum bpf_map_type map_type, const char *name,  			       int key_size, int inner_map_fd, int max_entries,  			       __u32 map_flags, int node)  { -	union bpf_attr attr; -	int fd; - -	memset(&attr, '\0', sizeof(attr)); - -	
attr.map_type = map_type; -	attr.key_size = key_size; -	attr.value_size = 4; -	attr.inner_map_fd = inner_map_fd; -	attr.max_entries = max_entries; -	attr.map_flags = map_flags; -	if (name) -		memcpy(attr.map_name, name, -		       min(strlen(name), BPF_OBJ_NAME_LEN - 1)); +	LIBBPF_OPTS(bpf_map_create_opts, opts); +	opts.inner_map_fd = inner_map_fd; +	opts.map_flags = map_flags;  	if (node >= 0) { -		attr.map_flags |= BPF_F_NUMA_NODE; -		attr.numa_node = node; +		opts.map_flags |= BPF_F_NUMA_NODE; +		opts.numa_node = node;  	} -	fd = sys_bpf_fd(BPF_MAP_CREATE, &attr, sizeof(attr)); -	return libbpf_err_errno(fd); +	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);  }  int bpf_create_map_in_map(enum bpf_map_type map_type, const char *name,  			  int key_size, int inner_map_fd, int max_entries,  			  __u32 map_flags)  { -	return bpf_create_map_in_map_node(map_type, name, key_size, -					  inner_map_fd, max_entries, map_flags, -					  -1); +	LIBBPF_OPTS(bpf_map_create_opts, opts, +		.inner_map_fd = inner_map_fd, +		.map_flags = map_flags, +	); + +	return bpf_map_create(map_type, name, key_size, 4, max_entries, &opts);  }  static void * @@ -253,58 +312,95 @@ alloc_zero_tailing_info(const void *orecord, __u32 cnt,  	return info;  } -int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr) +DEFAULT_VERSION(bpf_prog_load_v0_6_0, bpf_prog_load, LIBBPF_0.6.0) +int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, +		         const char *prog_name, const char *license, +		         const struct bpf_insn *insns, size_t insn_cnt, +		         const struct bpf_prog_load_opts *opts)  {  	void *finfo = NULL, *linfo = NULL; +	const char *func_info, *line_info; +	__u32 log_size, log_level, attach_prog_fd, attach_btf_obj_fd; +	__u32 func_info_rec_size, line_info_rec_size; +	int fd, attempts;  	union bpf_attr attr; -	int fd; +	char *log_buf; + +	bump_rlimit_memlock(); -	if (!load_attr->log_buf != !load_attr->log_buf_sz) +	if (!OPTS_VALID(opts, bpf_prog_load_opts))  		return libbpf_err(-EINVAL); -	if (load_attr->log_level > (4 | 2 | 1) || (load_attr->log_level && !load_attr->log_buf)) +	attempts = OPTS_GET(opts, attempts, 0); +	if (attempts < 0)  		return libbpf_err(-EINVAL); +	if (attempts == 0) +		attempts = PROG_LOAD_ATTEMPTS;  	memset(&attr, 0, sizeof(attr)); -	attr.prog_type = load_attr->prog_type; -	attr.expected_attach_type = load_attr->expected_attach_type; -	if (load_attr->attach_prog_fd) -		attr.attach_prog_fd = load_attr->attach_prog_fd; -	else -		attr.attach_btf_obj_fd = load_attr->attach_btf_obj_fd; -	attr.attach_btf_id = load_attr->attach_btf_id; +	attr.prog_type = prog_type; +	attr.expected_attach_type = OPTS_GET(opts, expected_attach_type, 0); -	attr.prog_ifindex = load_attr->prog_ifindex; -	attr.kern_version = load_attr->kern_version; +	attr.prog_btf_fd = OPTS_GET(opts, prog_btf_fd, 0); +	attr.prog_flags = OPTS_GET(opts, prog_flags, 0); +	attr.prog_ifindex = OPTS_GET(opts, prog_ifindex, 0); +	attr.kern_version = OPTS_GET(opts, kern_version, 0); -	attr.insn_cnt = (__u32)load_attr->insn_cnt; -	attr.insns = ptr_to_u64(load_attr->insns); -	attr.license = ptr_to_u64(load_attr->license); +	if (prog_name) +		libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name)); +	attr.license = ptr_to_u64(license); -	attr.log_level = load_attr->log_level; -	if (attr.log_level) { -		attr.log_buf = ptr_to_u64(load_attr->log_buf); -		attr.log_size = load_attr->log_buf_sz; -	} +	if (insn_cnt > UINT_MAX) +		return libbpf_err(-E2BIG); + +	attr.insns = ptr_to_u64(insns); +	
attr.insn_cnt = (__u32)insn_cnt; + +	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); +	attach_btf_obj_fd = OPTS_GET(opts, attach_btf_obj_fd, 0); + +	if (attach_prog_fd && attach_btf_obj_fd) +		return libbpf_err(-EINVAL); + +	attr.attach_btf_id = OPTS_GET(opts, attach_btf_id, 0); +	if (attach_prog_fd) +		attr.attach_prog_fd = attach_prog_fd; +	else +		attr.attach_btf_obj_fd = attach_btf_obj_fd; -	attr.prog_btf_fd = load_attr->prog_btf_fd; -	attr.prog_flags = load_attr->prog_flags; +	log_buf = OPTS_GET(opts, log_buf, NULL); +	log_size = OPTS_GET(opts, log_size, 0); +	log_level = OPTS_GET(opts, log_level, 0); -	attr.func_info_rec_size = load_attr->func_info_rec_size; -	attr.func_info_cnt = load_attr->func_info_cnt; -	attr.func_info = ptr_to_u64(load_attr->func_info); +	if (!!log_buf != !!log_size) +		return libbpf_err(-EINVAL); +	if (log_level > (4 | 2 | 1)) +		return libbpf_err(-EINVAL); +	if (log_level && !log_buf) +		return libbpf_err(-EINVAL); -	attr.line_info_rec_size = load_attr->line_info_rec_size; -	attr.line_info_cnt = load_attr->line_info_cnt; -	attr.line_info = ptr_to_u64(load_attr->line_info); -	attr.fd_array = ptr_to_u64(load_attr->fd_array); +	func_info_rec_size = OPTS_GET(opts, func_info_rec_size, 0); +	func_info = OPTS_GET(opts, func_info, NULL); +	attr.func_info_rec_size = func_info_rec_size; +	attr.func_info = ptr_to_u64(func_info); +	attr.func_info_cnt = OPTS_GET(opts, func_info_cnt, 0); -	if (load_attr->name) -		memcpy(attr.prog_name, load_attr->name, -		       min(strlen(load_attr->name), (size_t)BPF_OBJ_NAME_LEN - 1)); +	line_info_rec_size = OPTS_GET(opts, line_info_rec_size, 0); +	line_info = OPTS_GET(opts, line_info, NULL); +	attr.line_info_rec_size = line_info_rec_size; +	attr.line_info = ptr_to_u64(line_info); +	attr.line_info_cnt = OPTS_GET(opts, line_info_cnt, 0); + +	attr.fd_array = ptr_to_u64(OPTS_GET(opts, fd_array, NULL)); + +	if (log_level) { +		attr.log_buf = ptr_to_u64(log_buf); +		attr.log_size = log_size; +		attr.log_level = log_level; +	} -	fd = sys_bpf_prog_load(&attr, sizeof(attr)); +	fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);  	if (fd >= 0)  		return fd; @@ -314,11 +410,11 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)  	 */  	while (errno == E2BIG && (!finfo || !linfo)) {  		if (!finfo && attr.func_info_cnt && -		    attr.func_info_rec_size < load_attr->func_info_rec_size) { +		    attr.func_info_rec_size < func_info_rec_size) {  			/* try with corrected func info records */ -			finfo = alloc_zero_tailing_info(load_attr->func_info, -							load_attr->func_info_cnt, -							load_attr->func_info_rec_size, +			finfo = alloc_zero_tailing_info(func_info, +							attr.func_info_cnt, +							func_info_rec_size,  							attr.func_info_rec_size);  			if (!finfo) {  				errno = E2BIG; @@ -326,13 +422,12 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)  			}  			attr.func_info = ptr_to_u64(finfo); -			attr.func_info_rec_size = load_attr->func_info_rec_size; +			attr.func_info_rec_size = func_info_rec_size;  		} else if (!linfo && attr.line_info_cnt && -			   attr.line_info_rec_size < -			   load_attr->line_info_rec_size) { -			linfo = alloc_zero_tailing_info(load_attr->line_info, -							load_attr->line_info_cnt, -							load_attr->line_info_rec_size, +			   attr.line_info_rec_size < line_info_rec_size) { +			linfo = alloc_zero_tailing_info(line_info, +							attr.line_info_cnt, +							line_info_rec_size,  							attr.line_info_rec_size);  			if (!linfo) {  				errno = E2BIG; @@ -340,26 
+435,27 @@ int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr)  			}  			attr.line_info = ptr_to_u64(linfo); -			attr.line_info_rec_size = load_attr->line_info_rec_size; +			attr.line_info_rec_size = line_info_rec_size;  		} else {  			break;  		} -		fd = sys_bpf_prog_load(&attr, sizeof(attr)); +		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts);  		if (fd >= 0)  			goto done;  	} -	if (load_attr->log_level || !load_attr->log_buf) -		goto done; +	if (log_level == 0 && log_buf) { +		/* log_level == 0 with non-NULL log_buf requires retrying on error +		 * with log_level == 1 and log_buf/log_buf_size set, to get details of +		 * failure +		 */ +		attr.log_buf = ptr_to_u64(log_buf); +		attr.log_size = log_size; +		attr.log_level = 1; -	/* Try again with log */ -	attr.log_buf = ptr_to_u64(load_attr->log_buf); -	attr.log_size = load_attr->log_buf_sz; -	attr.log_level = 1; -	load_attr->log_buf[0] = 0; - -	fd = sys_bpf_prog_load(&attr, sizeof(attr)); +		fd = sys_bpf_prog_load(&attr, sizeof(attr), attempts); +	}  done:  	/* free() doesn't affect errno, so we don't need to restore it */  	free(finfo); @@ -367,17 +463,20 @@ done:  	return libbpf_err_errno(fd);  } +__attribute__((alias("bpf_load_program_xattr2")))  int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, -			   char *log_buf, size_t log_buf_sz) +			   char *log_buf, size_t log_buf_sz); + +static int bpf_load_program_xattr2(const struct bpf_load_program_attr *load_attr, +				   char *log_buf, size_t log_buf_sz)  { -	struct bpf_prog_load_params p = {}; +	LIBBPF_OPTS(bpf_prog_load_opts, p);  	if (!load_attr || !log_buf != !log_buf_sz)  		return libbpf_err(-EINVAL); -	p.prog_type = load_attr->prog_type;  	p.expected_attach_type = load_attr->expected_attach_type; -	switch (p.prog_type) { +	switch (load_attr->prog_type) {  	case BPF_PROG_TYPE_STRUCT_OPS:  	case BPF_PROG_TYPE_LSM:  		p.attach_btf_id = load_attr->attach_btf_id; @@ -391,12 +490,9 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,  		p.prog_ifindex = load_attr->prog_ifindex;  		p.kern_version = load_attr->kern_version;  	} -	p.insn_cnt = load_attr->insns_cnt; -	p.insns = load_attr->insns; -	p.license = load_attr->license;  	p.log_level = load_attr->log_level;  	p.log_buf = log_buf; -	p.log_buf_sz = log_buf_sz; +	p.log_size = log_buf_sz;  	p.prog_btf_fd = load_attr->prog_btf_fd;  	p.func_info_rec_size = load_attr->func_info_rec_size;  	p.func_info_cnt = load_attr->func_info_cnt; @@ -404,10 +500,10 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,  	p.line_info_rec_size = load_attr->line_info_rec_size;  	p.line_info_cnt = load_attr->line_info_cnt;  	p.line_info = load_attr->line_info; -	p.name = load_attr->name;  	p.prog_flags = load_attr->prog_flags; -	return libbpf__bpf_prog_load(&p); +	return bpf_prog_load(load_attr->prog_type, load_attr->name, load_attr->license, +			     load_attr->insns, load_attr->insns_cnt, &p);  }  int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -426,7 +522,7 @@ int bpf_load_program(enum bpf_prog_type type, const struct bpf_insn *insns,  	load_attr.license = license;  	load_attr.kern_version = kern_version; -	return bpf_load_program_xattr(&load_attr, log_buf, log_buf_sz); +	return bpf_load_program_xattr2(&load_attr, log_buf, log_buf_sz);  }  int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, @@ -437,6 +533,8 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,  	union bpf_attr 
attr;  	int fd; +	bump_rlimit_memlock(); +  	memset(&attr, 0, sizeof(attr));  	attr.prog_type = type;  	attr.insn_cnt = (__u32)insns_cnt; @@ -449,7 +547,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns,  	attr.kern_version = kern_version;  	attr.prog_flags = prog_flags; -	fd = sys_bpf_prog_load(&attr, sizeof(attr)); +	fd = sys_bpf_prog_load(&attr, sizeof(attr), PROG_LOAD_ATTEMPTS);  	return libbpf_err_errno(fd);  } @@ -593,11 +691,11 @@ static int bpf_map_batch_common(int cmd, int fd, void  *in_batch,  	return libbpf_err_errno(ret);  } -int bpf_map_delete_batch(int fd, void *keys, __u32 *count, +int bpf_map_delete_batch(int fd, const void *keys, __u32 *count,  			 const struct bpf_map_batch_opts *opts)  {  	return bpf_map_batch_common(BPF_MAP_DELETE_BATCH, fd, NULL, -				    NULL, keys, NULL, count, opts); +				    NULL, (void *)keys, NULL, count, opts);  }  int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch, void *keys, @@ -617,11 +715,11 @@ int bpf_map_lookup_and_delete_batch(int fd, void *in_batch, void *out_batch,  				    count, opts);  } -int bpf_map_update_batch(int fd, void *keys, void *values, __u32 *count, +int bpf_map_update_batch(int fd, const void *keys, const void *values, __u32 *count,  			 const struct bpf_map_batch_opts *opts)  {  	return bpf_map_batch_common(BPF_MAP_UPDATE_BATCH, fd, NULL, NULL, -				    keys, values, count, opts); +				    (void *)keys, (void *)values, count, opts);  }  int bpf_obj_pin(int fd, const char *pathname) @@ -1028,24 +1126,67 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd)  	return libbpf_err_errno(fd);  } -int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, -		 bool do_log) +int bpf_btf_load(const void *btf_data, size_t btf_size, const struct bpf_btf_load_opts *opts)  { -	union bpf_attr attr = {}; +	const size_t attr_sz = offsetofend(union bpf_attr, btf_log_level); +	union bpf_attr attr; +	char *log_buf; +	size_t log_size; +	__u32 log_level;  	int fd; -	attr.btf = ptr_to_u64(btf); +	bump_rlimit_memlock(); + +	memset(&attr, 0, attr_sz); + +	if (!OPTS_VALID(opts, bpf_btf_load_opts)) +		return libbpf_err(-EINVAL); + +	log_buf = OPTS_GET(opts, log_buf, NULL); +	log_size = OPTS_GET(opts, log_size, 0); +	log_level = OPTS_GET(opts, log_level, 0); + +	if (log_size > UINT_MAX) +		return libbpf_err(-EINVAL); +	if (log_size && !log_buf) +		return libbpf_err(-EINVAL); + +	attr.btf = ptr_to_u64(btf_data);  	attr.btf_size = btf_size; +	/* log_level == 0 and log_buf != NULL means "try loading without +	 * log_buf, but retry with log_buf and log_level=1 on error", which is +	 * consistent across low-level and high-level BTF and program loading +	 * APIs within libbpf and provides a sensible behavior in practice +	 */ +	if (log_level) { +		attr.btf_log_buf = ptr_to_u64(log_buf); +		attr.btf_log_size = (__u32)log_size; +		attr.btf_log_level = log_level; +	} -retry: -	if (do_log && log_buf && log_buf_size) { -		attr.btf_log_level = 1; -		attr.btf_log_size = log_buf_size; +	fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz); +	if (fd < 0 && log_buf && log_level == 0) {  		attr.btf_log_buf = ptr_to_u64(log_buf); +		attr.btf_log_size = (__u32)log_size; +		attr.btf_log_level = 1; +		fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, attr_sz);  	} +	return libbpf_err_errno(fd); +} -	fd = sys_bpf_fd(BPF_BTF_LOAD, &attr, sizeof(attr)); +int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, __u32 log_buf_size, bool do_log) +{ +	LIBBPF_OPTS(bpf_btf_load_opts, opts); +	int fd; + +retry: 
+	if (do_log && log_buf && log_buf_size) { +		opts.log_buf = log_buf; +		opts.log_size = log_buf_size; +		opts.log_level = 1; +	} +	fd = bpf_btf_load(btf, btf_size, &opts);  	if (fd < 0 && !do_log && log_buf && log_buf_size) {  		do_log = true;  		goto retry; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 6fffb3cdf39b..14e0d97ad2cf 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -29,11 +29,38 @@  #include <stdint.h>  #include "libbpf_common.h" +#include "libbpf_legacy.h"  #ifdef __cplusplus  extern "C" {  #endif +int libbpf_set_memlock_rlim(size_t memlock_bytes); + +struct bpf_map_create_opts { +	size_t sz; /* size of this struct for forward/backward compatibility */ + +	__u32 btf_fd; +	__u32 btf_key_type_id; +	__u32 btf_value_type_id; +	__u32 btf_vmlinux_value_type_id; + +	__u32 inner_map_fd; +	__u32 map_flags; +	__u64 map_extra; + +	__u32 numa_node; +	__u32 map_ifindex; +}; +#define bpf_map_create_opts__last_field map_ifindex + +LIBBPF_API int bpf_map_create(enum bpf_map_type map_type, +			      const char *map_name, +			      __u32 key_size, +			      __u32 value_size, +			      __u32 max_entries, +			      const struct bpf_map_create_opts *opts); +  struct bpf_create_map_attr {  	const char *name;  	enum bpf_map_type map_type; @@ -52,25 +79,95 @@ struct bpf_create_map_attr {  	};  }; -LIBBPF_API int -bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead") +LIBBPF_API int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")  LIBBPF_API int bpf_create_map_node(enum bpf_map_type map_type, const char *name,  				   int key_size, int value_size,  				   int max_entries, __u32 map_flags, int node); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")  LIBBPF_API int bpf_create_map_name(enum bpf_map_type map_type, const char *name,  				   int key_size, int value_size,  				   int max_entries, __u32 map_flags); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")  LIBBPF_API int bpf_create_map(enum bpf_map_type map_type, int key_size,  			      int value_size, int max_entries, __u32 map_flags); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")  LIBBPF_API int bpf_create_map_in_map_node(enum bpf_map_type map_type,  					  const char *name, int key_size,  					  int inner_map_fd, int max_entries,  					  __u32 map_flags, int node); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_map_create() instead")  LIBBPF_API int bpf_create_map_in_map(enum bpf_map_type map_type,  				     const char *name, int key_size,  				     int inner_map_fd, int max_entries,  				     __u32 map_flags); +struct bpf_prog_load_opts { +	size_t sz; /* size of this struct for forward/backward compatibility */ + +	/* libbpf can retry BPF_PROG_LOAD command if bpf() syscall returns +	 * -EAGAIN. This field determines how many attempts libbpf has to +	 *  make. If not specified, libbpf will use default value of 5. 
+	 */ +	int attempts; + +	enum bpf_attach_type expected_attach_type; +	__u32 prog_btf_fd; +	__u32 prog_flags; +	__u32 prog_ifindex; +	__u32 kern_version; + +	__u32 attach_btf_id; +	__u32 attach_prog_fd; +	__u32 attach_btf_obj_fd; + +	const int *fd_array; + +	/* .BTF.ext func info data */ +	const void *func_info; +	__u32 func_info_cnt; +	__u32 func_info_rec_size; + +	/* .BTF.ext line info data */ +	const void *line_info; +	__u32 line_info_cnt; +	__u32 line_info_rec_size; + +	/* verifier log options */ +	__u32 log_level; +	__u32 log_size; +	char *log_buf; +}; +#define bpf_prog_load_opts__last_field log_buf + +LIBBPF_API int bpf_prog_load(enum bpf_prog_type prog_type, +			     const char *prog_name, const char *license, +			     const struct bpf_insn *insns, size_t insn_cnt, +			     const struct bpf_prog_load_opts *opts); +/* this "specialization" should go away in libbpf 1.0 */ +LIBBPF_API int bpf_prog_load_v0_6_0(enum bpf_prog_type prog_type, +				    const char *prog_name, const char *license, +				    const struct bpf_insn *insns, size_t insn_cnt, +				    const struct bpf_prog_load_opts *opts); + +/* This is an elaborate way to not conflict with deprecated bpf_prog_load() + * API, defined in libbpf.h. Once we hit libbpf 1.0, all this will be gone. + * With this approach, if someone is calling bpf_prog_load() with + * 4 arguments, they will use the deprecated API, which keeps backwards + * compatibility (both source code and binary). If bpf_prog_load() is called + * with 6 arguments, though, it gets redirected to __bpf_prog_load. + * So looking forward to libbpf 1.0 when this hack will be gone and + * __bpf_prog_load() will be called just bpf_prog_load(). + */ +#ifndef bpf_prog_load +#define bpf_prog_load(...) ___libbpf_overload(___bpf_prog_load, __VA_ARGS__) +#define ___bpf_prog_load4(file, type, pobj, prog_fd) \ +	bpf_prog_load_deprecated(file, type, pobj, prog_fd) +#define ___bpf_prog_load6(prog_type, prog_name, license, insns, insn_cnt, opts) \ +	bpf_prog_load(prog_type, prog_name, license, insns, insn_cnt, opts) +#endif /* bpf_prog_load */ +  struct bpf_load_program_attr {  	enum bpf_prog_type prog_type;  	enum bpf_attach_type expected_attach_type; @@ -100,15 +197,18 @@ struct bpf_load_program_attr {  /* Flags to direct loading requirements */  #define MAPS_RELAX_COMPAT	0x01 -/* Recommend log buffer size */ +/* Recommended log buffer size */  #define BPF_LOG_BUF_SIZE (UINT32_MAX >> 8) /* verifier maximum in kernels <= 5.1 */ -LIBBPF_API int -bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, -		       char *log_buf, size_t log_buf_sz); + +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead") +LIBBPF_API int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, +				      char *log_buf, size_t log_buf_sz); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")  LIBBPF_API int bpf_load_program(enum bpf_prog_type type,  				const struct bpf_insn *insns, size_t insns_cnt,  				const char *license, __u32 kern_version,  				char *log_buf, size_t log_buf_sz); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_prog_load() instead")  LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,  				  const struct bpf_insn *insns,  				  size_t insns_cnt, __u32 prog_flags, @@ -116,6 +216,23 @@ LIBBPF_API int bpf_verify_program(enum bpf_prog_type type,  				  char *log_buf, size_t log_buf_sz,  				  int log_level); +struct bpf_btf_load_opts { +	size_t sz; /* size of this struct for forward/backward compatibility */ + +	/* kernel log options */ +	char 
*log_buf; +	__u32 log_level; +	__u32 log_size; +}; +#define bpf_btf_load_opts__last_field log_size + +LIBBPF_API int bpf_btf_load(const void *btf_data, size_t btf_size, +			    const struct bpf_btf_load_opts *opts); + +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_btf_load() instead") +LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, +			    __u32 log_buf_size, bool do_log); +  LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value,  				   __u64 flags); @@ -137,17 +254,128 @@ struct bpf_map_batch_opts {  };  #define bpf_map_batch_opts__last_field flags -LIBBPF_API int bpf_map_delete_batch(int fd, void *keys, + +/** + * @brief **bpf_map_delete_batch()** allows for batch deletion of multiple + * elements in a BPF map. + * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param count input and output parameter; on input **count** represents the + * number of  elements in the map to delete in batch; + * on output if a non-EFAULT error is returned, **count** represents the number of deleted + * elements if the output **count** value is not equal to the input **count** value + * If EFAULT is returned, **count** should not be trusted to be correct. + * @param opts options for configuring the way the batch deletion works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_delete_batch(int fd, const void *keys,  				    __u32 *count,  				    const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_batch()** allows for batch lookup of BPF map elements. + * + * The parameter *in_batch* is the address of the first element in the batch to read. + * *out_batch* is an output parameter that should be passed as *in_batch* to subsequent + * calls to **bpf_map_lookup_batch()**. NULL can be passed for *in_batch* to indicate + * that the batched lookup starts from the beginning of the map. + * + * The *keys* and *values* are output parameters which must point to memory large enough to + * hold *count* items based on the key and value size of the map *map_fd*. The *keys* + * buffer must be of *key_size* * *count*. The *values* buffer must be of + * *value_size* * *count*. + * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * indicate that the batched lookup starts from the beginning of the map. + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array large enough for *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read in batch; on output it's the number of elements that were + * successfully read. + * If a non-EFAULT error is returned, count will be set as the number of elements + * that were read before the error occurred. + * If EFAULT is returned, **count** should not be trusted to be correct. 
+ * @param opts options for configuring the way the batch lookup works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */  LIBBPF_API int bpf_map_lookup_batch(int fd, void *in_batch, void *out_batch,  				    void *keys, void *values, __u32 *count,  				    const struct bpf_map_batch_opts *opts); + +/** + * @brief **bpf_map_lookup_and_delete_batch()** allows for batch lookup and deletion + * of BPF map elements where each element is deleted after being retrieved. + * + * @param fd BPF map file descriptor + * @param in_batch address of the first element in batch to read, can pass NULL to + * get address of the first element in *out_batch* + * @param out_batch output parameter that should be passed to next call as *in_batch* + * @param keys pointer to an array of *count* keys + * @param values pointer to an array large enough for *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to read and delete in batch; on output it represents the number of + * elements that were successfully read and deleted + * If a non-**EFAULT** error code is returned and if the output **count** value + * is not equal to the input **count** value, up to **count** elements may + * have been deleted. + * if **EFAULT** is returned up to *count* elements may have been deleted without + * being returned via the *keys* and *values* output parameters. + * @param opts options for configuring the way the batch lookup and delete works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */  LIBBPF_API int bpf_map_lookup_and_delete_batch(int fd, void *in_batch,  					void *out_batch, void *keys,  					void *values, __u32 *count,  					const struct bpf_map_batch_opts *opts); -LIBBPF_API int bpf_map_update_batch(int fd, void *keys, void *values, + +/** + * @brief **bpf_map_update_batch()** updates multiple elements in a map + * by specifying keys and their corresponding values. + * + * The *keys* and *values* parameters must point to memory large enough + * to hold *count* items based on the key and value size of the map. + * + * The *opts* parameter can be used to control how *bpf_map_update_batch()* + * should handle keys that either do or do not already exist in the map. + * In particular the *flags* parameter of *bpf_map_batch_opts* can be + * one of the following: + * + * Note that *count* is an input and output parameter, where on output it + * represents how many elements were successfully updated. Also note that if + * **EFAULT** then *count* should not be trusted to be correct. + * + * **BPF_ANY** + *    Create new elements or update existing. + * + * **BPF_NOEXIST** + *    Create new elements only if they do not exist. + * + * **BPF_EXIST** + *    Update existing elements. + * + * **BPF_F_LOCK** + *    Update spin_lock-ed map elements. This must be + *    specified if the map value contains a spinlock. + * + * @param fd BPF map file descriptor + * @param keys pointer to an array of *count* keys + * @param values pointer to an array of *count* values + * @param count input and output parameter; on input it's the number of elements + * in the map to update in batch; on output if a non-EFAULT error is returned, + * **count** represents the number of updated elements if the output **count** + * value is not equal to the input **count** value. + * If EFAULT is returned, **count** should not be trusted to be correct. 
+ * @param opts options for configuring the way the batch update works + * @return 0, on success; negative error code, otherwise (errno is also set to + * the error code) + */ +LIBBPF_API int bpf_map_update_batch(int fd, const void *keys, const void *values,  				    __u32 *count,  				    const struct bpf_map_batch_opts *opts); @@ -243,8 +471,6 @@ LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,  			      __u32 query_flags, __u32 *attach_flags,  			      __u32 *prog_ids, __u32 *prog_cnt);  LIBBPF_API int bpf_raw_tracepoint_open(const char *name, int prog_fd); -LIBBPF_API int bpf_load_btf(const void *btf, __u32 btf_size, char *log_buf, -			    __u32 log_buf_size, bool do_log);  LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,  				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,  				 __u64 *probe_offset, __u64 *probe_addr); diff --git a/tools/lib/bpf/bpf_gen_internal.h b/tools/lib/bpf/bpf_gen_internal.h index 6f3df004479b..223308931d55 100644 --- a/tools/lib/bpf/bpf_gen_internal.h +++ b/tools/lib/bpf/bpf_gen_internal.h @@ -3,6 +3,8 @@  #ifndef __BPF_GEN_INTERNAL_H  #define __BPF_GEN_INTERNAL_H +#include "bpf.h" +  struct ksym_relo_desc {  	const char *name;  	int kind; @@ -37,6 +39,8 @@ struct bpf_gen {  	int error;  	struct ksym_relo_desc *relos;  	int relo_cnt; +	struct bpf_core_relo *core_relos; +	int core_relo_cnt;  	char attach_target[128];  	int attach_kind;  	struct ksym_desc *ksyms; @@ -49,13 +53,20 @@ void bpf_gen__init(struct bpf_gen *gen, int log_level, int nr_progs, int nr_maps  int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps);  void bpf_gen__free(struct bpf_gen *gen);  void bpf_gen__load_btf(struct bpf_gen *gen, const void *raw_data, __u32 raw_size); -void bpf_gen__map_create(struct bpf_gen *gen, struct bpf_create_map_params *map_attr, int map_idx); -struct bpf_prog_load_params; -void bpf_gen__prog_load(struct bpf_gen *gen, struct bpf_prog_load_params *load_attr, int prog_idx); +void bpf_gen__map_create(struct bpf_gen *gen, +			 enum bpf_map_type map_type, const char *map_name, +			 __u32 key_size, __u32 value_size, __u32 max_entries, +			 struct bpf_map_create_opts *map_attr, int map_idx); +void bpf_gen__prog_load(struct bpf_gen *gen, +			enum bpf_prog_type prog_type, const char *prog_name, +			const char *license, struct bpf_insn *insns, size_t insn_cnt, +			struct bpf_prog_load_opts *load_attr, int prog_idx);  void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *value, __u32 value_size);  void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx);  void bpf_gen__record_attach_target(struct bpf_gen *gen, const char *name, enum bpf_attach_type type);  void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,  			    bool is_typeless, int kind, int insn_idx); +void bpf_gen__record_relo_core(struct bpf_gen *gen, const struct bpf_core_relo *core_relo); +void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int key, int inner_map_idx);  #endif diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h index db05a5937105..90f56b0f585f 100644 --- a/tools/lib/bpf/bpf_tracing.h +++ b/tools/lib/bpf/bpf_tracing.h @@ -66,277 +66,204 @@  #if defined(__KERNEL__) || defined(__VMLINUX_H__) -#define PT_REGS_PARM1(x) ((x)->di) -#define PT_REGS_PARM2(x) ((x)->si) -#define PT_REGS_PARM3(x) ((x)->dx) -#define PT_REGS_PARM4(x) ((x)->cx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->sp) -#define PT_REGS_FP(x) ((x)->bp) -#define PT_REGS_RC(x) 
((x)->ax) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->ip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), di) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), si) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), dx) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), cx) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), bp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), ax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), ip) +#define __PT_PARM1_REG di +#define __PT_PARM2_REG si +#define __PT_PARM3_REG dx +#define __PT_PARM4_REG cx +#define __PT_PARM5_REG r8 +#define __PT_RET_REG sp +#define __PT_FP_REG bp +#define __PT_RC_REG ax +#define __PT_SP_REG sp +#define __PT_IP_REG ip  #else  #ifdef __i386__ -/* i386 kernel is built with -mregparm=3 */ -#define PT_REGS_PARM1(x) ((x)->eax) -#define PT_REGS_PARM2(x) ((x)->edx) -#define PT_REGS_PARM3(x) ((x)->ecx) -#define PT_REGS_PARM4(x) 0 -#define PT_REGS_PARM5(x) 0 -#define PT_REGS_RET(x) ((x)->esp) -#define PT_REGS_FP(x) ((x)->ebp) -#define PT_REGS_RC(x) ((x)->eax) -#define PT_REGS_SP(x) ((x)->esp) -#define PT_REGS_IP(x) ((x)->eip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), eax) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), edx) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), ecx) -#define PT_REGS_PARM4_CORE(x) 0 -#define PT_REGS_PARM5_CORE(x) 0 -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), esp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), ebp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), eax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), esp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), eip) - -#else -#define PT_REGS_PARM1(x) ((x)->rdi) -#define PT_REGS_PARM2(x) ((x)->rsi) -#define PT_REGS_PARM3(x) ((x)->rdx) -#define PT_REGS_PARM4(x) ((x)->rcx) -#define PT_REGS_PARM5(x) ((x)->r8) -#define PT_REGS_RET(x) ((x)->rsp) -#define PT_REGS_FP(x) ((x)->rbp) -#define PT_REGS_RC(x) ((x)->rax) -#define PT_REGS_SP(x) ((x)->rsp) -#define PT_REGS_IP(x) ((x)->rip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), rdi) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), rsi) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), rdx) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), rcx) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), r8) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), rsp) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), rbp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), rax) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), rsp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), rip) - -#endif -#endif +#define __PT_PARM1_REG eax +#define __PT_PARM2_REG edx +#define __PT_PARM3_REG ecx +/* i386 kernel is built with -mregparm=3 */ +#define __PT_PARM4_REG __unsupported__ +#define __PT_PARM5_REG __unsupported__ +#define __PT_RET_REG esp +#define __PT_FP_REG ebp +#define __PT_RC_REG eax +#define __PT_SP_REG esp +#define __PT_IP_REG eip + +#else /* __i386__ */ + +#define __PT_PARM1_REG rdi +#define __PT_PARM2_REG rsi +#define __PT_PARM3_REG rdx +#define __PT_PARM4_REG rcx +#define __PT_PARM5_REG r8 +#define __PT_RET_REG rsp +#define __PT_FP_REG rbp +#define __PT_RC_REG rax +#define __PT_SP_REG rsp +#define __PT_IP_REG rip + +#endif /* __i386__ */ + +#endif /* __KERNEL__ || __VMLINUX_H__ */  #elif defined(bpf_target_s390)  /* s390 provides user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_S390 const volatile user_pt_regs 
-#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) -#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) -#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) -#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) -#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) -#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) -#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) -#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[3]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[4]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[5]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[6]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[14]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[11]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[2]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), gprs[15]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_S390 *)(x), psw.addr) +#define __PT_REGS_CAST(x) ((const user_pt_regs *)(x)) +#define __PT_PARM1_REG gprs[2] +#define __PT_PARM2_REG gprs[3] +#define __PT_PARM3_REG gprs[4] +#define __PT_PARM4_REG gprs[5] +#define __PT_PARM5_REG gprs[6] +#define __PT_RET_REG grps[14] +#define __PT_FP_REG gprs[11]	/* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG gprs[2] +#define __PT_SP_REG gprs[15] +#define __PT_IP_REG psw.addr  #elif defined(bpf_target_arm) -#define PT_REGS_PARM1(x) ((x)->uregs[0]) -#define PT_REGS_PARM2(x) ((x)->uregs[1]) -#define PT_REGS_PARM3(x) ((x)->uregs[2]) -#define PT_REGS_PARM4(x) ((x)->uregs[3]) -#define PT_REGS_PARM5(x) ((x)->uregs[4]) -#define PT_REGS_RET(x) ((x)->uregs[14]) -#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->uregs[0]) -#define PT_REGS_SP(x) ((x)->uregs[13]) -#define PT_REGS_IP(x) ((x)->uregs[12]) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), uregs[0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), uregs[1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), uregs[2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), uregs[3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), uregs[4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), uregs[14]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), uregs[11]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), uregs[0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), uregs[13]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), uregs[12]) +#define __PT_PARM1_REG uregs[0] +#define __PT_PARM2_REG uregs[1] +#define __PT_PARM3_REG uregs[2] +#define __PT_PARM4_REG uregs[3] +#define __PT_PARM5_REG uregs[4] +#define __PT_RET_REG uregs[14] +#define __PT_FP_REG uregs[11]	/* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG uregs[0] +#define __PT_SP_REG uregs[13] +#define __PT_IP_REG uregs[12]  #elif defined(bpf_target_arm64)  /* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ -struct pt_regs; -#define PT_REGS_ARM64 const volatile struct user_pt_regs -#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) -#define PT_REGS_PARM3(x) 
(((PT_REGS_ARM64 *)(x))->regs[2]) -#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) -#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) -#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) -/* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) -#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) -#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[30]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[29]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), regs[0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_ARM64 *)(x), pc) +#define __PT_REGS_CAST(x) ((const struct user_pt_regs *)(x)) +#define __PT_PARM1_REG regs[0] +#define __PT_PARM2_REG regs[1] +#define __PT_PARM3_REG regs[2] +#define __PT_PARM4_REG regs[3] +#define __PT_PARM5_REG regs[4] +#define __PT_RET_REG regs[30] +#define __PT_FP_REG regs[29]	/* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG regs[0] +#define __PT_SP_REG sp +#define __PT_IP_REG pc  #elif defined(bpf_target_mips) -#define PT_REGS_PARM1(x) ((x)->regs[4]) -#define PT_REGS_PARM2(x) ((x)->regs[5]) -#define PT_REGS_PARM3(x) ((x)->regs[6]) -#define PT_REGS_PARM4(x) ((x)->regs[7]) -#define PT_REGS_PARM5(x) ((x)->regs[8]) -#define PT_REGS_RET(x) ((x)->regs[31]) -#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ -#define PT_REGS_RC(x) ((x)->regs[2]) -#define PT_REGS_SP(x) ((x)->regs[29]) -#define PT_REGS_IP(x) ((x)->cp0_epc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), regs[4]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), regs[5]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), regs[6]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), regs[7]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), regs[8]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), regs[31]) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((x), regs[30]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), regs[2]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), regs[29]) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), cp0_epc) +#define __PT_PARM1_REG regs[4] +#define __PT_PARM2_REG regs[5] +#define __PT_PARM3_REG regs[6] +#define __PT_PARM4_REG regs[7] +#define __PT_PARM5_REG regs[8] +#define __PT_RET_REG regs[31] +#define __PT_FP_REG regs[30]	/* Works only with CONFIG_FRAME_POINTER */ +#define __PT_RC_REG regs[2] +#define __PT_SP_REG regs[29] +#define __PT_IP_REG cp0_epc  #elif defined(bpf_target_powerpc) -#define PT_REGS_PARM1(x) ((x)->gpr[3]) -#define PT_REGS_PARM2(x) ((x)->gpr[4]) -#define PT_REGS_PARM3(x) ((x)->gpr[5]) -#define PT_REGS_PARM4(x) ((x)->gpr[6]) -#define PT_REGS_PARM5(x) ((x)->gpr[7]) -#define PT_REGS_RC(x) ((x)->gpr[3]) -#define PT_REGS_SP(x) ((x)->sp) -#define PT_REGS_IP(x) ((x)->nip) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), gpr[3]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), gpr[4]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), gpr[5]) -#define 
PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), gpr[6]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), gpr[7]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), gpr[3]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), nip) +#define __PT_PARM1_REG gpr[3] +#define __PT_PARM2_REG gpr[4] +#define __PT_PARM3_REG gpr[5] +#define __PT_PARM4_REG gpr[6] +#define __PT_PARM5_REG gpr[7] +#define __PT_RET_REG regs[31] +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG gpr[3] +#define __PT_SP_REG sp +#define __PT_IP_REG nip  #elif defined(bpf_target_sparc) -#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) -#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) -#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) -#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) -#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) -#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) -#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I1]) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I2]) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I3]) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I4]) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I7]) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((x), u_regs[UREG_I0]) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((x), u_regs[UREG_FP]) - +#define __PT_PARM1_REG u_regs[UREG_I0] +#define __PT_PARM2_REG u_regs[UREG_I1] +#define __PT_PARM3_REG u_regs[UREG_I2] +#define __PT_PARM4_REG u_regs[UREG_I3] +#define __PT_PARM5_REG u_regs[UREG_I4] +#define __PT_RET_REG u_regs[UREG_I7] +#define __PT_FP_REG __unsupported__ +#define __PT_RC_REG u_regs[UREG_I0] +#define __PT_SP_REG u_regs[UREG_FP]  /* Should this also be a bpf_target check for the sparc case? 
*/  #if defined(__arch64__) -#define PT_REGS_IP(x) ((x)->tpc) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), tpc) +#define __PT_IP_REG tpc  #else -#define PT_REGS_IP(x) ((x)->pc) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((x), pc) +#define __PT_IP_REG pc  #endif  #elif defined(bpf_target_riscv) +#define __PT_REGS_CAST(x) ((const struct user_regs_struct *)(x)) +#define __PT_PARM1_REG a0 +#define __PT_PARM2_REG a1 +#define __PT_PARM3_REG a2 +#define __PT_PARM4_REG a3 +#define __PT_PARM5_REG a4 +#define __PT_RET_REG ra +#define __PT_FP_REG fp +#define __PT_RC_REG a5 +#define __PT_SP_REG sp +#define __PT_IP_REG epc + +#endif + +#if defined(bpf_target_defined) +  struct pt_regs; -#define PT_REGS_RV const volatile struct user_regs_struct -#define PT_REGS_PARM1(x) (((PT_REGS_RV *)(x))->a0) -#define PT_REGS_PARM2(x) (((PT_REGS_RV *)(x))->a1) -#define PT_REGS_PARM3(x) (((PT_REGS_RV *)(x))->a2) -#define PT_REGS_PARM4(x) (((PT_REGS_RV *)(x))->a3) -#define PT_REGS_PARM5(x) (((PT_REGS_RV *)(x))->a4) -#define PT_REGS_RET(x) (((PT_REGS_RV *)(x))->ra) -#define PT_REGS_FP(x) (((PT_REGS_RV *)(x))->s5) -#define PT_REGS_RC(x) (((PT_REGS_RV *)(x))->a5) -#define PT_REGS_SP(x) (((PT_REGS_RV *)(x))->sp) -#define PT_REGS_IP(x) (((PT_REGS_RV *)(x))->epc) - -#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a0) -#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a1) -#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a2) -#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a3) -#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a4) -#define PT_REGS_RET_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), ra) -#define PT_REGS_FP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), fp) -#define PT_REGS_RC_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), a5) -#define PT_REGS_SP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), sp) -#define PT_REGS_IP_CORE(x) BPF_CORE_READ((PT_REGS_RV *)(x), epc) +/* allow some architecutres to override `struct pt_regs` */ +#ifndef __PT_REGS_CAST +#define __PT_REGS_CAST(x) (x)  #endif +#define PT_REGS_PARM1(x) (__PT_REGS_CAST(x)->__PT_PARM1_REG) +#define PT_REGS_PARM2(x) (__PT_REGS_CAST(x)->__PT_PARM2_REG) +#define PT_REGS_PARM3(x) (__PT_REGS_CAST(x)->__PT_PARM3_REG) +#define PT_REGS_PARM4(x) (__PT_REGS_CAST(x)->__PT_PARM4_REG) +#define PT_REGS_PARM5(x) (__PT_REGS_CAST(x)->__PT_PARM5_REG) +#define PT_REGS_RET(x) (__PT_REGS_CAST(x)->__PT_RET_REG) +#define PT_REGS_FP(x) (__PT_REGS_CAST(x)->__PT_FP_REG) +#define PT_REGS_RC(x) (__PT_REGS_CAST(x)->__PT_RC_REG) +#define PT_REGS_SP(x) (__PT_REGS_CAST(x)->__PT_SP_REG) +#define PT_REGS_IP(x) (__PT_REGS_CAST(x)->__PT_IP_REG) + +#define PT_REGS_PARM1_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM1_REG) +#define PT_REGS_PARM2_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM2_REG) +#define PT_REGS_PARM3_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM3_REG) +#define PT_REGS_PARM4_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM4_REG) +#define PT_REGS_PARM5_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_PARM5_REG) +#define PT_REGS_RET_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RET_REG) +#define PT_REGS_FP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_FP_REG) +#define PT_REGS_RC_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_RC_REG) +#define PT_REGS_SP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_SP_REG) +#define PT_REGS_IP_CORE(x) BPF_CORE_READ(__PT_REGS_CAST(x), __PT_IP_REG) +  #if defined(bpf_target_powerpc) +  #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; })  #define BPF_KRETPROBE_READ_RET_IP		
BPF_KPROBE_READ_RET_IP +  #elif defined(bpf_target_sparc) +  #define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); })  #define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP -#elif defined(bpf_target_defined) + +#else +  #define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \  	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); })  #define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \ -	({ bpf_probe_read_kernel(&(ip), sizeof(ip),			    \ -			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +	({ bpf_probe_read_kernel(&(ip), sizeof(ip), (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +  #endif -#if !defined(bpf_target_defined) +#else /* defined(bpf_target_defined) */  #define PT_REGS_PARM1(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })  #define PT_REGS_PARM2(x) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) @@ -363,7 +290,7 @@ struct pt_regs;  #define BPF_KPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; })  #define BPF_KRETPROBE_READ_RET_IP(ip, ctx) ({ _Pragma(__BPF_TARGET_MISSING); 0l; }) -#endif /* !defined(bpf_target_defined) */ +#endif /* defined(bpf_target_defined) */  #ifndef ___bpf_concat  #define ___bpf_concat(a, b) a ## b @@ -375,25 +302,23 @@ struct pt_regs;  #define ___bpf_nth(_, _1, _2, _3, _4, _5, _6, _7, _8, _9, _a, _b, _c, N, ...) N  #endif  #ifndef ___bpf_narg -#define ___bpf_narg(...) \ -	___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +#define ___bpf_narg(...) ___bpf_nth(_, ##__VA_ARGS__, 12, 11, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0)  #endif -#define ___bpf_ctx_cast0() ctx -#define ___bpf_ctx_cast1(x) ___bpf_ctx_cast0(), (void *)ctx[0] -#define ___bpf_ctx_cast2(x, args...) ___bpf_ctx_cast1(args), (void *)ctx[1] -#define ___bpf_ctx_cast3(x, args...) ___bpf_ctx_cast2(args), (void *)ctx[2] -#define ___bpf_ctx_cast4(x, args...) ___bpf_ctx_cast3(args), (void *)ctx[3] -#define ___bpf_ctx_cast5(x, args...) ___bpf_ctx_cast4(args), (void *)ctx[4] -#define ___bpf_ctx_cast6(x, args...) ___bpf_ctx_cast5(args), (void *)ctx[5] -#define ___bpf_ctx_cast7(x, args...) ___bpf_ctx_cast6(args), (void *)ctx[6] -#define ___bpf_ctx_cast8(x, args...) ___bpf_ctx_cast7(args), (void *)ctx[7] -#define ___bpf_ctx_cast9(x, args...) ___bpf_ctx_cast8(args), (void *)ctx[8] +#define ___bpf_ctx_cast0()            ctx +#define ___bpf_ctx_cast1(x)           ___bpf_ctx_cast0(), (void *)ctx[0] +#define ___bpf_ctx_cast2(x, args...)  ___bpf_ctx_cast1(args), (void *)ctx[1] +#define ___bpf_ctx_cast3(x, args...)  ___bpf_ctx_cast2(args), (void *)ctx[2] +#define ___bpf_ctx_cast4(x, args...)  ___bpf_ctx_cast3(args), (void *)ctx[3] +#define ___bpf_ctx_cast5(x, args...)  ___bpf_ctx_cast4(args), (void *)ctx[4] +#define ___bpf_ctx_cast6(x, args...)  ___bpf_ctx_cast5(args), (void *)ctx[5] +#define ___bpf_ctx_cast7(x, args...)  ___bpf_ctx_cast6(args), (void *)ctx[6] +#define ___bpf_ctx_cast8(x, args...)  ___bpf_ctx_cast7(args), (void *)ctx[7] +#define ___bpf_ctx_cast9(x, args...)  ___bpf_ctx_cast8(args), (void *)ctx[8]  #define ___bpf_ctx_cast10(x, args...) ___bpf_ctx_cast9(args), (void *)ctx[9]  #define ___bpf_ctx_cast11(x, args...) ___bpf_ctx_cast10(args), (void *)ctx[10]  #define ___bpf_ctx_cast12(x, args...) ___bpf_ctx_cast11(args), (void *)ctx[11] -#define ___bpf_ctx_cast(args...) \ -	___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args) +#define ___bpf_ctx_cast(args...)      
___bpf_apply(___bpf_ctx_cast, ___bpf_narg(args))(args)  /*   * BPF_PROG is a convenience wrapper for generic tp_btf/fentry/fexit and @@ -426,19 +351,13 @@ ____##name(unsigned long long *ctx, ##args)  struct pt_regs; -#define ___bpf_kprobe_args0() ctx -#define ___bpf_kprobe_args1(x) \ -	___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) -#define ___bpf_kprobe_args2(x, args...) \ -	___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) -#define ___bpf_kprobe_args3(x, args...) \ -	___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) -#define ___bpf_kprobe_args4(x, args...) \ -	___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) -#define ___bpf_kprobe_args5(x, args...) \ -	___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) -#define ___bpf_kprobe_args(args...) \ -	___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kprobe_args0()           ctx +#define ___bpf_kprobe_args1(x)          ___bpf_kprobe_args0(), (void *)PT_REGS_PARM1(ctx) +#define ___bpf_kprobe_args2(x, args...) ___bpf_kprobe_args1(args), (void *)PT_REGS_PARM2(ctx) +#define ___bpf_kprobe_args3(x, args...) ___bpf_kprobe_args2(args), (void *)PT_REGS_PARM3(ctx) +#define ___bpf_kprobe_args4(x, args...) ___bpf_kprobe_args3(args), (void *)PT_REGS_PARM4(ctx) +#define ___bpf_kprobe_args5(x, args...) ___bpf_kprobe_args4(args), (void *)PT_REGS_PARM5(ctx) +#define ___bpf_kprobe_args(args...)     ___bpf_apply(___bpf_kprobe_args, ___bpf_narg(args))(args)  /*   * BPF_KPROBE serves the same purpose for kprobes as BPF_PROG for @@ -464,11 +383,9 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \  static __attribute__((always_inline)) typeof(name(0))			    \  ____##name(struct pt_regs *ctx, ##args) -#define ___bpf_kretprobe_args0() ctx -#define ___bpf_kretprobe_args1(x) \ -	___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) -#define ___bpf_kretprobe_args(args...) \ -	___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args) +#define ___bpf_kretprobe_args0()       ctx +#define ___bpf_kretprobe_args1(x)      ___bpf_kretprobe_args0(), (void *)PT_REGS_RC(ctx) +#define ___bpf_kretprobe_args(args...) 
___bpf_apply(___bpf_kretprobe_args, ___bpf_narg(args))(args)  /*   * BPF_KRETPROBE is similar to BPF_KPROBE, except, it only provides optional diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 7e4c5586bd87..9aa19c89f758 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -299,6 +299,7 @@ static int btf_type_size(const struct btf_type *t)  	case BTF_KIND_TYPEDEF:  	case BTF_KIND_FUNC:  	case BTF_KIND_FLOAT: +	case BTF_KIND_TYPE_TAG:  		return base_size;  	case BTF_KIND_INT:  		return base_size + sizeof(__u32); @@ -349,6 +350,7 @@ static int btf_bswap_type_rest(struct btf_type *t)  	case BTF_KIND_TYPEDEF:  	case BTF_KIND_FUNC:  	case BTF_KIND_FLOAT: +	case BTF_KIND_TYPE_TAG:  		return 0;  	case BTF_KIND_INT:  		*(__u32 *)(t + 1) = bswap_32(*(__u32 *)(t + 1)); @@ -452,7 +454,7 @@ const struct btf *btf__base_btf(const struct btf *btf)  }  /* internal helper returning non-const pointer to a type */ -struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id) +struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id)  {  	if (type_id == 0)  		return &btf_void; @@ -608,6 +610,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)  		case BTF_KIND_RESTRICT:  		case BTF_KIND_VAR:  		case BTF_KIND_DECL_TAG: +		case BTF_KIND_TYPE_TAG:  			type_id = t->type;  			break;  		case BTF_KIND_ARRAY: @@ -649,6 +652,7 @@ int btf__align_of(const struct btf *btf, __u32 id)  	case BTF_KIND_VOLATILE:  	case BTF_KIND_CONST:  	case BTF_KIND_RESTRICT: +	case BTF_KIND_TYPE_TAG:  		return btf__align_of(btf, t->type);  	case BTF_KIND_ARRAY:  		return btf__align_of(btf, btf_array(t)->type); @@ -1120,54 +1124,86 @@ struct btf *btf__parse_split(const char *path, struct btf *base_btf)  static void *btf_get_raw_data(const struct btf *btf, __u32 *size, bool swap_endian); -int btf__load_into_kernel(struct btf *btf) +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level)  { -	__u32 log_buf_size = 0, raw_size; -	char *log_buf = NULL; +	LIBBPF_OPTS(bpf_btf_load_opts, opts); +	__u32 buf_sz = 0, raw_size; +	char *buf = NULL, *tmp;  	void *raw_data;  	int err = 0;  	if (btf->fd >= 0)  		return libbpf_err(-EEXIST); +	if (log_sz && !log_buf) +		return libbpf_err(-EINVAL); -retry_load: -	if (log_buf_size) { -		log_buf = malloc(log_buf_size); -		if (!log_buf) -			return libbpf_err(-ENOMEM); - -		*log_buf = 0; -	} - +	/* cache native raw data representation */  	raw_data = btf_get_raw_data(btf, &raw_size, false);  	if (!raw_data) {  		err = -ENOMEM;  		goto done;  	} -	/* cache native raw data representation */  	btf->raw_size = raw_size;  	btf->raw_data = raw_data; -	btf->fd = bpf_load_btf(raw_data, raw_size, log_buf, log_buf_size, false); +retry_load: +	/* if log_level is 0, we won't provide log_buf/log_size to the kernel, +	 * initially. Only if BTF loading fails, we bump log_level to 1 and +	 * retry, using either auto-allocated or custom log_buf. This way +	 * non-NULL custom log_buf provides a buffer just in case, but hopes +	 * for successful load and no need for log_buf. +	 */ +	if (log_level) { +		/* if caller didn't provide custom log_buf, we'll keep +		 * allocating our own progressively bigger buffers for BTF +		 * verification log +		 */ +		if (!log_buf) { +			buf_sz = max((__u32)BPF_LOG_BUF_SIZE, buf_sz * 2); +			tmp = realloc(buf, buf_sz); +			if (!tmp) { +				err = -ENOMEM; +				goto done; +			} +			buf = tmp; +			buf[0] = '\0'; +		} + +		opts.log_buf = log_buf ? log_buf : buf; +		opts.log_size = log_buf ? 
log_sz : buf_sz; +		opts.log_level = log_level; +	} + +	btf->fd = bpf_btf_load(raw_data, raw_size, &opts);  	if (btf->fd < 0) { -		if (!log_buf || errno == ENOSPC) { -			log_buf_size = max((__u32)BPF_LOG_BUF_SIZE, -					   log_buf_size << 1); -			free(log_buf); +		/* time to turn on verbose mode and try again */ +		if (log_level == 0) { +			log_level = 1;  			goto retry_load;  		} +		/* only retry if caller didn't provide custom log_buf, but +		 * make sure we can never overflow buf_sz +		 */ +		if (!log_buf && errno == ENOSPC && buf_sz <= UINT_MAX / 2) +			goto retry_load;  		err = -errno; -		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno); -		if (*log_buf) -			pr_warn("%s\n", log_buf); -		goto done; +		pr_warn("BTF loading error: %d\n", err); +		/* don't print out contents of custom log_buf */ +		if (!log_buf && buf[0]) +			pr_warn("-- BEGIN BTF LOAD LOG ---\n%s\n-- END BTF LOAD LOG --\n", buf);  	}  done: -	free(log_buf); +	free(buf);  	return libbpf_err(err);  } + +int btf__load_into_kernel(struct btf *btf) +{ +	return btf_load_into_kernel(btf, NULL, 0, 0); +} +  int btf__load(struct btf *) __attribute__((alias("btf__load_into_kernel")));  int btf__fd(const struct btf *btf) @@ -2236,6 +2272,22 @@ int btf__add_restrict(struct btf *btf, int ref_type_id)  }  /* + * Append new BTF_KIND_TYPE_TAG type with: + *   - *value*, non-empty/non-NULL tag value; + *   - *ref_type_id* - referenced type ID, it might not exist yet; + * Returns: + *   - >0, type ID of newly added BTF type; + *   - <0, on error. + */ +int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id) +{ +	if (!value|| !value[0]) +		return libbpf_err(-EINVAL); + +	return btf_add_ref_kind(btf, BTF_KIND_TYPE_TAG, value, ref_type_id); +} + +/*   * Append new BTF_KIND_FUNC type with:   *   - *name*, non-empty/non-NULL name;   *   - *proto_type_id* - FUNC_PROTO's type ID, it might not exist yet; @@ -2711,15 +2763,11 @@ void btf_ext__free(struct btf_ext *btf_ext)  	free(btf_ext);  } -struct btf_ext *btf_ext__new(__u8 *data, __u32 size) +struct btf_ext *btf_ext__new(const __u8 *data, __u32 size)  {  	struct btf_ext *btf_ext;  	int err; -	err = btf_ext_parse_hdr(data, size); -	if (err) -		return libbpf_err_ptr(err); -  	btf_ext = calloc(1, sizeof(struct btf_ext));  	if (!btf_ext)  		return libbpf_err_ptr(-ENOMEM); @@ -2732,6 +2780,10 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)  	}  	memcpy(btf_ext->data, data, size); +	err = btf_ext_parse_hdr(btf_ext->data, size); +	if (err) +		goto done; +  	if (btf_ext->hdr->hdr_len < offsetofend(struct btf_ext_header, line_info_len)) {  		err = -EINVAL;  		goto done; @@ -2846,8 +2898,7 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext)  struct btf_dedup; -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, -				       const struct btf_dedup_opts *opts); +static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts);  static void btf_dedup_free(struct btf_dedup *d);  static int btf_dedup_prep(struct btf_dedup *d);  static int btf_dedup_strings(struct btf_dedup *d); @@ -2994,12 +3045,17 @@ static int btf_dedup_remap_types(struct btf_dedup *d);   * deduplicating structs/unions is described in greater details in comments for   * `btf_dedup_is_equiv` function.   
*/ -int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, -	       const struct btf_dedup_opts *opts) + +DEFAULT_VERSION(btf__dedup_v0_6_0, btf__dedup, LIBBPF_0.6.0) +int btf__dedup_v0_6_0(struct btf *btf, const struct btf_dedup_opts *opts)  { -	struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); +	struct btf_dedup *d;  	int err; +	if (!OPTS_VALID(opts, btf_dedup_opts)) +		return libbpf_err(-EINVAL); + +	d = btf_dedup_new(btf, opts);  	if (IS_ERR(d)) {  		pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d));  		return libbpf_err(-EINVAL); @@ -3051,6 +3107,19 @@ done:  	return libbpf_err(err);  } +COMPAT_VERSION(btf__dedup_deprecated, btf__dedup, LIBBPF_0.0.2) +int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *unused_opts) +{ +	LIBBPF_OPTS(btf_dedup_opts, opts, .btf_ext = btf_ext); + +	if (unused_opts) { +		pr_warn("please use new version of btf__dedup() that supports options\n"); +		return libbpf_err(-ENOTSUP); +	} + +	return btf__dedup(btf, &opts); +} +  #define BTF_UNPROCESSED_ID ((__u32)-1)  #define BTF_IN_PROGRESS_ID ((__u32)-2) @@ -3163,8 +3232,7 @@ static bool btf_dedup_equal_fn(const void *k1, const void *k2, void *ctx)  	return k1 == k2;  } -static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, -				       const struct btf_dedup_opts *opts) +static struct btf_dedup *btf_dedup_new(struct btf *btf, const struct btf_dedup_opts *opts)  {  	struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup));  	hashmap_hash_fn hash_fn = btf_dedup_identity_hash_fn; @@ -3173,13 +3241,11 @@ static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext,  	if (!d)  		return ERR_PTR(-ENOMEM); -	d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; -	/* dedup_table_size is now used only to force collisions in tests */ -	if (opts && opts->dedup_table_size == 1) +	if (OPTS_GET(opts, force_collisions, false))  		hash_fn = btf_dedup_collision_hash_fn;  	d->btf = btf; -	d->btf_ext = btf_ext; +	d->btf_ext = OPTS_GET(opts, btf_ext, NULL);  	d->dedup_table = hashmap__new(hash_fn, btf_dedup_equal_fn, NULL);  	if (IS_ERR(d->dedup_table)) { @@ -3443,8 +3509,8 @@ static long btf_hash_struct(struct btf_type *t)  }  /* - * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type - * IDs. This check is performed during type graph equivalence check and + * Check structural compatibility of two STRUCTs/UNIONs, ignoring referenced + * type IDs. This check is performed during type graph equivalence check and   * referenced types equivalence is checked separately.   
*/  static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) @@ -3625,6 +3691,7 @@ static int btf_dedup_prep(struct btf_dedup *d)  		case BTF_KIND_TYPEDEF:  		case BTF_KIND_FUNC:  		case BTF_KIND_FLOAT: +		case BTF_KIND_TYPE_TAG:  			h = btf_hash_common(t);  			break;  		case BTF_KIND_INT: @@ -3685,6 +3752,7 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)  	case BTF_KIND_VAR:  	case BTF_KIND_DATASEC:  	case BTF_KIND_DECL_TAG: +	case BTF_KIND_TYPE_TAG:  		return 0;  	case BTF_KIND_INT: @@ -3708,8 +3776,6 @@ static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id)  				new_id = cand_id;  				break;  			} -			if (d->opts.dont_resolve_fwds) -				continue;  			if (btf_compat_enum(t, cand)) {  				if (btf_is_enum_fwd(t)) {  					/* resolve fwd to full enum */ @@ -3817,6 +3883,31 @@ static int btf_dedup_identical_arrays(struct btf_dedup *d, __u32 id1, __u32 id2)  	return btf_equal_array(t1, t2);  } +/* Check if given two types are identical STRUCT/UNION definitions */ +static bool btf_dedup_identical_structs(struct btf_dedup *d, __u32 id1, __u32 id2) +{ +	const struct btf_member *m1, *m2; +	struct btf_type *t1, *t2; +	int n, i; + +	t1 = btf_type_by_id(d->btf, id1); +	t2 = btf_type_by_id(d->btf, id2); + +	if (!btf_is_composite(t1) || btf_kind(t1) != btf_kind(t2)) +		return false; + +	if (!btf_shallow_equal_struct(t1, t2)) +		return false; + +	m1 = btf_members(t1); +	m2 = btf_members(t2); +	for (i = 0, n = btf_vlen(t1); i < n; i++, m1++, m2++) { +		if (m1->type != m2->type) +			return false; +	} +	return true; +} +  /*   * Check equivalence of BTF type graph formed by candidate struct/union (we'll   * call it "candidate graph" in this description for brevity) to a type graph @@ -3928,6 +4019,8 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,  	hypot_type_id = d->hypot_map[canon_id];  	if (hypot_type_id <= BTF_MAX_NR_TYPES) { +		if (hypot_type_id == cand_id) +			return 1;  		/* In some cases compiler will generate different DWARF types  		 * for *identical* array type definitions and use them for  		 * different fields within the *same* struct. This breaks type @@ -3936,8 +4029,18 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,  		 * types within a single CU. So work around that by explicitly  		 * allowing identical array types here.  		 */ -		return hypot_type_id == cand_id || -		       btf_dedup_identical_arrays(d, hypot_type_id, cand_id); +		if (btf_dedup_identical_arrays(d, hypot_type_id, cand_id)) +			return 1; +		/* It turns out that similar situation can happen with +		 * struct/union sometimes, sigh... Handle the case where +		 * structs/unions are exactly the same, down to the referenced +		 * type IDs. Anything more complicated (e.g., if referenced +		 * types are different, but equivalent) is *way more* +		 * complicated and requires a many-to-many equivalence mapping. 
+		 */ +		if (btf_dedup_identical_structs(d, hypot_type_id, cand_id)) +			return 1; +		return 0;  	}  	if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) @@ -3952,8 +4055,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,  		return 0;  	/* FWD <--> STRUCT/UNION equivalence check, if enabled */ -	if (!d->opts.dont_resolve_fwds -	    && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) +	if ((cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD)  	    && cand_kind != canon_kind) {  		__u16 real_kind;  		__u16 fwd_kind; @@ -3979,10 +4081,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,  		return btf_equal_int_tag(cand_type, canon_type);  	case BTF_KIND_ENUM: -		if (d->opts.dont_resolve_fwds) -			return btf_equal_enum(cand_type, canon_type); -		else -			return btf_compat_enum(cand_type, canon_type); +		return btf_compat_enum(cand_type, canon_type);  	case BTF_KIND_FWD:  	case BTF_KIND_FLOAT: @@ -3994,6 +4093,7 @@ static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id,  	case BTF_KIND_PTR:  	case BTF_KIND_TYPEDEF:  	case BTF_KIND_FUNC: +	case BTF_KIND_TYPE_TAG:  		if (cand_type->info != canon_type->info)  			return 0;  		return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); @@ -4289,6 +4389,7 @@ static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id)  	case BTF_KIND_PTR:  	case BTF_KIND_TYPEDEF:  	case BTF_KIND_FUNC: +	case BTF_KIND_TYPE_TAG:  		ref_type_id = btf_dedup_ref_type(d, t->type);  		if (ref_type_id < 0)  			return ref_type_id; @@ -4595,6 +4696,7 @@ int btf_type_visit_type_ids(struct btf_type *t, type_id_visit_fn visit, void *ct  	case BTF_KIND_FUNC:  	case BTF_KIND_VAR:  	case BTF_KIND_DECL_TAG: +	case BTF_KIND_TYPE_TAG:  		return visit(&t->type, ctx);  	case BTF_KIND_ARRAY: { diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index bc005ba3ceec..061839f04525 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -157,7 +157,7 @@ LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  				    __u32 expected_value_size,  				    __u32 *key_type_id, __u32 *value_type_id); -LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API struct btf_ext *btf_ext__new(const __u8 *data, __u32 size);  LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext);  LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext,  					     __u32 *size); @@ -227,6 +227,7 @@ LIBBPF_API int btf__add_typedef(struct btf *btf, const char *name, int ref_type_  LIBBPF_API int btf__add_volatile(struct btf *btf, int ref_type_id);  LIBBPF_API int btf__add_const(struct btf *btf, int ref_type_id);  LIBBPF_API int btf__add_restrict(struct btf *btf, int ref_type_id); +LIBBPF_API int btf__add_type_tag(struct btf *btf, const char *value, int ref_type_id);  /* func and func_proto construction APIs */  LIBBPF_API int btf__add_func(struct btf *btf, const char *name, @@ -245,25 +246,86 @@ LIBBPF_API int btf__add_decl_tag(struct btf *btf, const char *value, int ref_typ  			    int component_idx);  struct btf_dedup_opts { -	unsigned int dedup_table_size; -	bool dont_resolve_fwds; +	size_t sz; +	/* optional .BTF.ext info to dedup along the main BTF info */ +	struct btf_ext *btf_ext; +	/* force hash collisions (used for testing) */ +	bool force_collisions; +	size_t :0;  }; +#define btf_dedup_opts__last_field force_collisions + +LIBBPF_API int btf__dedup(struct btf *btf, const struct btf_dedup_opts *opts); + +LIBBPF_API int btf__dedup_v0_6_0(struct btf *btf, const struct 
btf_dedup_opts *opts); -LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, -			  const struct btf_dedup_opts *opts); +LIBBPF_DEPRECATED_SINCE(0, 7, "use btf__dedup() instead") +LIBBPF_API int btf__dedup_deprecated(struct btf *btf, struct btf_ext *btf_ext, const void *opts); +#define btf__dedup(...) ___libbpf_overload(___btf_dedup, __VA_ARGS__) +#define ___btf_dedup3(btf, btf_ext, opts) btf__dedup_deprecated(btf, btf_ext, opts) +#define ___btf_dedup2(btf, opts) btf__dedup(btf, opts)  struct btf_dump;  struct btf_dump_opts { -	void *ctx; +	union { +		size_t sz; +		void *ctx; /* DEPRECATED: will be gone in v1.0 */ +	};  };  typedef void (*btf_dump_printf_fn_t)(void *ctx, const char *fmt, va_list args);  LIBBPF_API struct btf_dump *btf_dump__new(const struct btf *btf, -					  const struct btf_ext *btf_ext, -					  const struct btf_dump_opts *opts, -					  btf_dump_printf_fn_t printf_fn); +					  btf_dump_printf_fn_t printf_fn, +					  void *ctx, +					  const struct btf_dump_opts *opts); + +LIBBPF_API struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, +						 btf_dump_printf_fn_t printf_fn, +						 void *ctx, +						 const struct btf_dump_opts *opts); + +LIBBPF_API struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, +						     const struct btf_ext *btf_ext, +						     const struct btf_dump_opts *opts, +						     btf_dump_printf_fn_t printf_fn); + +/* Choose either btf_dump__new() or btf_dump__new_deprecated() based on the + * type of 4th argument. If it's btf_dump's print callback, use deprecated + * API; otherwise, choose the new btf_dump__new(). ___libbpf_override() + * doesn't work here because both variants have 4 input arguments. + * + * (void *) casts are necessary to avoid compilation warnings about type + * mismatches, because even though __builtin_choose_expr() only ever evaluates + * one side the other side still has to satisfy type constraints (this is + * compiler implementation limitation which might be lifted eventually, + * according to the documentation). So passing struct btf_ext in place of + * btf_dump_printf_fn_t would be generating compilation warning.  Casting to + * void * avoids this issue. + * + * Also, two type compatibility checks for a function and function pointer are + * required because passing function reference into btf_dump__new() as + * btf_dump__new(..., my_callback, ...) and as btf_dump__new(..., + * &my_callback, ...) (not explicit ampersand in the latter case) actually + * differs as far as __builtin_types_compatible_p() is concerned. Thus two + * checks are combined to detect callback argument. + * + * The rest works just like in case of ___libbpf_override() usage with symbol + * versioning. + * + * C++ compilers don't support __builtin_types_compatible_p(), so at least + * don't screw up compilation for them and let C++ users pick btf_dump__new + * vs btf_dump__new_deprecated explicitly. 
+ */ +#ifndef __cplusplus +#define btf_dump__new(a1, a2, a3, a4) __builtin_choose_expr(				\ +	__builtin_types_compatible_p(typeof(a4), btf_dump_printf_fn_t) ||		\ +	__builtin_types_compatible_p(typeof(a4), void(void *, const char *, va_list)),	\ +	btf_dump__new_deprecated((void *)a1, (void *)a2, (void *)a3, (void *)a4),	\ +	btf_dump__new((void *)a1, (void *)a2, (void *)a3, (void *)a4)) +#endif +  LIBBPF_API void btf_dump__free(struct btf_dump *d);  LIBBPF_API int btf_dump__dump_type(struct btf_dump *d, __u32 id); @@ -403,7 +465,8 @@ static inline bool btf_is_mod(const struct btf_type *t)  	return kind == BTF_KIND_VOLATILE ||  	       kind == BTF_KIND_CONST || -	       kind == BTF_KIND_RESTRICT; +	       kind == BTF_KIND_RESTRICT || +	       kind == BTF_KIND_TYPE_TAG;  }  static inline bool btf_is_func(const struct btf_type *t) @@ -436,6 +499,11 @@ static inline bool btf_is_decl_tag(const struct btf_type *t)  	return btf_kind(t) == BTF_KIND_DECL_TAG;  } +static inline bool btf_is_type_tag(const struct btf_type *t) +{ +	return btf_kind(t) == BTF_KIND_TYPE_TAG; +} +  static inline __u8 btf_int_encoding(const struct btf_type *t)  {  	return BTF_INT_ENCODING(*(__u32 *)(t + 1)); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index 17db62b5002e..b9a3260c83cb 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -77,9 +77,8 @@ struct btf_dump_data {  struct btf_dump {  	const struct btf *btf; -	const struct btf_ext *btf_ext;  	btf_dump_printf_fn_t printf_fn; -	struct btf_dump_opts opts; +	void *cb_ctx;  	int ptr_sz;  	bool strip_mods;  	bool skip_anon_defs; @@ -138,29 +137,32 @@ static void btf_dump_printf(const struct btf_dump *d, const char *fmt, ...)  	va_list args;  	va_start(args, fmt); -	d->printf_fn(d->opts.ctx, fmt, args); +	d->printf_fn(d->cb_ctx, fmt, args);  	va_end(args);  }  static int btf_dump_mark_referenced(struct btf_dump *d);  static int btf_dump_resize(struct btf_dump *d); -struct btf_dump *btf_dump__new(const struct btf *btf, -			       const struct btf_ext *btf_ext, -			       const struct btf_dump_opts *opts, -			       btf_dump_printf_fn_t printf_fn) +DEFAULT_VERSION(btf_dump__new_v0_6_0, btf_dump__new, LIBBPF_0.6.0) +struct btf_dump *btf_dump__new_v0_6_0(const struct btf *btf, +				      btf_dump_printf_fn_t printf_fn, +				      void *ctx, +				      const struct btf_dump_opts *opts)  {  	struct btf_dump *d;  	int err; +	if (!printf_fn) +		return libbpf_err_ptr(-EINVAL); +  	d = calloc(1, sizeof(struct btf_dump));  	if (!d)  		return libbpf_err_ptr(-ENOMEM);  	d->btf = btf; -	d->btf_ext = btf_ext;  	d->printf_fn = printf_fn; -	d->opts.ctx = opts ? opts->ctx : NULL; +	d->cb_ctx = ctx;  	d->ptr_sz = btf__pointer_size(btf) ? : sizeof(void *);  	d->type_names = hashmap__new(str_hash_fn, str_equal_fn, NULL); @@ -186,6 +188,17 @@ err:  	return libbpf_err_ptr(err);  } +COMPAT_VERSION(btf_dump__new_deprecated, btf_dump__new, LIBBPF_0.0.4) +struct btf_dump *btf_dump__new_deprecated(const struct btf *btf, +					  const struct btf_ext *btf_ext, +					  const struct btf_dump_opts *opts, +					  btf_dump_printf_fn_t printf_fn) +{ +	if (!printf_fn) +		return libbpf_err_ptr(-EINVAL); +	return btf_dump__new_v0_6_0(btf, printf_fn, opts ? 
opts->ctx : NULL, opts); +} +  static int btf_dump_resize(struct btf_dump *d)  {  	int err, last_id = btf__type_cnt(d->btf) - 1; @@ -317,6 +330,7 @@ static int btf_dump_mark_referenced(struct btf_dump *d)  		case BTF_KIND_FUNC:  		case BTF_KIND_VAR:  		case BTF_KIND_DECL_TAG: +		case BTF_KIND_TYPE_TAG:  			d->type_states[t->type].referenced = 1;  			break; @@ -560,6 +574,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)  	case BTF_KIND_VOLATILE:  	case BTF_KIND_CONST:  	case BTF_KIND_RESTRICT: +	case BTF_KIND_TYPE_TAG:  		return btf_dump_order_type(d, t->type, through_ptr);  	case BTF_KIND_FUNC_PROTO: { @@ -734,6 +749,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)  	case BTF_KIND_VOLATILE:  	case BTF_KIND_CONST:  	case BTF_KIND_RESTRICT: +	case BTF_KIND_TYPE_TAG:  		btf_dump_emit_type(d, t->type, cont_id);  		break;  	case BTF_KIND_ARRAY: @@ -1154,6 +1170,7 @@ skip_mod:  		case BTF_KIND_CONST:  		case BTF_KIND_RESTRICT:  		case BTF_KIND_FUNC_PROTO: +		case BTF_KIND_TYPE_TAG:  			id = t->type;  			break;  		case BTF_KIND_ARRAY: @@ -1322,6 +1339,11 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,  		case BTF_KIND_RESTRICT:  			btf_dump_printf(d, " restrict");  			break; +		case BTF_KIND_TYPE_TAG: +			btf_dump_emit_mods(d, decls); +			name = btf_name_of(d, t->name_off); +			btf_dump_printf(d, " __attribute__((btf_type_tag(\"%s\")))", name); +			break;  		case BTF_KIND_ARRAY: {  			const struct btf_array *a = btf_array(t);  			const struct btf_type *next_t; @@ -2194,7 +2216,7 @@ static int btf_dump_dump_type_data(struct btf_dump *d,  				   __u8 bits_offset,  				   __u8 bit_sz)  { -	int size, err; +	int size, err = 0;  	size = btf_dump_type_data_check_overflow(d, t, id, data, bits_offset);  	if (size < 0) @@ -2299,8 +2321,8 @@ int btf_dump__dump_type_data(struct btf_dump *d, __u32 id,  	if (!opts->indent_str)  		d->typed_dump->indent_str[0] = '\t';  	else -		strncat(d->typed_dump->indent_str, opts->indent_str, -			sizeof(d->typed_dump->indent_str) - 1); +		libbpf_strlcpy(d->typed_dump->indent_str, opts->indent_str, +			       sizeof(d->typed_dump->indent_str));  	d->typed_dump->compact = OPTS_GET(opts, compact, false);  	d->typed_dump->skip_names = OPTS_GET(opts, skip_names, false); diff --git a/tools/lib/bpf/gen_loader.c b/tools/lib/bpf/gen_loader.c index 9934851ccde7..8ecef1088ba2 100644 --- a/tools/lib/bpf/gen_loader.c +++ b/tools/lib/bpf/gen_loader.c @@ -371,8 +371,9 @@ int bpf_gen__finish(struct bpf_gen *gen, int nr_progs, int nr_maps)  {  	int i; -	if (nr_progs != gen->nr_progs || nr_maps != gen->nr_maps) { -		pr_warn("progs/maps mismatch\n"); +	if (nr_progs < gen->nr_progs || nr_maps != gen->nr_maps) { +		pr_warn("nr_progs %d/%d nr_maps %d/%d mismatch\n", +			nr_progs, gen->nr_progs, nr_maps, gen->nr_maps);  		gen->error = -EFAULT;  		return gen->error;  	} @@ -445,47 +446,32 @@ void bpf_gen__load_btf(struct bpf_gen *gen, const void *btf_raw_data,  }  void bpf_gen__map_create(struct bpf_gen *gen, -			 struct bpf_create_map_params *map_attr, int map_idx) +			 enum bpf_map_type map_type, +			 const char *map_name, +			 __u32 key_size, __u32 value_size, __u32 max_entries, +			 struct bpf_map_create_opts *map_attr, int map_idx)  { -	int attr_size = offsetofend(union bpf_attr, btf_vmlinux_value_type_id); +	int attr_size = offsetofend(union bpf_attr, map_extra);  	bool close_inner_map_fd = false;  	int map_create_attr, idx;  	union bpf_attr attr;  	memset(&attr, 0, attr_size); -	attr.map_type = map_attr->map_type; 
-	attr.key_size = map_attr->key_size; -	attr.value_size = map_attr->value_size; +	attr.map_type = map_type; +	attr.key_size = key_size; +	attr.value_size = value_size;  	attr.map_flags = map_attr->map_flags;  	attr.map_extra = map_attr->map_extra; -	memcpy(attr.map_name, map_attr->name, -	       min((unsigned)strlen(map_attr->name), BPF_OBJ_NAME_LEN - 1)); +	if (map_name) +		libbpf_strlcpy(attr.map_name, map_name, sizeof(attr.map_name));  	attr.numa_node = map_attr->numa_node;  	attr.map_ifindex = map_attr->map_ifindex; -	attr.max_entries = map_attr->max_entries; -	switch (attr.map_type) { -	case BPF_MAP_TYPE_PERF_EVENT_ARRAY: -	case BPF_MAP_TYPE_CGROUP_ARRAY: -	case BPF_MAP_TYPE_STACK_TRACE: -	case BPF_MAP_TYPE_ARRAY_OF_MAPS: -	case BPF_MAP_TYPE_HASH_OF_MAPS: -	case BPF_MAP_TYPE_DEVMAP: -	case BPF_MAP_TYPE_DEVMAP_HASH: -	case BPF_MAP_TYPE_CPUMAP: -	case BPF_MAP_TYPE_XSKMAP: -	case BPF_MAP_TYPE_SOCKMAP: -	case BPF_MAP_TYPE_SOCKHASH: -	case BPF_MAP_TYPE_QUEUE: -	case BPF_MAP_TYPE_STACK: -	case BPF_MAP_TYPE_RINGBUF: -		break; -	default: -		attr.btf_key_type_id = map_attr->btf_key_type_id; -		attr.btf_value_type_id = map_attr->btf_value_type_id; -	} +	attr.max_entries = max_entries; +	attr.btf_key_type_id = map_attr->btf_key_type_id; +	attr.btf_value_type_id = map_attr->btf_value_type_id;  	pr_debug("gen: map_create: %s idx %d type %d value_type_id %d\n", -		 attr.map_name, map_idx, map_attr->map_type, attr.btf_value_type_id); +		 attr.map_name, map_idx, map_type, attr.btf_value_type_id);  	map_create_attr = add_data(gen, &attr, attr_size);  	if (attr.btf_value_type_id) @@ -512,7 +498,7 @@ void bpf_gen__map_create(struct bpf_gen *gen,  	/* emit MAP_CREATE command */  	emit_sys_bpf(gen, BPF_MAP_CREATE, map_create_attr, attr_size);  	debug_ret(gen, "map_create %s idx %d type %d value_size %d value_btf_id %d", -		  attr.map_name, map_idx, map_attr->map_type, attr.value_size, +		  attr.map_name, map_idx, map_type, value_size,  		  attr.btf_value_type_id);  	emit_check_err(gen);  	/* remember map_fd in the stack, if successful */ @@ -597,8 +583,9 @@ void bpf_gen__record_extern(struct bpf_gen *gen, const char *name, bool is_weak,  static struct ksym_desc *get_ksym_desc(struct bpf_gen *gen, struct ksym_relo_desc *relo)  {  	struct ksym_desc *kdesc; +	int i; -	for (int i = 0; i < gen->nr_ksyms; i++) { +	for (i = 0; i < gen->nr_ksyms; i++) {  		if (!strcmp(gen->ksyms[i].name, relo->name)) {  			gen->ksyms[i].ref++;  			return &gen->ksyms[i]; @@ -700,27 +687,29 @@ static void emit_relo_kfunc_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo  		return;  	}  	kdesc->off = btf_fd_idx; -	/* set a default value for imm */ +	/* jump to success case */ +	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); +	/* set value for imm, off as 0 */  	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0)); -	/* skip success case store if ret < 0 */ -	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 1)); +	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); +	/* skip success case for ret < 0 */ +	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 10));  	/* store btf_id into insn[insn_idx].imm */  	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm))); +	/* obtain fd in BPF_REG_9 */ +	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); +	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32)); +	/* jump to fd_array store if fd denotes module BTF */ +	emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 2)); +	/* set the default value for off */ +	emit(gen, BPF_ST_MEM(BPF_H, 
BPF_REG_8, offsetof(struct bpf_insn, off), 0)); +	/* skip BTF fd store for vmlinux BTF */ +	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));  	/* load fd_array slot pointer */  	emit2(gen, BPF_LD_IMM64_RAW_FULL(BPF_REG_0, BPF_PSEUDO_MAP_IDX_VALUE,  					 0, 0, 0, blob_fd_array_off(gen, btf_fd_idx))); -	/* skip store of BTF fd if ret < 0 */ -	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 3));  	/* store BTF fd in slot */ -	emit(gen, BPF_MOV64_REG(BPF_REG_9, BPF_REG_7)); -	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_9, 32));  	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_9, 0)); -	/* set a default value for off */ -	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), 0)); -	/* skip insn->off store if ret < 0 */ -	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 2)); -	/* skip if vmlinux BTF */ -	emit(gen, BPF_JMP_IMM(BPF_JEQ, BPF_REG_9, 0, 1));  	/* store index into insn[insn_idx].off */  	emit(gen, BPF_ST_MEM(BPF_H, BPF_REG_8, offsetof(struct bpf_insn, off), btf_fd_idx));  log: @@ -819,9 +808,8 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,  			       kdesc->insn + offsetof(struct bpf_insn, imm));  		move_blob2blob(gen, insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 4,  			       kdesc->insn + sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm)); -		emit(gen, BPF_LDX_MEM(BPF_W, BPF_REG_9, BPF_REG_8, offsetof(struct bpf_insn, imm))); -		/* jump over src_reg adjustment if imm is not 0 */ -		emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_9, 0, 3)); +		/* jump over src_reg adjustment if imm is not 0, reuse BPF_REG_0 from move_blob2blob */ +		emit(gen, BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 3));  		goto clear_src_reg;  	}  	/* remember insn offset, so we can copy BTF ID and FD later */ @@ -829,17 +817,20 @@ static void emit_relo_ksym_btf(struct bpf_gen *gen, struct ksym_relo_desc *relo,  	emit_bpf_find_by_name_kind(gen, relo);  	if (!relo->is_weak)  		emit_check_err(gen); -	/* set default values as 0 */ +	/* jump to success case */ +	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3)); +	/* set values for insn[insn_idx].imm, insn[insn_idx + 1].imm as 0 */  	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, offsetof(struct bpf_insn, imm), 0));  	emit(gen, BPF_ST_MEM(BPF_W, BPF_REG_8, sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm), 0)); -	/* skip success case stores if ret < 0 */ -	emit(gen, BPF_JMP_IMM(BPF_JSLT, BPF_REG_7, 0, 4)); +	/* skip success case for ret < 0 */ +	emit(gen, BPF_JMP_IMM(BPF_JA, 0, 0, 4));  	/* store btf_id into insn[insn_idx].imm */  	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7, offsetof(struct bpf_insn, imm)));  	/* store btf_obj_fd into insn[insn_idx + 1].imm */  	emit(gen, BPF_ALU64_IMM(BPF_RSH, BPF_REG_7, 32));  	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_8, BPF_REG_7,  			      sizeof(struct bpf_insn) + offsetof(struct bpf_insn, imm))); +	/* skip src_reg adjustment */  	emit(gen, BPF_JMP_IMM(BPF_JSGE, BPF_REG_7, 0, 3));  clear_src_reg:  	/* clear bpf_object__relocate_data's src_reg assignment, otherwise we get a verifier failure */ @@ -851,6 +842,22 @@ clear_src_reg:  	emit_ksym_relo_log(gen, relo, kdesc->ref);  } +void bpf_gen__record_relo_core(struct bpf_gen *gen, +			       const struct bpf_core_relo *core_relo) +{ +	struct bpf_core_relo *relos; + +	relos = libbpf_reallocarray(gen->core_relos, gen->core_relo_cnt + 1, sizeof(*relos)); +	if (!relos) { +		gen->error = -ENOMEM; +		return; +	} +	gen->core_relos = relos; +	relos += gen->core_relo_cnt; +	memcpy(relos, core_relo, sizeof(*relos)); +	gen->core_relo_cnt++; +} 
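/* [Editor's illustration, not part of the patch] bpf_gen__record_relo_core()
 * above only buffers each CO-RE relocation on the generator state; later,
 * bpf_gen__prog_load() copies the buffered array into the loader blob and
 * points attr.core_relos at it (see the core_relo_cnt / core_relo_rec_size
 * hunks below). A minimal sketch of the same grow-by-one append idiom,
 * assuming only libbpf_reallocarray() and the UAPI struct bpf_core_relo;
 * the helper name is made up for illustration:
 *
 *	static int append_core_relo(struct bpf_gen *gen,
 *				    const struct bpf_core_relo *relo)
 *	{
 *		struct bpf_core_relo *arr;
 *
 *		arr = libbpf_reallocarray(gen->core_relos,
 *					  gen->core_relo_cnt + 1, sizeof(*arr));
 *		if (!arr)
 *			return -ENOMEM;			// caller records gen->error
 *		gen->core_relos = arr;			// array may have moved
 *		arr[gen->core_relo_cnt++] = *relo;	// copy record into the new slot
 *		return 0;
 *	}
 */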
+  static void emit_relo(struct bpf_gen *gen, struct ksym_relo_desc *relo, int insns)  {  	int insn; @@ -883,6 +890,15 @@ static void emit_relos(struct bpf_gen *gen, int insns)  		emit_relo(gen, gen->relos + i, insns);  } +static void cleanup_core_relo(struct bpf_gen *gen) +{ +	if (!gen->core_relo_cnt) +		return; +	free(gen->core_relos); +	gen->core_relo_cnt = 0; +	gen->core_relos = NULL; +} +  static void cleanup_relos(struct bpf_gen *gen, int insns)  {  	int i, insn; @@ -910,30 +926,32 @@ static void cleanup_relos(struct bpf_gen *gen, int insns)  		gen->relo_cnt = 0;  		gen->relos = NULL;  	} +	cleanup_core_relo(gen);  }  void bpf_gen__prog_load(struct bpf_gen *gen, -			struct bpf_prog_load_params *load_attr, int prog_idx) +			enum bpf_prog_type prog_type, const char *prog_name, +			const char *license, struct bpf_insn *insns, size_t insn_cnt, +			struct bpf_prog_load_opts *load_attr, int prog_idx)  { -	int attr_size = offsetofend(union bpf_attr, fd_array); -	int prog_load_attr, license, insns, func_info, line_info; +	int prog_load_attr, license_off, insns_off, func_info, line_info, core_relos; +	int attr_size = offsetofend(union bpf_attr, core_relo_rec_size);  	union bpf_attr attr;  	memset(&attr, 0, attr_size); -	pr_debug("gen: prog_load: type %d insns_cnt %zd\n", -		 load_attr->prog_type, load_attr->insn_cnt); +	pr_debug("gen: prog_load: type %d insns_cnt %zd progi_idx %d\n", +		 prog_type, insn_cnt, prog_idx);  	/* add license string to blob of bytes */ -	license = add_data(gen, load_attr->license, strlen(load_attr->license) + 1); +	license_off = add_data(gen, license, strlen(license) + 1);  	/* add insns to blob of bytes */ -	insns = add_data(gen, load_attr->insns, -			 load_attr->insn_cnt * sizeof(struct bpf_insn)); +	insns_off = add_data(gen, insns, insn_cnt * sizeof(struct bpf_insn)); -	attr.prog_type = load_attr->prog_type; +	attr.prog_type = prog_type;  	attr.expected_attach_type = load_attr->expected_attach_type;  	attr.attach_btf_id = load_attr->attach_btf_id;  	attr.prog_ifindex = load_attr->prog_ifindex;  	attr.kern_version = 0; -	attr.insn_cnt = (__u32)load_attr->insn_cnt; +	attr.insn_cnt = (__u32)insn_cnt;  	attr.prog_flags = load_attr->prog_flags;  	attr.func_info_rec_size = load_attr->func_info_rec_size; @@ -946,15 +964,19 @@ void bpf_gen__prog_load(struct bpf_gen *gen,  	line_info = add_data(gen, load_attr->line_info,  			     attr.line_info_cnt * attr.line_info_rec_size); -	memcpy(attr.prog_name, load_attr->name, -	       min((unsigned)strlen(load_attr->name), BPF_OBJ_NAME_LEN - 1)); +	attr.core_relo_rec_size = sizeof(struct bpf_core_relo); +	attr.core_relo_cnt = gen->core_relo_cnt; +	core_relos = add_data(gen, gen->core_relos, +			     attr.core_relo_cnt * attr.core_relo_rec_size); + +	libbpf_strlcpy(attr.prog_name, prog_name, sizeof(attr.prog_name));  	prog_load_attr = add_data(gen, &attr, attr_size);  	/* populate union bpf_attr with a pointer to license */ -	emit_rel_store(gen, attr_field(prog_load_attr, license), license); +	emit_rel_store(gen, attr_field(prog_load_attr, license), license_off);  	/* populate union bpf_attr with a pointer to instructions */ -	emit_rel_store(gen, attr_field(prog_load_attr, insns), insns); +	emit_rel_store(gen, attr_field(prog_load_attr, insns), insns_off);  	/* populate union bpf_attr with a pointer to func_info */  	emit_rel_store(gen, attr_field(prog_load_attr, func_info), func_info); @@ -962,6 +984,9 @@ void bpf_gen__prog_load(struct bpf_gen *gen,  	/* populate union bpf_attr with a pointer to line_info */  	emit_rel_store(gen, 
attr_field(prog_load_attr, line_info), line_info); +	/* populate union bpf_attr with a pointer to core_relos */ +	emit_rel_store(gen, attr_field(prog_load_attr, core_relos), core_relos); +  	/* populate union bpf_attr fd_array with a pointer to data where map_fds are saved */  	emit_rel_store(gen, attr_field(prog_load_attr, fd_array), gen->fd_array); @@ -986,15 +1011,17 @@ void bpf_gen__prog_load(struct bpf_gen *gen,  		emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_0, BPF_REG_7,  				      offsetof(union bpf_attr, attach_btf_obj_fd)));  	} -	emit_relos(gen, insns); +	emit_relos(gen, insns_off);  	/* emit PROG_LOAD command */  	emit_sys_bpf(gen, BPF_PROG_LOAD, prog_load_attr, attr_size);  	debug_ret(gen, "prog_load %s insn_cnt %d", attr.prog_name, attr.insn_cnt);  	/* successful or not, close btf module FDs used in extern ksyms and attach_btf_obj_fd */ -	cleanup_relos(gen, insns); -	if (gen->attach_kind) +	cleanup_relos(gen, insns_off); +	if (gen->attach_kind) {  		emit_sys_close_blob(gen,  				    attr_field(prog_load_attr, attach_btf_obj_fd)); +		gen->attach_kind = 0; +	}  	emit_check_err(gen);  	/* remember prog_fd in the stack, if successful */  	emit(gen, BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_7, @@ -1040,6 +1067,33 @@ void bpf_gen__map_update_elem(struct bpf_gen *gen, int map_idx, void *pvalue,  	emit_check_err(gen);  } +void bpf_gen__populate_outer_map(struct bpf_gen *gen, int outer_map_idx, int slot, +				 int inner_map_idx) +{ +	int attr_size = offsetofend(union bpf_attr, flags); +	int map_update_attr, key; +	union bpf_attr attr; + +	memset(&attr, 0, attr_size); +	pr_debug("gen: populate_outer_map: outer %d key %d inner %d\n", +		 outer_map_idx, slot, inner_map_idx); + +	key = add_data(gen, &slot, sizeof(slot)); + +	map_update_attr = add_data(gen, &attr, attr_size); +	move_blob2blob(gen, attr_field(map_update_attr, map_fd), 4, +		       blob_fd_array_off(gen, outer_map_idx)); +	emit_rel_store(gen, attr_field(map_update_attr, key), key); +	emit_rel_store(gen, attr_field(map_update_attr, value), +		       blob_fd_array_off(gen, inner_map_idx)); + +	/* emit MAP_UPDATE_ELEM command */ +	emit_sys_bpf(gen, BPF_MAP_UPDATE_ELEM, map_update_attr, attr_size); +	debug_ret(gen, "populate_outer_map outer %d key %d inner %d", +		  outer_map_idx, slot, inner_map_idx); +	emit_check_err(gen); +} +  void bpf_gen__map_freeze(struct bpf_gen *gen, int map_idx)  {  	int attr_size = offsetofend(union bpf_attr, map_fd); diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 7c74342bb668..7f10dd501a52 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -168,39 +168,24 @@ int libbpf_set_strict_mode(enum libbpf_strict_mode mode)  	return 0;  } -enum kern_feature_id { -	/* v4.14: kernel support for program & map names. */ -	FEAT_PROG_NAME, -	/* v5.2: kernel support for global data sections. 
*/ -	FEAT_GLOBAL_DATA, -	/* BTF support */ -	FEAT_BTF, -	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ -	FEAT_BTF_FUNC, -	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */ -	FEAT_BTF_DATASEC, -	/* BTF_FUNC_GLOBAL is supported */ -	FEAT_BTF_GLOBAL_FUNC, -	/* BPF_F_MMAPABLE is supported for arrays */ -	FEAT_ARRAY_MMAP, -	/* kernel support for expected_attach_type in BPF_PROG_LOAD */ -	FEAT_EXP_ATTACH_TYPE, -	/* bpf_probe_read_{kernel,user}[_str] helpers */ -	FEAT_PROBE_READ_KERN, -	/* BPF_PROG_BIND_MAP is supported */ -	FEAT_PROG_BIND_MAP, -	/* Kernel support for module BTFs */ -	FEAT_MODULE_BTF, -	/* BTF_KIND_FLOAT support */ -	FEAT_BTF_FLOAT, -	/* BPF perf link support */ -	FEAT_PERF_LINK, -	/* BTF_KIND_DECL_TAG support */ -	FEAT_BTF_DECL_TAG, -	__FEAT_CNT, -}; +__u32 libbpf_major_version(void) +{ +	return LIBBPF_MAJOR_VERSION; +} -static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +__u32 libbpf_minor_version(void) +{ +	return LIBBPF_MINOR_VERSION; +} + +const char *libbpf_version_string(void) +{ +#define __S(X) #X +#define _S(X) __S(X) +	return  "v" _S(LIBBPF_MAJOR_VERSION) "." _S(LIBBPF_MINOR_VERSION); +#undef _S +#undef __S +}  enum reloc_type {  	RELO_LD64, @@ -209,19 +194,25 @@ enum reloc_type {  	RELO_EXTERN_VAR,  	RELO_EXTERN_FUNC,  	RELO_SUBPROG_ADDR, +	RELO_CORE,  };  struct reloc_desc {  	enum reloc_type type;  	int insn_idx; -	int map_idx; -	int sym_off; +	union { +		const struct bpf_core_relo *core_relo; /* used when type == RELO_CORE */ +		struct { +			int map_idx; +			int sym_off; +		}; +	};  };  struct bpf_sec_def;  typedef int (*init_fn_t)(struct bpf_program *prog, long cookie); -typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_params *attr, long cookie); +typedef int (*preload_fn_t)(struct bpf_program *prog, struct bpf_prog_load_opts *opts, long cookie);  typedef struct bpf_link *(*attach_fn_t)(const struct bpf_program *prog, long cookie);  /* stored as sec_def->cookie for all libbpf-supported SEC()s */ @@ -304,7 +295,11 @@ struct bpf_program {  	struct reloc_desc *reloc_desc;  	int nr_reloc; -	int log_level; + +	/* BPF verifier log settings */ +	char *log_buf; +	size_t log_size; +	__u32 log_level;  	struct {  		int nr; @@ -400,6 +395,7 @@ struct bpf_map {  	char *pin_path;  	bool pinned;  	bool reused; +	bool skipped;  	__u64 map_extra;  }; @@ -546,6 +542,11 @@ struct bpf_object {  	size_t btf_module_cnt;  	size_t btf_module_cap; +	/* optional log settings passed to BPF_BTF_LOAD and BPF_PROG_LOAD commands */ +	char *log_buf; +	size_t log_size; +	__u32 log_level; +  	void *priv;  	bpf_object_clear_priv_t clear_priv; @@ -681,6 +682,9 @@ bpf_object__init_prog(struct bpf_object *obj, struct bpf_program *prog,  	prog->instances.fds = NULL;  	prog->instances.nr = -1; +	/* inherit object's log_level */ +	prog->log_level = obj->log_level; +  	prog->sec_name = strdup(sec_name);  	if (!prog->sec_name)  		goto errout; @@ -791,11 +795,36 @@ bpf_object__add_programs(struct bpf_object *obj, Elf_Data *sec_data,  	return 0;  } -static __u32 get_kernel_version(void) +__u32 get_kernel_version(void)  { +	/* On Ubuntu LINUX_VERSION_CODE doesn't correspond to info.release, +	 * but Ubuntu provides /proc/version_signature file, as described at +	 * https://ubuntu.com/kernel, with an example contents below, which we +	 * can use to get a proper LINUX_VERSION_CODE. 
+	 * +	 *   Ubuntu 5.4.0-12.15-generic 5.4.8 +	 * +	 * In the above, 5.4.8 is what kernel is actually expecting, while +	 * uname() call will return 5.4.0 in info.release. +	 */ +	const char *ubuntu_kver_file = "/proc/version_signature";  	__u32 major, minor, patch;  	struct utsname info; +	if (access(ubuntu_kver_file, R_OK) == 0) { +		FILE *f; + +		f = fopen(ubuntu_kver_file, "r"); +		if (f) { +			if (fscanf(f, "%*s %*s %d.%d.%d\n", &major, &minor, &patch) == 3) { +				fclose(f); +				return KERNEL_VERSION(major, minor, patch); +			} +			fclose(f); +		} +		/* something went wrong, fall back to uname() approach */ +	} +  	uname(&info);  	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3)  		return 0; @@ -1161,12 +1190,10 @@ static struct bpf_object *bpf_object__new(const char *path,  	strcpy(obj->path, path);  	if (obj_name) { -		strncpy(obj->name, obj_name, sizeof(obj->name) - 1); -		obj->name[sizeof(obj->name) - 1] = 0; +		libbpf_strlcpy(obj->name, obj_name, sizeof(obj->name));  	} else {  		/* Using basename() GNU version which doesn't modify arg. */ -		strncpy(obj->name, basename((void *)path), -			sizeof(obj->name) - 1); +		libbpf_strlcpy(obj->name, basename((void *)path), sizeof(obj->name));  		end = strchr(obj->name, '.');  		if (end)  			*end = 0; @@ -1318,7 +1345,10 @@ static int bpf_object__check_endianness(struct bpf_object *obj)  static int  bpf_object__init_license(struct bpf_object *obj, void *data, size_t size)  { -	memcpy(obj->license, data, min(size, sizeof(obj->license) - 1)); +	/* libbpf_strlcpy() only copies first N - 1 bytes, so size + 1 won't +	 * go over allowed ELF data section buffer +	 */ +	libbpf_strlcpy(obj->license, data, min(size + 1, sizeof(obj->license)));  	pr_debug("license of %s is %s\n", obj->path, obj->license);  	return 0;  } @@ -2076,6 +2106,7 @@ static const char *__btf_kind_str(__u16 kind)  	case BTF_KIND_DATASEC: return "datasec";  	case BTF_KIND_FLOAT: return "float";  	case BTF_KIND_DECL_TAG: return "decl_tag"; +	case BTF_KIND_TYPE_TAG: return "type_tag";  	default: return "unknown";  	}  } @@ -2255,6 +2286,9 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,  			map_def->parts |= MAP_DEF_VALUE_SIZE | MAP_DEF_VALUE_TYPE;  		}  		else if (strcmp(name, "values") == 0) { +			bool is_map_in_map = bpf_map_type__is_map_in_map(map_def->map_type); +			bool is_prog_array = map_def->map_type == BPF_MAP_TYPE_PROG_ARRAY; +			const char *desc = is_map_in_map ? 
"map-in-map inner" : "prog-array value";  			char inner_map_name[128];  			int err; @@ -2268,8 +2302,8 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,  					map_name, name);  				return -EINVAL;  			} -			if (!bpf_map_type__is_map_in_map(map_def->map_type)) { -				pr_warn("map '%s': should be map-in-map.\n", +			if (!is_map_in_map && !is_prog_array) { +				pr_warn("map '%s': should be map-in-map or prog-array.\n",  					map_name);  				return -ENOTSUP;  			} @@ -2281,22 +2315,30 @@ int parse_btf_map_def(const char *map_name, struct btf *btf,  			map_def->value_size = 4;  			t = btf__type_by_id(btf, m->type);  			if (!t) { -				pr_warn("map '%s': map-in-map inner type [%d] not found.\n", -					map_name, m->type); +				pr_warn("map '%s': %s type [%d] not found.\n", +					map_name, desc, m->type);  				return -EINVAL;  			}  			if (!btf_is_array(t) || btf_array(t)->nelems) { -				pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n", -					map_name); +				pr_warn("map '%s': %s spec is not a zero-sized array.\n", +					map_name, desc);  				return -EINVAL;  			}  			t = skip_mods_and_typedefs(btf, btf_array(t)->type, NULL);  			if (!btf_is_ptr(t)) { -				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n", -					map_name, btf_kind_str(t)); +				pr_warn("map '%s': %s def is of unexpected kind %s.\n", +					map_name, desc, btf_kind_str(t));  				return -EINVAL;  			}  			t = skip_mods_and_typedefs(btf, t->type, NULL); +			if (is_prog_array) { +				if (!btf_is_func_proto(t)) { +					pr_warn("map '%s': prog-array value def is of unexpected kind %s.\n", +						map_name, btf_kind_str(t)); +					return -EINVAL; +				} +				continue; +			}  			if (!btf_is_struct(t)) {  				pr_warn("map '%s': map-in-map inner def is of unexpected kind %s.\n",  					map_name, btf_kind_str(t)); @@ -2588,8 +2630,10 @@ static bool btf_needs_sanitization(struct bpf_object *obj)  	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);  	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);  	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); +	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG); -	return !has_func || !has_datasec || !has_func_global || !has_float || !has_decl_tag; +	return !has_func || !has_datasec || !has_func_global || !has_float || +	       !has_decl_tag || !has_type_tag;  }  static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf) @@ -2599,6 +2643,7 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)  	bool has_float = kernel_supports(obj, FEAT_BTF_FLOAT);  	bool has_func = kernel_supports(obj, FEAT_BTF_FUNC);  	bool has_decl_tag = kernel_supports(obj, FEAT_BTF_DECL_TAG); +	bool has_type_tag = kernel_supports(obj, FEAT_BTF_TYPE_TAG);  	struct btf_type *t;  	int i, j, vlen; @@ -2657,6 +2702,10 @@ static void bpf_object__sanitize_btf(struct bpf_object *obj, struct btf *btf)  			 */  			t->name_off = 0;  			t->info = BTF_INFO_ENC(BTF_KIND_STRUCT, 0, 0); +		} else if (!has_type_tag && btf_is_type_tag(t)) { +			/* replace TYPE_TAG with a CONST */ +			t->name_off = 0; +			t->info = BTF_INFO_ENC(BTF_KIND_CONST, 0, 0);  		}  	}  } @@ -2752,13 +2801,12 @@ static int btf_fixup_datasec(struct bpf_object *obj, struct btf *btf,  	for (i = 0, vsi = btf_var_secinfos(t); i < vars; i++, vsi++) {  		t_var = btf__type_by_id(btf, vsi->type); -		var = btf_var(t_var); - -		if (!btf_is_var(t_var)) { +		if (!t_var || !btf_is_var(t_var)) {  			pr_debug("Non-VAR type seen in section %s\n", name);  			return 
-EINVAL;  		} +		var = btf_var(t_var);  		if (var->linkage == BTF_VAR_STATIC)  			continue; @@ -2972,7 +3020,9 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)  		 */  		btf__set_fd(kern_btf, 0);  	} else { -		err = btf__load_into_kernel(kern_btf); +		/* currently BPF_BTF_LOAD only supports log_level 1 */ +		err = btf_load_into_kernel(kern_btf, obj->log_buf, obj->log_size, +					   obj->log_level ? 1 : 0);  	}  	if (sanitize) {  		if (!err) { @@ -3191,11 +3241,11 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  	Elf_Scn *scn;  	Elf64_Shdr *sh; -	/* ELF section indices are 1-based, so allocate +1 element to keep -	 * indexing simple. Also include 0th invalid section into sec_cnt for -	 * simpler and more traditional iteration logic. +	/* ELF section indices are 0-based, but sec #0 is special "invalid" +	 * section. e_shnum does include sec #0, so e_shnum is the necessary +	 * size of an array to keep all the sections.  	 */ -	obj->efile.sec_cnt = 1 + obj->efile.ehdr->e_shnum; +	obj->efile.sec_cnt = obj->efile.ehdr->e_shnum;  	obj->efile.secs = calloc(obj->efile.sec_cnt, sizeof(*obj->efile.secs));  	if (!obj->efile.secs)  		return -ENOMEM; @@ -3271,8 +3321,12 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  		} else if (strcmp(name, MAPS_ELF_SEC) == 0) {  			obj->efile.btf_maps_shndx = idx;  		} else if (strcmp(name, BTF_ELF_SEC) == 0) { +			if (sh->sh_type != SHT_PROGBITS) +				return -LIBBPF_ERRNO__FORMAT;  			btf_data = data;  		} else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { +			if (sh->sh_type != SHT_PROGBITS) +				return -LIBBPF_ERRNO__FORMAT;  			btf_ext_data = data;  		} else if (sh->sh_type == SHT_SYMTAB) {  			/* already processed during the first pass above */ @@ -3303,6 +3357,10 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  		} else if (sh->sh_type == SHT_REL) {  			int targ_sec_idx = sh->sh_info; /* points to other section */ +			if (sh->sh_entsize != sizeof(Elf64_Rel) || +			    targ_sec_idx >= obj->efile.sec_cnt) +				return -LIBBPF_ERRNO__FORMAT; +  			/* Only do relo for section with exec instructions */  			if (!section_have_execinstr(obj, targ_sec_idx) &&  			    strcmp(name, ".rel" STRUCT_OPS_SEC) && @@ -3333,7 +3391,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)  	/* sort BPF programs by section name and in-section instruction offset  	 * for faster search */ -	qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs); +	if (obj->nr_programs) +		qsort(obj->programs, obj->nr_programs, sizeof(*obj->programs), cmp_progs);  	return bpf_object__init_btf(obj, btf_data, btf_ext_data);  } @@ -3555,7 +3614,7 @@ static int bpf_object__collect_externs(struct bpf_object *obj)  	scn = elf_sec_by_idx(obj, obj->efile.symbols_shndx);  	sh = elf_sec_hdr(obj, scn); -	if (!sh) +	if (!sh || sh->sh_entsize != sizeof(Elf64_Sym))  		return -LIBBPF_ERRNO__FORMAT;  	dummy_var_btf_id = add_dummy_ksym_var(obj->btf); @@ -4022,7 +4081,7 @@ static int  bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Data *data)  {  	const char *relo_sec_name, *sec_name; -	size_t sec_idx = shdr->sh_info; +	size_t sec_idx = shdr->sh_info, sym_idx;  	struct bpf_program *prog;  	struct reloc_desc *relos;  	int err, i, nrels; @@ -4033,6 +4092,9 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat  	Elf64_Sym *sym;  	Elf64_Rel *rel; +	if (sec_idx >= obj->efile.sec_cnt) +		return -EINVAL; +  	scn = elf_sec_by_idx(obj, sec_idx);  	scn_data = elf_sec_data(obj, scn); 
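/* [Editor's illustration, not part of the patch] The hunks in this area harden
 * bpf_object__collect_prog_relos() against malformed ELF input: the relo
 * section's sh_info (target section index) and sh_entsize, the relocation's
 * symbol index, the symbol's st_shndx, and r_offset are all validated before
 * use. A condensed sketch of that validation pattern, assuming libelf's
 * Elf64_Rel/Elf64_Sym layouts and an nrels already derived from the section
 * size (not the exact libbpf code):
 *
 *	if (shdr->sh_entsize != sizeof(Elf64_Rel) ||
 *	    shdr->sh_info >= obj->efile.sec_cnt)
 *		return -LIBBPF_ERRNO__FORMAT;		// bogus relo section header
 *
 *	for (i = 0; i < nrels; i++) {
 *		Elf64_Rel *rel = (Elf64_Rel *)data->d_buf + i;
 *		size_t sym_idx = ELF64_R_SYM(rel->r_info);
 *		Elf64_Sym *sym = elf_sym_by_idx(obj, sym_idx);
 *
 *		if (!sym || sym->st_shndx >= obj->efile.sec_cnt)
 *			return -LIBBPF_ERRNO__FORMAT;	// symbol or its section out of range
 *		if (rel->r_offset % BPF_INSN_SZ ||	// must hit an instruction boundary
 *		    rel->r_offset >= scn_data->d_size)	// and stay inside the target section
 *			return -LIBBPF_ERRNO__FORMAT;
 *	}
 */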
@@ -4052,16 +4114,23 @@ bpf_object__collect_prog_relos(struct bpf_object *obj, Elf64_Shdr *shdr, Elf_Dat  			return -LIBBPF_ERRNO__FORMAT;  		} -		sym = elf_sym_by_idx(obj, ELF64_R_SYM(rel->r_info)); +		sym_idx = ELF64_R_SYM(rel->r_info); +		sym = elf_sym_by_idx(obj, sym_idx);  		if (!sym) { -			pr_warn("sec '%s': symbol 0x%zx not found for relo #%d\n", -				relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); +			pr_warn("sec '%s': symbol #%zu not found for relo #%d\n", +				relo_sec_name, sym_idx, i); +			return -LIBBPF_ERRNO__FORMAT; +		} + +		if (sym->st_shndx >= obj->efile.sec_cnt) { +			pr_warn("sec '%s': corrupted symbol #%zu pointing to invalid section #%zu for relo #%d\n", +				relo_sec_name, sym_idx, (size_t)sym->st_shndx, i);  			return -LIBBPF_ERRNO__FORMAT;  		}  		if (rel->r_offset % BPF_INSN_SZ || rel->r_offset >= scn_data->d_size) {  			pr_warn("sec '%s': invalid offset 0x%zx for relo #%d\n", -				relo_sec_name, (size_t)ELF64_R_SYM(rel->r_info), i); +				relo_sec_name, (size_t)rel->r_offset, i);  			return -LIBBPF_ERRNO__FORMAT;  		} @@ -4265,30 +4334,24 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries)  static int  bpf_object__probe_loading(struct bpf_object *obj)  { -	struct bpf_load_program_attr attr;  	char *cp, errmsg[STRERR_BUFSIZE];  	struct bpf_insn insns[] = {  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	}; -	int ret; +	int ret, insn_cnt = ARRAY_SIZE(insns);  	if (obj->gen_loader)  		return 0; -	/* make sure basic loading works */ - -	memset(&attr, 0, sizeof(attr)); -	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; -	attr.insns = insns; -	attr.insns_cnt = ARRAY_SIZE(insns); -	attr.license = "GPL"; +	ret = bump_rlimit_memlock(); +	if (ret) +		pr_warn("Failed to bump RLIMIT_MEMLOCK (err = %d), you might need to do it explicitly!\n", ret); -	ret = bpf_load_program_xattr(&attr, NULL, 0); -	if (ret < 0) { -		attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; -		ret = bpf_load_program_xattr(&attr, NULL, 0); -	} +	/* make sure basic loading works */ +	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL); +	if (ret < 0) +		ret = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", insns, insn_cnt, NULL);  	if (ret < 0) {  		ret = errno;  		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4312,29 +4375,19 @@ static int probe_fd(int fd)  static int probe_kern_prog_name(void)  { -	struct bpf_load_program_attr attr;  	struct bpf_insn insns[] = {  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	}; -	int ret; +	int ret, insn_cnt = ARRAY_SIZE(insns);  	/* make sure loading with name works */ - -	memset(&attr, 0, sizeof(attr)); -	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; -	attr.insns = insns; -	attr.insns_cnt = ARRAY_SIZE(insns); -	attr.license = "GPL"; -	attr.name = "test"; -	ret = bpf_load_program_xattr(&attr, NULL, 0); +	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, "test", "GPL", insns, insn_cnt, NULL);  	return probe_fd(ret);  }  static int probe_kern_global_data(void)  { -	struct bpf_load_program_attr prg_attr; -	struct bpf_create_map_attr map_attr;  	char *cp, errmsg[STRERR_BUFSIZE];  	struct bpf_insn insns[] = {  		BPF_LD_MAP_VALUE(BPF_REG_1, 0, 16), @@ -4342,15 +4395,9 @@ static int probe_kern_global_data(void)  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	}; -	int ret, map; - -	memset(&map_attr, 0, sizeof(map_attr)); -	map_attr.map_type = BPF_MAP_TYPE_ARRAY; -	map_attr.key_size = sizeof(int); -	map_attr.value_size = 32; -	map_attr.max_entries = 1; +	int ret, map, insn_cnt = ARRAY_SIZE(insns); -	map = 
bpf_create_map_xattr(&map_attr); +	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);  	if (map < 0) {  		ret = -errno;  		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4361,13 +4408,7 @@ static int probe_kern_global_data(void)  	insns[0].imm = map; -	memset(&prg_attr, 0, sizeof(prg_attr)); -	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; -	prg_attr.insns = insns; -	prg_attr.insns_cnt = ARRAY_SIZE(insns); -	prg_attr.license = "GPL"; - -	ret = bpf_load_program_xattr(&prg_attr, NULL, 0); +	ret = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);  	close(map);  	return probe_fd(ret);  } @@ -4468,45 +4509,51 @@ static int probe_kern_btf_decl_tag(void)  					     strs, sizeof(strs)));  } -static int probe_kern_array_mmap(void) +static int probe_kern_btf_type_tag(void)  { -	struct bpf_create_map_attr attr = { -		.map_type = BPF_MAP_TYPE_ARRAY, -		.map_flags = BPF_F_MMAPABLE, -		.key_size = sizeof(int), -		.value_size = sizeof(int), -		.max_entries = 1, +	static const char strs[] = "\0tag"; +	__u32 types[] = { +		/* int */ +		BTF_TYPE_INT_ENC(0, BTF_INT_SIGNED, 0, 32, 4),		/* [1] */ +		/* attr */ +		BTF_TYPE_TYPE_TAG_ENC(1, 1),				/* [2] */ +		/* ptr */ +		BTF_TYPE_ENC(0, BTF_INFO_ENC(BTF_KIND_PTR, 0, 0), 2),	/* [3] */  	}; -	return probe_fd(bpf_create_map_xattr(&attr)); +	return probe_fd(libbpf__load_raw_btf((char *)types, sizeof(types), +					     strs, sizeof(strs))); +} + +static int probe_kern_array_mmap(void) +{ +	LIBBPF_OPTS(bpf_map_create_opts, opts, .map_flags = BPF_F_MMAPABLE); +	int fd; + +	fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &opts); +	return probe_fd(fd);  }  static int probe_kern_exp_attach_type(void)  { -	struct bpf_load_program_attr attr; +	LIBBPF_OPTS(bpf_prog_load_opts, opts, .expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE);  	struct bpf_insn insns[] = {  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	}; +	int fd, insn_cnt = ARRAY_SIZE(insns); -	memset(&attr, 0, sizeof(attr));  	/* use any valid combination of program type and (optional)  	 * non-zero expected attach type (i.e., not a BPF_CGROUP_INET_INGRESS)  	 * to see if kernel supports expected_attach_type field for  	 * BPF_PROG_LOAD command  	 */ -	attr.prog_type = BPF_PROG_TYPE_CGROUP_SOCK; -	attr.expected_attach_type = BPF_CGROUP_INET_SOCK_CREATE; -	attr.insns = insns; -	attr.insns_cnt = ARRAY_SIZE(insns); -	attr.license = "GPL"; - -	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); +	fd = bpf_prog_load(BPF_PROG_TYPE_CGROUP_SOCK, NULL, "GPL", insns, insn_cnt, &opts); +	return probe_fd(fd);  }  static int probe_kern_probe_read_kernel(void)  { -	struct bpf_load_program_attr attr;  	struct bpf_insn insns[] = {  		BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),	/* r1 = r10 (fp) */  		BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),	/* r1 += -8 */ @@ -4515,34 +4562,22 @@ static int probe_kern_probe_read_kernel(void)  		BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_probe_read_kernel),  		BPF_EXIT_INSN(),  	}; +	int fd, insn_cnt = ARRAY_SIZE(insns); -	memset(&attr, 0, sizeof(attr)); -	attr.prog_type = BPF_PROG_TYPE_KPROBE; -	attr.insns = insns; -	attr.insns_cnt = ARRAY_SIZE(insns); -	attr.license = "GPL"; - -	return probe_fd(bpf_load_program_xattr(&attr, NULL, 0)); +	fd = bpf_prog_load(BPF_PROG_TYPE_KPROBE, NULL, "GPL", insns, insn_cnt, NULL); +	return probe_fd(fd);  }  static int probe_prog_bind_map(void)  { -	struct bpf_load_program_attr prg_attr; -	struct bpf_create_map_attr map_attr;  	char *cp, errmsg[STRERR_BUFSIZE];  
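As an aside, the low-level calls these feature probes now use can be exercised directly from application code. Below is a minimal, hypothetical sketch (not from the patch) that creates an array map with LIBBPF_OPTS-style options and loads a trivial "r0 = 0; exit" socket filter via the new bpf_map_create()/bpf_prog_load() API; it assumes libbpf v0.7+ headers and enough privilege (root or CAP_BPF) to issue the bpf() syscall.

#include <linux/bpf.h>
#include <bpf/bpf.h>
#include <unistd.h>

int main(void)
{
	/* same flag probe_kern_array_mmap() checks for */
	LIBBPF_OPTS(bpf_map_create_opts, map_opts, .map_flags = BPF_F_MMAPABLE);
	/* r0 = 0; exit -- the same trivial program the probes load */
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = 0 },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	int map_fd, prog_fd;

	/* NULL map/prog names keep this working on pre-4.15 kernels */
	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), sizeof(int), 1, &map_opts);
	if (map_fd < 0)
		return 1;

	prog_fd = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL",
				insns, sizeof(insns) / sizeof(insns[0]), NULL);
	if (prog_fd < 0) {
		close(map_fd);
		return 1;
	}

	close(prog_fd);
	close(map_fd);
	return 0;
}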
	struct bpf_insn insns[] = {  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	}; -	int ret, map, prog; - -	memset(&map_attr, 0, sizeof(map_attr)); -	map_attr.map_type = BPF_MAP_TYPE_ARRAY; -	map_attr.key_size = sizeof(int); -	map_attr.value_size = 32; -	map_attr.max_entries = 1; +	int ret, map, prog, insn_cnt = ARRAY_SIZE(insns); -	map = bpf_create_map_xattr(&map_attr); +	map = bpf_map_create(BPF_MAP_TYPE_ARRAY, NULL, sizeof(int), 32, 1, NULL);  	if (map < 0) {  		ret = -errno;  		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg)); @@ -4551,13 +4586,7 @@ static int probe_prog_bind_map(void)  		return ret;  	} -	memset(&prg_attr, 0, sizeof(prg_attr)); -	prg_attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER; -	prg_attr.insns = insns; -	prg_attr.insns_cnt = ARRAY_SIZE(insns); -	prg_attr.license = "GPL"; - -	prog = bpf_load_program_xattr(&prg_attr, NULL, 0); +	prog = bpf_prog_load(BPF_PROG_TYPE_SOCKET_FILTER, NULL, "GPL", insns, insn_cnt, NULL);  	if (prog < 0) {  		close(map);  		return 0; @@ -4602,19 +4631,14 @@ static int probe_module_btf(void)  static int probe_perf_link(void)  { -	struct bpf_load_program_attr attr;  	struct bpf_insn insns[] = {  		BPF_MOV64_IMM(BPF_REG_0, 0),  		BPF_EXIT_INSN(),  	};  	int prog_fd, link_fd, err; -	memset(&attr, 0, sizeof(attr)); -	attr.prog_type = BPF_PROG_TYPE_TRACEPOINT; -	attr.insns = insns; -	attr.insns_cnt = ARRAY_SIZE(insns); -	attr.license = "GPL"; -	prog_fd = bpf_load_program_xattr(&attr, NULL, 0); +	prog_fd = bpf_prog_load(BPF_PROG_TYPE_TRACEPOINT, NULL, "GPL", +				insns, ARRAY_SIZE(insns), NULL);  	if (prog_fd < 0)  		return -errno; @@ -4687,14 +4711,20 @@ static struct kern_feature_desc {  	[FEAT_BTF_DECL_TAG] = {  		"BTF_KIND_DECL_TAG support", probe_kern_btf_decl_tag,  	}, +	[FEAT_BTF_TYPE_TAG] = { +		"BTF_KIND_TYPE_TAG support", probe_kern_btf_type_tag, +	}, +	[FEAT_MEMCG_ACCOUNT] = { +		"memcg-based memory accounting", probe_memcg_account, +	},  }; -static bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id) +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id)  {  	struct kern_feature_desc *feat = &feature_probes[feat_id];  	int ret; -	if (obj->gen_loader) +	if (obj && obj->gen_loader)  		/* To generate loader program assume the latest kernel  		 * to avoid doing extra prog_load, map_create syscalls.  		 
*/ @@ -4821,19 +4851,16 @@ static void bpf_map__destroy(struct bpf_map *map);  static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, bool is_inner)  { -	struct bpf_create_map_params create_attr; +	LIBBPF_OPTS(bpf_map_create_opts, create_attr);  	struct bpf_map_def *def = &map->def; +	const char *map_name = NULL; +	__u32 max_entries;  	int err = 0; -	memset(&create_attr, 0, sizeof(create_attr)); -  	if (kernel_supports(obj, FEAT_PROG_NAME)) -		create_attr.name = map->name; +		map_name = map->name;  	create_attr.map_ifindex = map->map_ifindex; -	create_attr.map_type = def->type;  	create_attr.map_flags = def->map_flags; -	create_attr.key_size = def->key_size; -	create_attr.value_size = def->value_size;  	create_attr.numa_node = map->numa_node;  	create_attr.map_extra = map->map_extra; @@ -4847,18 +4874,14 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  			return nr_cpus;  		}  		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus); -		create_attr.max_entries = nr_cpus; +		max_entries = nr_cpus;  	} else { -		create_attr.max_entries = def->max_entries; +		max_entries = def->max_entries;  	}  	if (bpf_map__is_struct_ops(map)) -		create_attr.btf_vmlinux_value_type_id = -			map->btf_vmlinux_value_type_id; +		create_attr.btf_vmlinux_value_type_id = map->btf_vmlinux_value_type_id; -	create_attr.btf_fd = 0; -	create_attr.btf_key_type_id = 0; -	create_attr.btf_value_type_id = 0;  	if (obj->btf && btf__fd(obj->btf) >= 0 && !bpf_map_find_btf_info(obj, map)) {  		create_attr.btf_fd = btf__fd(obj->btf);  		create_attr.btf_key_type_id = map->btf_key_type_id; @@ -4904,13 +4927,17 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  	}  	if (obj->gen_loader) { -		bpf_gen__map_create(obj->gen_loader, &create_attr, is_inner ? -1 : map - obj->maps); +		bpf_gen__map_create(obj->gen_loader, def->type, map_name, +				    def->key_size, def->value_size, max_entries, +				    &create_attr, is_inner ? -1 : map - obj->maps);  		/* Pretend to have valid FD to pass various fd >= 0 checks.  		 * This fd == 0 will not be used with any syscall and will be reset to -1 eventually.  		 */  		map->fd = 0;  	} else { -		map->fd = libbpf__bpf_create_map_xattr(&create_attr); +		map->fd = bpf_map_create(def->type, map_name, +					 def->key_size, def->value_size, +					 max_entries, &create_attr);  	}  	if (map->fd < 0 && (create_attr.btf_key_type_id ||  			    create_attr.btf_value_type_id)) { @@ -4925,7 +4952,9 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  		create_attr.btf_value_type_id = 0;  		map->btf_key_type_id = 0;  		map->btf_value_type_id = 0; -		map->fd = libbpf__bpf_create_map_xattr(&create_attr); +		map->fd = bpf_map_create(def->type, map_name, +					 def->key_size, def->value_size, +					 max_entries, &create_attr);  	}  	err = map->fd < 0 ? 
-errno : 0; @@ -4940,7 +4969,7 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map, b  	return err;  } -static int init_map_slots(struct bpf_object *obj, struct bpf_map *map) +static int init_map_in_map_slots(struct bpf_object *obj, struct bpf_map *map)  {  	const struct bpf_map *targ_map;  	unsigned int i; @@ -4952,18 +4981,18 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)  		targ_map = map->init_slots[i];  		fd = bpf_map__fd(targ_map); +  		if (obj->gen_loader) { -			pr_warn("// TODO map_update_elem: idx %td key %d value==map_idx %td\n", -				map - obj->maps, i, targ_map - obj->maps); -			return -ENOTSUP; +			bpf_gen__populate_outer_map(obj->gen_loader, +						    map - obj->maps, i, +						    targ_map - obj->maps);  		} else {  			err = bpf_map_update_elem(map->fd, &i, &fd, 0);  		}  		if (err) {  			err = -errno;  			pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n", -				map->name, i, targ_map->name, -				fd, err); +				map->name, i, targ_map->name, fd, err);  			return err;  		}  		pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n", @@ -4976,6 +5005,59 @@ static int init_map_slots(struct bpf_object *obj, struct bpf_map *map)  	return 0;  } +static int init_prog_array_slots(struct bpf_object *obj, struct bpf_map *map) +{ +	const struct bpf_program *targ_prog; +	unsigned int i; +	int fd, err; + +	if (obj->gen_loader) +		return -ENOTSUP; + +	for (i = 0; i < map->init_slots_sz; i++) { +		if (!map->init_slots[i]) +			continue; + +		targ_prog = map->init_slots[i]; +		fd = bpf_program__fd(targ_prog); + +		err = bpf_map_update_elem(map->fd, &i, &fd, 0); +		if (err) { +			err = -errno; +			pr_warn("map '%s': failed to initialize slot [%d] to prog '%s' fd=%d: %d\n", +				map->name, i, targ_prog->name, fd, err); +			return err; +		} +		pr_debug("map '%s': slot [%d] set to prog '%s' fd=%d\n", +			 map->name, i, targ_prog->name, fd); +	} + +	zfree(&map->init_slots); +	map->init_slots_sz = 0; + +	return 0; +} + +static int bpf_object_init_prog_arrays(struct bpf_object *obj) +{ +	struct bpf_map *map; +	int i, err; + +	for (i = 0; i < obj->nr_maps; i++) { +		map = &obj->maps[i]; + +		if (!map->init_slots_sz || map->def.type != BPF_MAP_TYPE_PROG_ARRAY) +			continue; + +		err = init_prog_array_slots(obj, map); +		if (err < 0) { +			zclose(map->fd); +			return err; +		} +	} +	return 0; +} +  static int  bpf_object__create_maps(struct bpf_object *obj)  { @@ -4988,6 +5070,26 @@ bpf_object__create_maps(struct bpf_object *obj)  	for (i = 0; i < obj->nr_maps; i++) {  		map = &obj->maps[i]; +		/* To support old kernels, we skip creating global data maps +		 * (.rodata, .data, .kconfig, etc); later on, during program +		 * loading, if we detect that at least one of the to-be-loaded +		 * programs is referencing any global data map, we'll error +		 * out with program name and relocation index logged. +		 * This approach allows to accommodate Clang emitting +		 * unnecessary .rodata.str1.1 sections for string literals, +		 * but also it allows to have CO-RE applications that use +		 * global variables in some of BPF programs, but not others. +		 * If those global variable-using programs are not loaded at +		 * runtime due to bpf_program__set_autoload(prog, false), +		 * bpf_object loading will succeed just fine even on old +		 * kernels. 
+		 */ +		if (bpf_map__is_internal(map) && +		    !kernel_supports(obj, FEAT_GLOBAL_DATA)) { +			map->skipped = true; +			continue; +		} +  		retried = false;  retry:  		if (map->pin_path) { @@ -5024,8 +5126,8 @@ retry:  				}  			} -			if (map->init_slots_sz) { -				err = init_map_slots(obj, map); +			if (map->init_slots_sz && map->def.type != BPF_MAP_TYPE_PROG_ARRAY) { +				err = init_map_in_map_slots(obj, map);  				if (err < 0) {  					zclose(map->fd);  					goto err_out; @@ -5097,15 +5199,18 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand,  			      struct bpf_core_cand_list *cands)  {  	struct bpf_core_cand *new_cands, *cand; -	const struct btf_type *t; -	const char *targ_name; +	const struct btf_type *t, *local_t; +	const char *targ_name, *local_name;  	size_t targ_essent_len;  	int n, i; +	local_t = btf__type_by_id(local_cand->btf, local_cand->id); +	local_name = btf__str_by_offset(local_cand->btf, local_t->name_off); +  	n = btf__type_cnt(targ_btf);  	for (i = targ_start_id; i < n; i++) {  		t = btf__type_by_id(targ_btf, i); -		if (btf_kind(t) != btf_kind(local_cand->t)) +		if (btf_kind(t) != btf_kind(local_t))  			continue;  		targ_name = btf__name_by_offset(targ_btf, t->name_off); @@ -5116,12 +5221,12 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand,  		if (targ_essent_len != local_essent_len)  			continue; -		if (strncmp(local_cand->name, targ_name, local_essent_len) != 0) +		if (strncmp(local_name, targ_name, local_essent_len) != 0)  			continue;  		pr_debug("CO-RE relocating [%d] %s %s: found target candidate [%d] %s %s in [%s]\n", -			 local_cand->id, btf_kind_str(local_cand->t), -			 local_cand->name, i, btf_kind_str(t), targ_name, +			 local_cand->id, btf_kind_str(local_t), +			 local_name, i, btf_kind_str(t), targ_name,  			 targ_btf_name);  		new_cands = libbpf_reallocarray(cands->cands, cands->len + 1,  					      sizeof(*cands->cands)); @@ -5130,8 +5235,6 @@ static int bpf_core_add_cands(struct bpf_core_cand *local_cand,  		cand = &new_cands[cands->len];  		cand->btf = targ_btf; -		cand->t = t; -		cand->name = targ_name;  		cand->id = i;  		cands->cands = new_cands; @@ -5238,18 +5341,21 @@ bpf_core_find_cands(struct bpf_object *obj, const struct btf *local_btf, __u32 l  	struct bpf_core_cand local_cand = {};  	struct bpf_core_cand_list *cands;  	const struct btf *main_btf; +	const struct btf_type *local_t; +	const char *local_name;  	size_t local_essent_len;  	int err, i;  	local_cand.btf = local_btf; -	local_cand.t = btf__type_by_id(local_btf, local_type_id); -	if (!local_cand.t) +	local_cand.id = local_type_id; +	local_t = btf__type_by_id(local_btf, local_type_id); +	if (!local_t)  		return ERR_PTR(-EINVAL); -	local_cand.name = btf__name_by_offset(local_btf, local_cand.t->name_off); -	if (str_is_empty(local_cand.name)) +	local_name = btf__name_by_offset(local_btf, local_t->name_off); +	if (str_is_empty(local_name))  		return ERR_PTR(-EINVAL); -	local_essent_len = bpf_core_essential_name_len(local_cand.name); +	local_essent_len = bpf_core_essential_name_len(local_name);  	cands = calloc(1, sizeof(*cands));  	if (!cands) @@ -5399,12 +5505,31 @@ static void *u32_as_hash_key(__u32 x)  	return (void *)(uintptr_t)x;  } +static int record_relo_core(struct bpf_program *prog, +			    const struct bpf_core_relo *core_relo, int insn_idx) +{ +	struct reloc_desc *relos, *relo; + +	relos = libbpf_reallocarray(prog->reloc_desc, +				    prog->nr_reloc + 1, sizeof(*relos)); +	if (!relos) +		return -ENOMEM; +	relo = &relos[prog->nr_reloc]; +	
relo->type = RELO_CORE; +	relo->insn_idx = insn_idx; +	relo->core_relo = core_relo; +	prog->reloc_desc = relos; +	prog->nr_reloc++; +	return 0; +} +  static int bpf_core_apply_relo(struct bpf_program *prog,  			       const struct bpf_core_relo *relo,  			       int relo_idx,  			       const struct btf *local_btf,  			       struct hashmap *cand_cache)  { +	struct bpf_core_spec specs_scratch[3] = {};  	const void *type_key = u32_as_hash_key(relo->type_id);  	struct bpf_core_cand_list *cands = NULL;  	const char *prog_name = prog->name; @@ -5435,13 +5560,15 @@ static int bpf_core_apply_relo(struct bpf_program *prog,  		return -EINVAL;  	if (prog->obj->gen_loader) { -		pr_warn("// TODO core_relo: prog %td insn[%d] %s kind %d\n", +		const char *spec_str = btf__name_by_offset(local_btf, relo->access_str_off); + +		pr_debug("record_relo_core: prog %td insn[%d] %s %s %s final insn_idx %d\n",  			prog - prog->obj->programs, relo->insn_off / 8, -			local_name, relo->kind); -		return -ENOTSUP; +			btf_kind_str(local_type), local_name, spec_str, insn_idx); +		return record_relo_core(prog, relo, insn_idx);  	} -	if (relo->kind != BPF_TYPE_ID_LOCAL && +	if (relo->kind != BPF_CORE_TYPE_ID_LOCAL &&  	    !hashmap__find(cand_cache, type_key, (void **)&cands)) {  		cands = bpf_core_find_cands(prog->obj, local_btf, local_id);  		if (IS_ERR(cands)) { @@ -5457,7 +5584,8 @@ static int bpf_core_apply_relo(struct bpf_program *prog,  		}  	} -	return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, relo_idx, local_btf, cands); +	return bpf_core_apply_relo_insn(prog_name, insn, insn_idx, relo, +					relo_idx, local_btf, cands, specs_scratch);  }  static int @@ -5587,6 +5715,13 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)  				insn[0].src_reg = BPF_PSEUDO_MAP_IDX_VALUE;  				insn[0].imm = relo->map_idx;  			} else { +				const struct bpf_map *map = &obj->maps[relo->map_idx]; + +				if (map->skipped) { +					pr_warn("prog '%s': relo #%d: kernel doesn't support global data\n", +						prog->name, i); +					return -ENOTSUP; +				}  				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE;  				insn[0].imm = obj->maps[relo->map_idx].fd;  			} @@ -5635,6 +5770,9 @@ bpf_object__relocate_data(struct bpf_object *obj, struct bpf_program *prog)  		case RELO_CALL:  			/* handled already */  			break; +		case RELO_CORE: +			/* will be handled by bpf_program_record_relos() */ +			break;  		default:  			pr_warn("prog '%s': relo #%d: bad relo type %d\n",  				prog->name, i, relo->type); @@ -5798,6 +5936,8 @@ static int cmp_relo_by_insn_idx(const void *key, const void *elem)  static struct reloc_desc *find_prog_insn_relo(const struct bpf_program *prog, size_t insn_idx)  { +	if (!prog->nr_reloc) +		return NULL;  	return bsearch(&insn_idx, prog->reloc_desc, prog->nr_reloc,  		       sizeof(*prog->reloc_desc), cmp_relo_by_insn_idx);  } @@ -5813,8 +5953,9 @@ static int append_subprog_relos(struct bpf_program *main_prog, struct bpf_progra  	relos = libbpf_reallocarray(main_prog->reloc_desc, new_cnt, sizeof(*relos));  	if (!relos)  		return -ENOMEM; -	memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, -	       sizeof(*relos) * subprog->nr_reloc); +	if (subprog->nr_reloc) +		memcpy(relos + main_prog->nr_reloc, subprog->reloc_desc, +		       sizeof(*relos) * subprog->nr_reloc);  	for (i = main_prog->nr_reloc; i < new_cnt; i++)  		relos[i].insn_idx += subprog->sub_insn_off; @@ -6072,6 +6213,35 @@ bpf_object__free_relocs(struct bpf_object *obj)  	}  } +static int cmp_relocs(const void *_a, const void *_b) 
+{ +	const struct reloc_desc *a = _a; +	const struct reloc_desc *b = _b; + +	if (a->insn_idx != b->insn_idx) +		return a->insn_idx < b->insn_idx ? -1 : 1; + +	/* no two relocations should have the same insn_idx, but ... */ +	if (a->type != b->type) +		return a->type < b->type ? -1 : 1; + +	return 0; +} + +static void bpf_object__sort_relos(struct bpf_object *obj) +{ +	int i; + +	for (i = 0; i < obj->nr_programs; i++) { +		struct bpf_program *p = &obj->programs[i]; + +		if (!p->nr_reloc) +			continue; + +		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); +	} +} +  static int  bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  { @@ -6086,6 +6256,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  				err);  			return err;  		} +		if (obj->gen_loader) +			bpf_object__sort_relos(obj);  	}  	/* Before relocating calls pre-process relocations and mark @@ -6121,6 +6293,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  		 */  		if (prog_is_subprog(obj, prog))  			continue; +		if (!prog->load) +			continue;  		err = bpf_object__relocate_calls(obj, prog);  		if (err) { @@ -6134,6 +6308,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  		prog = &obj->programs[i];  		if (prog_is_subprog(obj, prog))  			continue; +		if (!prog->load) +			continue;  		err = bpf_object__relocate_data(obj, prog);  		if (err) {  			pr_warn("prog '%s': failed to relocate data references: %d\n", @@ -6156,9 +6332,11 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  	int i, j, nrels, new_sz;  	const struct btf_var_secinfo *vi = NULL;  	const struct btf_type *sec, *var, *def; -	struct bpf_map *map = NULL, *targ_map; +	struct bpf_map *map = NULL, *targ_map = NULL; +	struct bpf_program *targ_prog = NULL; +	bool is_prog_array, is_map_in_map;  	const struct btf_member *member; -	const char *name, *mname; +	const char *name, *mname, *type;  	unsigned int moff;  	Elf64_Sym *sym;  	Elf64_Rel *rel; @@ -6185,11 +6363,6 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  			return -LIBBPF_ERRNO__FORMAT;  		}  		name = elf_sym_str(obj, sym->st_name) ?: "<?>"; -		if (sym->st_shndx != obj->efile.btf_maps_shndx) { -			pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", -				i, name); -			return -LIBBPF_ERRNO__RELOC; -		}  		pr_debug(".maps relo #%d: for %zd value %zd rel->r_offset %zu name %d ('%s')\n",  			 i, (ssize_t)(rel->r_info >> 32), (size_t)sym->st_value, @@ -6211,19 +6384,45 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  			return -EINVAL;  		} -		if (!bpf_map_type__is_map_in_map(map->def.type)) -			return -EINVAL; -		if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && -		    map->def.key_size != sizeof(int)) { -			pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", -				i, map->name, sizeof(int)); +		is_map_in_map = bpf_map_type__is_map_in_map(map->def.type); +		is_prog_array = map->def.type == BPF_MAP_TYPE_PROG_ARRAY; +		type = is_map_in_map ? 
"map" : "prog"; +		if (is_map_in_map) { +			if (sym->st_shndx != obj->efile.btf_maps_shndx) { +				pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n", +					i, name); +				return -LIBBPF_ERRNO__RELOC; +			} +			if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS && +			    map->def.key_size != sizeof(int)) { +				pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n", +					i, map->name, sizeof(int)); +				return -EINVAL; +			} +			targ_map = bpf_object__find_map_by_name(obj, name); +			if (!targ_map) { +				pr_warn(".maps relo #%d: '%s' isn't a valid map reference\n", +					i, name); +				return -ESRCH; +			} +		} else if (is_prog_array) { +			targ_prog = bpf_object__find_program_by_name(obj, name); +			if (!targ_prog) { +				pr_warn(".maps relo #%d: '%s' isn't a valid program reference\n", +					i, name); +				return -ESRCH; +			} +			if (targ_prog->sec_idx != sym->st_shndx || +			    targ_prog->sec_insn_off * 8 != sym->st_value || +			    prog_is_subprog(obj, targ_prog)) { +				pr_warn(".maps relo #%d: '%s' isn't an entry-point program\n", +					i, name); +				return -LIBBPF_ERRNO__RELOC; +			} +		} else {  			return -EINVAL;  		} -		targ_map = bpf_object__find_map_by_name(obj, name); -		if (!targ_map) -			return -ESRCH; -  		var = btf__type_by_id(obj->btf, vi->type);  		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);  		if (btf_vlen(def) == 0) @@ -6254,30 +6453,15 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,  			       (new_sz - map->init_slots_sz) * host_ptr_sz);  			map->init_slots_sz = new_sz;  		} -		map->init_slots[moff] = targ_map; +		map->init_slots[moff] = is_map_in_map ? (void *)targ_map : (void *)targ_prog; -		pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n", -			 i, map->name, moff, name); +		pr_debug(".maps relo #%d: map '%s' slot [%d] points to %s '%s'\n", +			 i, map->name, moff, type, name);  	}  	return 0;  } -static int cmp_relocs(const void *_a, const void *_b) -{ -	const struct reloc_desc *a = _a; -	const struct reloc_desc *b = _b; - -	if (a->insn_idx != b->insn_idx) -		return a->insn_idx < b->insn_idx ? -1 : 1; - -	/* no two relocations should have the same insn_idx, but ... */ -	if (a->type != b->type) -		return a->type < b->type ? 
-1 : 1; - -	return 0; -} -  static int bpf_object__collect_relos(struct bpf_object *obj)  {  	int i, err; @@ -6310,14 +6494,7 @@ static int bpf_object__collect_relos(struct bpf_object *obj)  			return err;  	} -	for (i = 0; i < obj->nr_programs; i++) { -		struct bpf_program *p = &obj->programs[i]; - -		if (!p->nr_reloc) -			continue; - -		qsort(p->reloc_desc, p->nr_reloc, sizeof(*p->reloc_desc), cmp_relocs); -	} +	bpf_object__sort_relos(obj);  	return 0;  } @@ -6374,16 +6551,16 @@ static int libbpf_find_attach_btf_id(struct bpf_program *prog, const char *attac  /* this is called as prog->sec_def->preload_fn for libbpf-supported sec_defs */  static int libbpf_preload_prog(struct bpf_program *prog, -			       struct bpf_prog_load_params *attr, long cookie) +			       struct bpf_prog_load_opts *opts, long cookie)  {  	enum sec_def_flags def = cookie;  	/* old kernels might not support specifying expected_attach_type */  	if ((def & SEC_EXP_ATTACH_OPT) && !kernel_supports(prog->obj, FEAT_EXP_ATTACH_TYPE)) -		attr->expected_attach_type = 0; +		opts->expected_attach_type = 0;  	if (def & SEC_SLEEPABLE) -		attr->prog_flags |= BPF_F_SLEEPABLE; +		opts->prog_flags |= BPF_F_SLEEPABLE;  	if ((prog->type == BPF_PROG_TYPE_TRACING ||  	     prog->type == BPF_PROG_TYPE_LSM || @@ -6402,25 +6579,28 @@ static int libbpf_preload_prog(struct bpf_program *prog,  		/* but by now libbpf common logic is not utilizing  		 * prog->atach_btf_obj_fd/prog->attach_btf_id anymore because -		 * this callback is called after attrs were populated by -		 * libbpf, so this callback has to update attr explicitly here +		 * this callback is called after opts were populated by +		 * libbpf, so this callback has to update opts explicitly here  		 */ -		attr->attach_btf_obj_fd = btf_obj_fd; -		attr->attach_btf_id = btf_type_id; +		opts->attach_btf_obj_fd = btf_obj_fd; +		opts->attach_btf_id = btf_type_id;  	}  	return 0;  } -static int -load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt, -	     char *license, __u32 kern_version, int *pfd) +static int bpf_object_load_prog_instance(struct bpf_object *obj, struct bpf_program *prog, +					 struct bpf_insn *insns, int insns_cnt, +					 const char *license, __u32 kern_version, +					 int *prog_fd)  { -	struct bpf_prog_load_params load_attr = {}; -	struct bpf_object *obj = prog->obj; +	LIBBPF_OPTS(bpf_prog_load_opts, load_attr); +	const char *prog_name = NULL;  	char *cp, errmsg[STRERR_BUFSIZE];  	size_t log_buf_size = 0; -	char *log_buf = NULL; +	char *log_buf = NULL, *tmp;  	int btf_fd, ret, err; +	bool own_log_buf = true; +	__u32 log_level = prog->log_level;  	if (prog->type == BPF_PROG_TYPE_UNSPEC) {  		/* @@ -6435,14 +6615,9 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  	if (!insns || !insns_cnt)  		return -EINVAL; -	load_attr.prog_type = prog->type;  	load_attr.expected_attach_type = prog->expected_attach_type;  	if (kernel_supports(obj, FEAT_PROG_NAME)) -		load_attr.name = prog->name; -	load_attr.insns = insns; -	load_attr.insn_cnt = insns_cnt; -	load_attr.license = license; -	load_attr.attach_btf_id = prog->attach_btf_id; +		prog_name = prog->name;  	load_attr.attach_prog_fd = prog->attach_prog_fd;  	load_attr.attach_btf_obj_fd = prog->attach_btf_obj_fd;  	load_attr.attach_btf_id = prog->attach_btf_id; @@ -6460,7 +6635,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  		load_attr.line_info_rec_size = prog->line_info_rec_size;  		load_attr.line_info_cnt = prog->line_info_cnt;  	} -	
load_attr.log_level = prog->log_level; +	load_attr.log_level = log_level;  	load_attr.prog_flags = prog->prog_flags;  	load_attr.fd_array = obj->fd_array; @@ -6475,27 +6650,51 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  	}  	if (obj->gen_loader) { -		bpf_gen__prog_load(obj->gen_loader, &load_attr, +		bpf_gen__prog_load(obj->gen_loader, prog->type, prog->name, +				   license, insns, insns_cnt, &load_attr,  				   prog - obj->programs); -		*pfd = -1; +		*prog_fd = -1;  		return 0;  	} -retry_load: -	if (log_buf_size) { -		log_buf = malloc(log_buf_size); -		if (!log_buf) -			return -ENOMEM; -		*log_buf = 0; +retry_load: +	/* if log_level is zero, we don't request logs initiallly even if +	 * custom log_buf is specified; if the program load fails, then we'll +	 * bump log_level to 1 and use either custom log_buf or we'll allocate +	 * our own and retry the load to get details on what failed +	 */ +	if (log_level) { +		if (prog->log_buf) { +			log_buf = prog->log_buf; +			log_buf_size = prog->log_size; +			own_log_buf = false; +		} else if (obj->log_buf) { +			log_buf = obj->log_buf; +			log_buf_size = obj->log_size; +			own_log_buf = false; +		} else { +			log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, log_buf_size * 2); +			tmp = realloc(log_buf, log_buf_size); +			if (!tmp) { +				ret = -ENOMEM; +				goto out; +			} +			log_buf = tmp; +			log_buf[0] = '\0'; +			own_log_buf = true; +		}  	}  	load_attr.log_buf = log_buf; -	load_attr.log_buf_sz = log_buf_size; -	ret = libbpf__bpf_prog_load(&load_attr); +	load_attr.log_size = log_buf_size; +	load_attr.log_level = log_level; +	ret = bpf_prog_load(prog->type, prog_name, license, insns, insns_cnt, &load_attr);  	if (ret >= 0) { -		if (log_buf && load_attr.log_level) -			pr_debug("verifier log:\n%s", log_buf); +		if (log_level && own_log_buf) { +			pr_debug("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", +				 prog->name, log_buf); +		}  		if (obj->has_rodata && kernel_supports(obj, FEAT_PROG_BIND_MAP)) {  			struct bpf_map *map; @@ -6508,61 +6707,53 @@ retry_load:  				if (bpf_prog_bind_map(ret, bpf_map__fd(map), NULL)) {  					cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -					pr_warn("prog '%s': failed to bind .rodata map: %s\n", -						prog->name, cp); +					pr_warn("prog '%s': failed to bind map '%s': %s\n", +						prog->name, map->real_name, cp);  					/* Don't fail hard if can't bind rodata. */  				}  			}  		} -		*pfd = ret; +		*prog_fd = ret;  		ret = 0;  		goto out;  	} -	if (!log_buf || errno == ENOSPC) { -		log_buf_size = max((size_t)BPF_LOG_BUF_SIZE, -				   log_buf_size << 1); - -		free(log_buf); +	if (log_level == 0) { +		log_level = 1;  		goto retry_load;  	} -	ret = errno ? -errno : -LIBBPF_ERRNO__LOAD; +	/* On ENOSPC, increase log buffer size and retry, unless custom +	 * log_buf is specified. +	 * Be careful to not overflow u32, though. Kernel's log buf size limit +	 * isn't part of UAPI so it can always be bumped to full 4GB. So don't +	 * multiply by 2 unless we are sure we'll fit within 32 bits. +	 * Currently, we'll get -EINVAL when we reach (UINT_MAX >> 2). 
+	 */ +	if (own_log_buf && errno == ENOSPC && log_buf_size <= UINT_MAX / 2) +		goto retry_load; + +	ret = -errno;  	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -	pr_warn("load bpf program failed: %s\n", cp); +	pr_warn("prog '%s': BPF program load failed: %s\n", prog->name, cp);  	pr_perm_msg(ret); -	if (log_buf && log_buf[0] != '\0') { -		ret = -LIBBPF_ERRNO__VERIFY; -		pr_warn("-- BEGIN DUMP LOG ---\n"); -		pr_warn("\n%s\n", log_buf); -		pr_warn("-- END LOG --\n"); -	} else if (load_attr.insn_cnt >= BPF_MAXINSNS) { -		pr_warn("Program too large (%zu insns), at most %d insns\n", -			load_attr.insn_cnt, BPF_MAXINSNS); -		ret = -LIBBPF_ERRNO__PROG2BIG; -	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { -		/* Wrong program type? */ -		int fd; - -		load_attr.prog_type = BPF_PROG_TYPE_KPROBE; -		load_attr.expected_attach_type = 0; -		load_attr.log_buf = NULL; -		load_attr.log_buf_sz = 0; -		fd = libbpf__bpf_prog_load(&load_attr); -		if (fd >= 0) { -			close(fd); -			ret = -LIBBPF_ERRNO__PROGTYPE; -			goto out; -		} +	if (own_log_buf && log_buf && log_buf[0] != '\0') { +		pr_warn("prog '%s': -- BEGIN PROG LOAD LOG --\n%s-- END PROG LOAD LOG --\n", +			prog->name, log_buf); +	} +	if (insns_cnt >= BPF_MAXINSNS) { +		pr_warn("prog '%s': program too large (%d insns), at most %d insns\n", +			prog->name, insns_cnt, BPF_MAXINSNS);  	}  out: -	free(log_buf); +	if (own_log_buf) +		free(log_buf);  	return ret;  } -static int bpf_program__record_externs(struct bpf_program *prog) +static int bpf_program_record_relos(struct bpf_program *prog)  {  	struct bpf_object *obj = prog->obj;  	int i; @@ -6584,6 +6775,17 @@ static int bpf_program__record_externs(struct bpf_program *prog)  					       ext->is_weak, false, BTF_KIND_FUNC,  					       relo->insn_idx);  			break; +		case RELO_CORE: { +			struct bpf_core_relo cr = { +				.insn_off = relo->insn_idx * 8, +				.type_id = relo->core_relo->type_id, +				.access_str_off = relo->core_relo->access_str_off, +				.kind = relo->core_relo->kind, +			}; + +			bpf_gen__record_relo_core(obj->gen_loader, &cr); +			break; +		}  		default:  			continue;  		} @@ -6591,11 +6793,12 @@ static int bpf_program__record_externs(struct bpf_program *prog)  	return 0;  } -int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver) +static int bpf_object_load_prog(struct bpf_object *obj, struct bpf_program *prog, +				const char *license, __u32 kern_ver)  {  	int err = 0, fd, i; -	if (prog->obj->loaded) { +	if (obj->loaded) {  		pr_warn("prog '%s': can't load after object was loaded\n", prog->name);  		return libbpf_err(-EINVAL);  	} @@ -6621,10 +6824,11 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  			pr_warn("prog '%s': inconsistent nr(%d) != 1\n",  				prog->name, prog->instances.nr);  		} -		if (prog->obj->gen_loader) -			bpf_program__record_externs(prog); -		err = load_program(prog, prog->insns, prog->insns_cnt, -				   license, kern_ver, &fd); +		if (obj->gen_loader) +			bpf_program_record_relos(prog); +		err = bpf_object_load_prog_instance(obj, prog, +						    prog->insns, prog->insns_cnt, +						    license, kern_ver, &fd);  		if (!err)  			prog->instances.fds[0] = fd;  		goto out; @@ -6652,8 +6856,9 @@ int bpf_program__load(struct bpf_program *prog, char *license, __u32 kern_ver)  			continue;  		} -		err = load_program(prog, result.new_insn_ptr, -				   result.new_insn_cnt, license, kern_ver, &fd); +		err = bpf_object_load_prog_instance(obj, prog, +						    result.new_insn_ptr, result.new_insn_cnt, 
+						    license, kern_ver, &fd);  		if (err) {  			pr_warn("Loading the %dth instance of program '%s' failed\n",  				i, prog->name); @@ -6670,6 +6875,11 @@ out:  	return libbpf_err(err);  } +int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_ver) +{ +	return bpf_object_load_prog(prog->obj, prog, license, kern_ver); +} +  static int  bpf_object__load_progs(struct bpf_object *obj, int log_level)  { @@ -6693,7 +6903,7 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)  			continue;  		}  		prog->log_level |= log_level; -		err = bpf_program__load(prog, obj->license, obj->kern_version); +		err = bpf_object_load_prog(obj, prog, obj->license, obj->kern_version);  		if (err)  			return err;  	} @@ -6744,14 +6954,16 @@ static int bpf_object_init_progs(struct bpf_object *obj, const struct bpf_object  	return 0;  } -static struct bpf_object * -__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, -		   const struct bpf_object_open_opts *opts) +static struct bpf_object *bpf_object_open(const char *path, const void *obj_buf, size_t obj_buf_sz, +					  const struct bpf_object_open_opts *opts)  {  	const char *obj_name, *kconfig, *btf_tmp_path;  	struct bpf_object *obj;  	char tmp_name[64];  	int err; +	char *log_buf; +	size_t log_size; +	__u32 log_level;  	if (elf_version(EV_CURRENT) == EV_NONE) {  		pr_warn("failed to init libelf for %s\n", @@ -6774,10 +6986,22 @@ __bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz,  		pr_debug("loading object '%s' from buffer\n", obj_name);  	} +	log_buf = OPTS_GET(opts, kernel_log_buf, NULL); +	log_size = OPTS_GET(opts, kernel_log_size, 0); +	log_level = OPTS_GET(opts, kernel_log_level, 0); +	if (log_size > UINT_MAX) +		return ERR_PTR(-EINVAL); +	if (log_size && !log_buf) +		return ERR_PTR(-EINVAL); +  	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);  	if (IS_ERR(obj))  		return obj; +	obj->log_buf = log_buf; +	obj->log_size = log_size; +	obj->log_level = log_level; +  	btf_tmp_path = OPTS_GET(opts, btf_custom_path, NULL);  	if (btf_tmp_path) {  		if (strlen(btf_tmp_path) >= PATH_MAX) { @@ -6831,7 +7055,7 @@ __bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)  		return NULL;  	pr_debug("loading %s\n", attr->file); -	return __bpf_object__open(attr->file, NULL, 0, &opts); +	return bpf_object_open(attr->file, NULL, 0, &opts);  }  struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -6857,7 +7081,7 @@ bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts)  	pr_debug("loading %s\n", path); -	return libbpf_ptr(__bpf_object__open(path, NULL, 0, opts)); +	return libbpf_ptr(bpf_object_open(path, NULL, 0, opts));  }  struct bpf_object * @@ -6867,7 +7091,7 @@ bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,  	if (!obj_buf || obj_buf_sz == 0)  		return libbpf_err_ptr(-EINVAL); -	return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, opts)); +	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, opts));  }  struct bpf_object * @@ -6884,7 +7108,7 @@ bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz,  	if (!obj_buf || obj_buf_sz == 0)  		return errno = EINVAL, NULL; -	return libbpf_ptr(__bpf_object__open(NULL, obj_buf, obj_buf_sz, &opts)); +	return libbpf_ptr(bpf_object_open(NULL, obj_buf, obj_buf_sz, &opts));  }  static int bpf_object_unload(struct bpf_object *obj) @@ -6915,10 +7139,6 @@ static int bpf_object__sanitize_maps(struct bpf_object *obj)  	
bpf_object__for_each_map(m, obj) {  		if (!bpf_map__is_internal(m))  			continue; -		if (!kernel_supports(obj, FEAT_GLOBAL_DATA)) { -			pr_warn("kernel doesn't support global data\n"); -			return -ENOTSUP; -		}  		if (!kernel_supports(obj, FEAT_ARRAY_MMAP))  			m->def.map_flags ^= BPF_F_MMAPABLE;  	} @@ -7241,14 +7461,10 @@ static int bpf_object__resolve_externs(struct bpf_object *obj,  	return 0;  } -int bpf_object__load_xattr(struct bpf_object_load_attr *attr) +static int bpf_object_load(struct bpf_object *obj, int extra_log_level, const char *target_btf_path)  { -	struct bpf_object *obj;  	int err, i; -	if (!attr) -		return libbpf_err(-EINVAL); -	obj = attr->obj;  	if (!obj)  		return libbpf_err(-EINVAL); @@ -7258,7 +7474,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  	}  	if (obj->gen_loader) -		bpf_gen__init(obj->gen_loader, attr->log_level, obj->nr_programs, obj->nr_maps); +		bpf_gen__init(obj->gen_loader, extra_log_level, obj->nr_programs, obj->nr_maps);  	err = bpf_object__probe_loading(obj);  	err = err ? : bpf_object__load_vmlinux_btf(obj, false); @@ -7267,8 +7483,9 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  	err = err ? : bpf_object__sanitize_maps(obj);  	err = err ? : bpf_object__init_kern_struct_ops_maps(obj);  	err = err ? : bpf_object__create_maps(obj); -	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : attr->target_btf_path); -	err = err ? : bpf_object__load_progs(obj, attr->log_level); +	err = err ? : bpf_object__relocate(obj, obj->btf_custom_path ? : target_btf_path); +	err = err ? : bpf_object__load_progs(obj, extra_log_level); +	err = err ? : bpf_object_init_prog_arrays(obj);  	if (obj->gen_loader) {  		/* reset FDs */ @@ -7312,13 +7529,14 @@ out:  	return libbpf_err(err);  } -int bpf_object__load(struct bpf_object *obj) +int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  { -	struct bpf_object_load_attr attr = { -		.obj = obj, -	}; +	return bpf_object_load(attr->obj, attr->log_level, attr->target_btf_path); +} -	return bpf_object__load_xattr(&attr); +int bpf_object__load(struct bpf_object *obj) +{ +	return bpf_object_load(obj, 0, NULL);  }  static int make_parent_dir(const char *path) @@ -7707,6 +7925,9 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)  		char *pin_path = NULL;  		char buf[PATH_MAX]; +		if (map->skipped) +			continue; +  		if (path) {  			int len; @@ -7733,7 +7954,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)  	return 0;  err_unpin_maps: -	while ((map = bpf_map__prev(map, obj))) { +	while ((map = bpf_object__prev_map(obj, map))) {  		if (!map->pin_path)  			continue; @@ -7813,7 +8034,7 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)  	return 0;  err_unpin_programs: -	while ((prog = bpf_program__prev(prog, obj))) { +	while ((prog = bpf_object__prev_program(obj, prog))) {  		char buf[PATH_MAX];  		int len; @@ -8154,9 +8375,11 @@ int bpf_program__set_autoload(struct bpf_program *prog, bool autoload)  	return 0;  } +static int bpf_program_nth_fd(const struct bpf_program *prog, int n); +  int bpf_program__fd(const struct bpf_program *prog)  { -	return bpf_program__nth_fd(prog, 0); +	return bpf_program_nth_fd(prog, 0);  }  size_t bpf_program__size(const struct bpf_program *prog) @@ -8202,7 +8425,10 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,  	return 0;  } -int bpf_program__nth_fd(const struct bpf_program *prog, int n) +__attribute__((alias("bpf_program_nth_fd"))) +int 
bpf_program__nth_fd(const struct bpf_program *prog, int n); + +static int bpf_program_nth_fd(const struct bpf_program *prog, int n)  {  	int fd; @@ -8281,6 +8507,54 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,  	prog->expected_attach_type = type;  } +__u32 bpf_program__flags(const struct bpf_program *prog) +{ +	return prog->prog_flags; +} + +int bpf_program__set_flags(struct bpf_program *prog, __u32 flags) +{ +	if (prog->obj->loaded) +		return libbpf_err(-EBUSY); + +	prog->prog_flags = flags; +	return 0; +} + +__u32 bpf_program__log_level(const struct bpf_program *prog) +{ +	return prog->log_level; +} + +int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level) +{ +	if (prog->obj->loaded) +		return libbpf_err(-EBUSY); + +	prog->log_level = log_level; +	return 0; +} + +const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size) +{ +	*log_size = prog->log_size; +	return prog->log_buf; +} + +int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size) +{ +	if (log_size && !log_buf) +		return -EINVAL; +	if (prog->log_size > UINT_MAX) +		return -EINVAL; +	if (prog->obj->loaded) +		return -EBUSY; + +	prog->log_buf = log_buf; +	prog->log_size = log_size; +	return 0; +} +  #define SEC_DEF(sec_pfx, ptype, atype, flags, ...) {			    \  	.sec = sec_pfx,							    \  	.prog_type = BPF_PROG_TYPE_##ptype,				    \ @@ -9028,7 +9302,10 @@ int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)  		pr_warn("error: inner_map_fd already specified\n");  		return libbpf_err(-EINVAL);  	} -	zfree(&map->inner_map); +	if (map->inner_map) { +		bpf_map__destroy(map->inner_map); +		zfree(&map->inner_map); +	}  	map->inner_map_fd = fd;  	return 0;  } @@ -9145,21 +9422,12 @@ long libbpf_get_error(const void *ptr)  	return -errno;  } -int bpf_prog_load(const char *file, enum bpf_prog_type type, -		  struct bpf_object **pobj, int *prog_fd) -{ -	struct bpf_prog_load_attr attr; - -	memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); -	attr.file = file; -	attr.prog_type = type; -	attr.expected_attach_type = 0; - -	return bpf_prog_load_xattr(&attr, pobj, prog_fd); -} - +__attribute__((alias("bpf_prog_load_xattr2")))  int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, -			struct bpf_object **pobj, int *prog_fd) +			struct bpf_object **pobj, int *prog_fd); + +static int bpf_prog_load_xattr2(const struct bpf_prog_load_attr *attr, +				struct bpf_object **pobj, int *prog_fd)  {  	struct bpf_object_open_attr open_attr = {};  	struct bpf_program *prog, *first_prog = NULL; @@ -9230,6 +9498,20 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  	return 0;  } +COMPAT_VERSION(bpf_prog_load_deprecated, bpf_prog_load, LIBBPF_0.0.1) +int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, +			     struct bpf_object **pobj, int *prog_fd) +{ +	struct bpf_prog_load_attr attr; + +	memset(&attr, 0, sizeof(struct bpf_prog_load_attr)); +	attr.file = file; +	attr.prog_type = type; +	attr.expected_attach_type = 0; + +	return bpf_prog_load_xattr2(&attr, pobj, prog_fd); +} +  struct bpf_link {  	int (*detach)(struct bpf_link *link);  	void (*dealloc)(struct bpf_link *link); @@ -9634,7 +9916,10 @@ static int append_to_file(const char *file, const char *fmt, ...)  
static void gen_kprobe_legacy_event_name(char *buf, size_t buf_sz,  					 const char *kfunc_name, size_t offset)  { -	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx", getpid(), kfunc_name, offset); +	static int index = 0; + +	snprintf(buf, buf_sz, "libbpf_%u_%s_0x%zx_%d", getpid(), kfunc_name, offset, +		 __sync_fetch_and_add(&index, 1));  }  static int add_kprobe_event_legacy(const char *probe_name, bool retprobe, @@ -9735,7 +10020,7 @@ bpf_program__attach_kprobe_opts(const struct bpf_program *prog,  		gen_kprobe_legacy_event_name(probe_name, sizeof(probe_name),  					     func_name, offset); -		legacy_probe = strdup(func_name); +		legacy_probe = strdup(probe_name);  		if (!legacy_probe)  			return libbpf_err_ptr(-ENOMEM); @@ -10394,10 +10679,10 @@ struct bpf_link *bpf_map__attach_struct_ops(const struct bpf_map *map)  	return link;  } -enum bpf_perf_event_ret -bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, -			   void **copy_mem, size_t *copy_size, -			   bpf_perf_event_print_t fn, void *private_data) +static enum bpf_perf_event_ret +perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, +		       void **copy_mem, size_t *copy_size, +		       bpf_perf_event_print_t fn, void *private_data)  {  	struct perf_event_mmap_page *header = mmap_mem;  	__u64 data_head = ring_buffer_read_head(header); @@ -10442,6 +10727,12 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,  	return libbpf_err(ret);  } +__attribute__((alias("perf_event_read_simple"))) +enum bpf_perf_event_ret +bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, +			   void **copy_mem, size_t *copy_size, +			   bpf_perf_event_print_t fn, void *private_data); +  struct perf_buffer;  struct perf_buffer_params { @@ -10575,11 +10866,18 @@ error:  static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  					      struct perf_buffer_params *p); -struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt, -				     const struct perf_buffer_opts *opts) +DEFAULT_VERSION(perf_buffer__new_v0_6_0, perf_buffer__new, LIBBPF_0.6.0) +struct perf_buffer *perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, +					    perf_buffer_sample_fn sample_cb, +					    perf_buffer_lost_fn lost_cb, +					    void *ctx, +					    const struct perf_buffer_opts *opts)  {  	struct perf_buffer_params p = {}; -	struct perf_event_attr attr = { 0, }; +	struct perf_event_attr attr = {}; + +	if (!OPTS_VALID(opts, perf_buffer_opts)) +		return libbpf_err_ptr(-EINVAL);  	attr.config = PERF_COUNT_SW_BPF_OUTPUT;  	attr.type = PERF_TYPE_SOFTWARE; @@ -10588,29 +10886,62 @@ struct perf_buffer *perf_buffer__new(int map_fd, size_t page_cnt,  	attr.wakeup_events = 1;  	p.attr = &attr; -	p.sample_cb = opts ? opts->sample_cb : NULL; -	p.lost_cb = opts ? opts->lost_cb : NULL; -	p.ctx = opts ? opts->ctx : NULL; +	p.sample_cb = sample_cb; +	p.lost_cb = lost_cb; +	p.ctx = ctx;  	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));  } -struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, -		     const struct perf_buffer_raw_opts *opts) +COMPAT_VERSION(perf_buffer__new_deprecated, perf_buffer__new, LIBBPF_0.0.4) +struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, +						const struct perf_buffer_opts *opts) +{ +	return perf_buffer__new_v0_6_0(map_fd, page_cnt, +				       opts ? opts->sample_cb : NULL, +				       opts ? opts->lost_cb : NULL, +				       opts ? 
opts->ctx : NULL, +				       NULL); +} + +DEFAULT_VERSION(perf_buffer__new_raw_v0_6_0, perf_buffer__new_raw, LIBBPF_0.6.0) +struct perf_buffer *perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, +						struct perf_event_attr *attr, +						perf_buffer_event_fn event_cb, void *ctx, +						const struct perf_buffer_raw_opts *opts)  {  	struct perf_buffer_params p = {}; -	p.attr = opts->attr; -	p.event_cb = opts->event_cb; -	p.ctx = opts->ctx; -	p.cpu_cnt = opts->cpu_cnt; -	p.cpus = opts->cpus; -	p.map_keys = opts->map_keys; +	if (page_cnt == 0 || !attr) +		return libbpf_err_ptr(-EINVAL); + +	if (!OPTS_VALID(opts, perf_buffer_raw_opts)) +		return libbpf_err_ptr(-EINVAL); + +	p.attr = attr; +	p.event_cb = event_cb; +	p.ctx = ctx; +	p.cpu_cnt = OPTS_GET(opts, cpu_cnt, 0); +	p.cpus = OPTS_GET(opts, cpus, NULL); +	p.map_keys = OPTS_GET(opts, map_keys, NULL);  	return libbpf_ptr(__perf_buffer__new(map_fd, page_cnt, &p));  } +COMPAT_VERSION(perf_buffer__new_raw_deprecated, perf_buffer__new_raw, LIBBPF_0.0.4) +struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, +						    const struct perf_buffer_raw_opts *opts) +{ +	LIBBPF_OPTS(perf_buffer_raw_opts, inner_opts, +		.cpu_cnt = opts->cpu_cnt, +		.cpus = opts->cpus, +		.map_keys = opts->map_keys, +	); + +	return perf_buffer__new_raw_v0_6_0(map_fd, page_cnt, opts->attr, +					   opts->event_cb, opts->ctx, &inner_opts); +} +  static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  					      struct perf_buffer_params *p)  { @@ -10810,10 +11141,10 @@ static int perf_buffer__process_records(struct perf_buffer *pb,  {  	enum bpf_perf_event_ret ret; -	ret = bpf_perf_event_read_simple(cpu_buf->base, pb->mmap_size, -					 pb->page_size, &cpu_buf->buf, -					 &cpu_buf->buf_size, -					 perf_buffer__process_record, cpu_buf); +	ret = perf_event_read_simple(cpu_buf->base, pb->mmap_size, +				     pb->page_size, &cpu_buf->buf, +				     &cpu_buf->buf_size, +				     perf_buffer__process_record, cpu_buf);  	if (ret != LIBBPF_PERF_EVENT_CONT)  		return ret;  	return 0; diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 9de0f299706b..8b9bc5e90c2b 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -24,6 +24,10 @@  extern "C" {  #endif +LIBBPF_API __u32 libbpf_major_version(void); +LIBBPF_API __u32 libbpf_minor_version(void); +LIBBPF_API const char *libbpf_version_string(void); +  enum libbpf_errno {  	__LIBBPF_ERRNO__START = 4000, @@ -104,12 +108,73 @@ struct bpf_object_open_opts {  	 * struct_ops, etc) will need actual kernel BTF at /sys/kernel/btf/vmlinux.  	 */  	const char *btf_custom_path; +	/* Pointer to a buffer for storing kernel logs for applicable BPF +	 * commands. Valid kernel_log_size has to be specified as well and are +	 * passed-through to bpf() syscall. Keep in mind that kernel might +	 * fail operation with -ENOSPC error if provided buffer is too small +	 * to contain entire log output. +	 * See the comment below for kernel_log_level for interaction between +	 * log_buf and log_level settings. +	 * +	 * If specified, this log buffer will be passed for: +	 *   - each BPF progral load (BPF_PROG_LOAD) attempt, unless overriden +	 *     with bpf_program__set_log() on per-program level, to get +	 *     BPF verifier log output. +	 *   - during BPF object's BTF load into kernel (BPF_BTF_LOAD) to get +	 *     BTF sanity checking log. 
+	 * +	 * Each BPF command (BPF_BTF_LOAD or BPF_PROG_LOAD) will overwrite +	 * previous contents, so if you need more fine-grained control, set +	 * per-program buffer with bpf_program__set_log_buf() to preserve each +	 * individual program's verification log. Keep using kernel_log_buf +	 * for BTF verification log, if necessary. +	 */ +	char *kernel_log_buf; +	size_t kernel_log_size; +	/* +	 * Log level can be set independently from log buffer. Log_level=0 +	 * means that libbpf will attempt loading BTF or program without any +	 * logging requested, but will retry with either its own or custom log +	 * buffer, if provided, and log_level=1 on any error. +	 * And vice versa, setting log_level>0 will request BTF or prog +	 * loading with verbose log from the first attempt (and as such also +	 * for successfully loaded BTF or program), and the actual log buffer +	 * could be either libbpf's own auto-allocated log buffer, if +	 * kernel_log_buffer is NULL, or user-provided custom kernel_log_buf. +	 * If user didn't provide custom log buffer, libbpf will emit captured +	 * logs through its print callback. +	 */ +	__u32 kernel_log_level; + +	size_t :0;  }; -#define bpf_object_open_opts__last_field btf_custom_path +#define bpf_object_open_opts__last_field kernel_log_level  LIBBPF_API struct bpf_object *bpf_object__open(const char *path); + +/** + * @brief **bpf_object__open_file()** creates a bpf_object by opening + * the BPF ELF object file pointed to by the passed path and loading it + * into memory. + * @param path BPF object file path + * @param opts options for how to load the bpf object, this parameter is + * optional and can be set to NULL + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */  LIBBPF_API struct bpf_object *  bpf_object__open_file(const char *path, const struct bpf_object_open_opts *opts); + +/** + * @brief **bpf_object__open_mem()** creates a bpf_object by reading + * the BPF objects raw bytes from a memory buffer containing a valid + * BPF ELF object file. 
+ * @param obj_buf pointer to the buffer containing ELF file bytes + * @param obj_buf_sz number of bytes in the buffer + * @param opts options for how to load the bpf object + * @return pointer to the new bpf_object; or NULL is returned on error, + * error code is stored in errno + */  LIBBPF_API struct bpf_object *  bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz,  		     const struct bpf_object_open_opts *opts); @@ -149,6 +214,7 @@ struct bpf_object_load_attr {  /* Load/unload object into/from kernel */  LIBBPF_API int bpf_object__load(struct bpf_object *obj); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__load() instead")  LIBBPF_API int bpf_object__load_xattr(struct bpf_object_load_attr *attr);  LIBBPF_DEPRECATED_SINCE(0, 6, "bpf_object__unload() is deprecated, use bpf_object__close() instead")  LIBBPF_API int bpf_object__unload(struct bpf_object *obj); @@ -161,6 +227,7 @@ struct btf;  LIBBPF_API struct btf *bpf_object__btf(const struct bpf_object *obj);  LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__find_program_by_name() instead")  LIBBPF_API struct bpf_program *  bpf_object__find_program_by_title(const struct bpf_object *obj,  				  const char *title); @@ -262,8 +329,8 @@ LIBBPF_API const struct bpf_insn *bpf_program__insns(const struct bpf_program *p   */  LIBBPF_API size_t bpf_program__insn_cnt(const struct bpf_program *prog); -LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license, -				 __u32 kern_version); +LIBBPF_DEPRECATED_SINCE(0, 6, "use bpf_object__load() instead") +LIBBPF_API int bpf_program__load(struct bpf_program *prog, const char *license, __u32 kern_version);  LIBBPF_API int bpf_program__fd(const struct bpf_program *prog);  LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated")  LIBBPF_API int bpf_program__pin_instance(struct bpf_program *prog, @@ -273,7 +340,31 @@ LIBBPF_DEPRECATED_SINCE(0, 7, "multi-instance bpf_program support is deprecated"  LIBBPF_API int bpf_program__unpin_instance(struct bpf_program *prog,  					   const char *path,  					   int instance); + +/** + * @brief **bpf_program__pin()** pins the BPF program to a file + * in the BPF FS specified by a path. This increments the programs + * reference count, allowing it to stay loaded after the process + * which loaded it has exited. + * + * @param prog BPF program to pin, must already be loaded + * @param path file path in a BPF file system + * @return 0, on success; negative error code, otherwise + */  LIBBPF_API int bpf_program__pin(struct bpf_program *prog, const char *path); + +/** + * @brief **bpf_program__unpin()** unpins the BPF program from a file + * in the BPFFS specified by a path. This decrements the programs + * reference count. + * + * The file pinning the BPF program can also be unlinked by a different + * process in which case this function will return an error. + * + * @param prog BPF program to unpin + * @param path file path to the pin in a BPF file system + * @return 0, on success; negative error code, otherwise + */  LIBBPF_API int bpf_program__unpin(struct bpf_program *prog, const char *path);  LIBBPF_API void bpf_program__unload(struct bpf_program *prog); @@ -344,10 +435,41 @@ struct bpf_uprobe_opts {  };  #define bpf_uprobe_opts__last_field retprobe +/** + * @brief **bpf_program__attach_uprobe()** attaches a BPF program + * to the userspace function which is found by binary path and + * offset. 
You can optionally specify a particular proccess to attach + * to. You can also optionally attach the program to the function + * exit instead of entry. + * + * @param prog BPF program to attach + * @param retprobe Attach to function exit + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains the function symbol + * @param func_offset Offset within the binary of the function symbol + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */  LIBBPF_API struct bpf_link *  bpf_program__attach_uprobe(const struct bpf_program *prog, bool retprobe,  			   pid_t pid, const char *binary_path,  			   size_t func_offset); + +/** + * @brief **bpf_program__attach_uprobe_opts()** is just like + * bpf_program__attach_uprobe() except with a options struct + * for various configurations. + * + * @param prog BPF program to attach + * @param pid Process ID to attach the uprobe to, 0 for self (own process), + * -1 for all processes + * @param binary_path Path to binary that contains the function symbol + * @param func_offset Offset within the binary of the function symbol + * @param opts Options for altering program attachment + * @return Reference to the newly created BPF link; or NULL is returned on error, + * error code is stored in errno + */  LIBBPF_API struct bpf_link *  bpf_program__attach_uprobe_opts(const struct bpf_program *prog, pid_t pid,  				const char *binary_path, size_t func_offset, @@ -431,7 +553,6 @@ bpf_program__attach_iter(const struct bpf_program *prog,   * one instance. In this case bpf_program__fd(prog) is equal to   * bpf_program__nth_fd(prog, 0).   */ -LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_program__insns() for getting bpf_program instructions")  struct bpf_prog_prep_result {  	/*  	 * If not NULL, load new instruction array. @@ -494,6 +615,18 @@ LIBBPF_API void  bpf_program__set_expected_attach_type(struct bpf_program *prog,  				      enum bpf_attach_type type); +LIBBPF_API __u32 bpf_program__flags(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_flags(struct bpf_program *prog, __u32 flags); + +/* Per-program log level and log buffer getters/setters. + * See bpf_object_open_opts comments regarding log_level and log_buf + * interactions. + */ +LIBBPF_API __u32 bpf_program__log_level(const struct bpf_program *prog); +LIBBPF_API int bpf_program__set_log_level(struct bpf_program *prog, __u32 log_level); +LIBBPF_API const char *bpf_program__log_buf(const struct bpf_program *prog, size_t *log_size); +LIBBPF_API int bpf_program__set_log_buf(struct bpf_program *prog, char *log_buf, size_t log_size); +  LIBBPF_API int  bpf_program__set_attach_target(struct bpf_program *prog, int attach_prog_fd,  			       const char *attach_func_name); @@ -544,7 +677,8 @@ bpf_object__find_map_fd_by_name(const struct bpf_object *obj, const char *name);   * Get bpf_map through the offset of corresponding struct bpf_map_def   * in the BPF object file.   
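A usage sketch for the per-program log setters declared above (not part of the patch; the buffer size and log level are arbitrary choices):

static char prog_log[16 * 1024];

static void route_verifier_log(struct bpf_program *prog)
{
	/* takes precedence over any object-level kernel_log_buf for this program */
	bpf_program__set_log_buf(prog, prog_log, sizeof(prog_log));
	bpf_program__set_log_level(prog, 2);
}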
*/ -LIBBPF_API struct bpf_map * +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__find_map_by_name() instead") +struct bpf_map *  bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset);  LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__next_map() instead") @@ -611,6 +745,7 @@ LIBBPF_API void *bpf_map__priv(const struct bpf_map *map);  LIBBPF_API int bpf_map__set_initial_value(struct bpf_map *map,  					  const void *data, size_t size);  LIBBPF_API const void *bpf_map__initial_value(struct bpf_map *map, size_t *psize); +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_map__type() instead")  LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);  /** @@ -674,10 +809,12 @@ struct bpf_prog_load_attr {  	int prog_flags;  }; +LIBBPF_DEPRECATED_SINCE(0, 8, "use bpf_object__open() and bpf_object__load() instead")  LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  				   struct bpf_object **pobj, int *prog_fd); -LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, -			     struct bpf_object **pobj, int *prog_fd); +LIBBPF_DEPRECATED_SINCE(0, 7, "use bpf_object__open() and bpf_object__load() instead") +LIBBPF_API int bpf_prog_load_deprecated(const char *file, enum bpf_prog_type type, +					struct bpf_object **pobj, int *prog_fd);  /* XDP related API */  struct xdp_link_info { @@ -775,18 +912,52 @@ typedef void (*perf_buffer_lost_fn)(void *ctx, int cpu, __u64 cnt);  /* common use perf buffer options */  struct perf_buffer_opts { -	/* if specified, sample_cb is called for each sample */ -	perf_buffer_sample_fn sample_cb; -	/* if specified, lost_cb is called for each batch of lost samples */ -	perf_buffer_lost_fn lost_cb; -	/* ctx is provided to sample_cb and lost_cb */ -	void *ctx; +	union { +		size_t sz; +		struct { /* DEPRECATED: will be removed in v1.0 */ +			/* if specified, sample_cb is called for each sample */ +			perf_buffer_sample_fn sample_cb; +			/* if specified, lost_cb is called for each batch of lost samples */ +			perf_buffer_lost_fn lost_cb; +			/* ctx is provided to sample_cb and lost_cb */ +			void *ctx; +		}; +	};  }; +#define perf_buffer_opts__last_field sz +/** + * @brief **perf_buffer__new()** creates BPF perfbuf manager for a specified + * BPF_PERF_EVENT_ARRAY map + * @param map_fd FD of BPF_PERF_EVENT_ARRAY BPF map that will be used by BPF + * code to send data over to user-space + * @param page_cnt number of memory pages allocated for each per-CPU buffer + * @param sample_cb function called on each received data record + * @param lost_cb function called when record loss has occurred + * @param ctx user-provided extra context passed into *sample_cb* and *lost_cb* + * @return a new instance of struct perf_buffer on success, NULL on error with + * *errno* containing an error code + */  LIBBPF_API struct perf_buffer *  perf_buffer__new(int map_fd, size_t page_cnt, +		 perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx,  		 const struct perf_buffer_opts *opts); +LIBBPF_API struct perf_buffer * +perf_buffer__new_v0_6_0(int map_fd, size_t page_cnt, +			perf_buffer_sample_fn sample_cb, perf_buffer_lost_fn lost_cb, void *ctx, +			const struct perf_buffer_opts *opts); + +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new() instead") +struct perf_buffer *perf_buffer__new_deprecated(int map_fd, size_t page_cnt, +						const struct perf_buffer_opts *opts); + +#define perf_buffer__new(...) 
___libbpf_overload(___perf_buffer_new, __VA_ARGS__) +#define ___perf_buffer_new6(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) \ +	perf_buffer__new(map_fd, page_cnt, sample_cb, lost_cb, ctx, opts) +#define ___perf_buffer_new3(map_fd, page_cnt, opts) \ +	perf_buffer__new_deprecated(map_fd, page_cnt, opts) +  enum bpf_perf_event_ret {  	LIBBPF_PERF_EVENT_DONE	= 0,  	LIBBPF_PERF_EVENT_ERROR	= -1, @@ -800,12 +971,21 @@ typedef enum bpf_perf_event_ret  /* raw perf buffer options, giving most power and control */  struct perf_buffer_raw_opts { -	/* perf event attrs passed directly into perf_event_open() */ -	struct perf_event_attr *attr; -	/* raw event callback */ -	perf_buffer_event_fn event_cb; -	/* ctx is provided to event_cb */ -	void *ctx; +	union { +		struct { +			size_t sz; +			long :0; +			long :0; +		}; +		struct { /* DEPRECATED: will be removed in v1.0 */ +			/* perf event attrs passed directly into perf_event_open() */ +			struct perf_event_attr *attr; +			/* raw event callback */ +			perf_buffer_event_fn event_cb; +			/* ctx is provided to event_cb */ +			void *ctx; +		}; +	};  	/* if cpu_cnt == 0, open all on all possible CPUs (up to the number of  	 * max_entries of given PERF_EVENT_ARRAY map)  	 */ @@ -815,11 +995,28 @@ struct perf_buffer_raw_opts {  	/* if cpu_cnt > 0, map_keys specify map keys to set per-CPU FDs for */  	int *map_keys;  }; +#define perf_buffer_raw_opts__last_field map_keys  LIBBPF_API struct perf_buffer * -perf_buffer__new_raw(int map_fd, size_t page_cnt, +perf_buffer__new_raw(int map_fd, size_t page_cnt, struct perf_event_attr *attr, +		     perf_buffer_event_fn event_cb, void *ctx,  		     const struct perf_buffer_raw_opts *opts); +LIBBPF_API struct perf_buffer * +perf_buffer__new_raw_v0_6_0(int map_fd, size_t page_cnt, struct perf_event_attr *attr, +			    perf_buffer_event_fn event_cb, void *ctx, +			    const struct perf_buffer_raw_opts *opts); + +LIBBPF_API LIBBPF_DEPRECATED_SINCE(0, 7, "use new variant of perf_buffer__new_raw() instead") +struct perf_buffer *perf_buffer__new_raw_deprecated(int map_fd, size_t page_cnt, +						    const struct perf_buffer_raw_opts *opts); + +#define perf_buffer__new_raw(...) ___libbpf_overload(___perf_buffer_new_raw, __VA_ARGS__) +#define ___perf_buffer_new_raw6(map_fd, page_cnt, attr, event_cb, ctx, opts) \ +	perf_buffer__new_raw(map_fd, page_cnt, attr, event_cb, ctx, opts) +#define ___perf_buffer_new_raw3(map_fd, page_cnt, opts) \ +	perf_buffer__new_raw_deprecated(map_fd, page_cnt, opts) +  LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);  LIBBPF_API int perf_buffer__epoll_fd(const struct perf_buffer *pb);  LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms); @@ -831,6 +1028,7 @@ LIBBPF_API int perf_buffer__buffer_fd(const struct perf_buffer *pb, size_t buf_i  typedef enum bpf_perf_event_ret  	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,  				  void *private_data); +LIBBPF_DEPRECATED_SINCE(0, 8, "use perf_buffer__poll() or  perf_buffer__consume() instead")  LIBBPF_API enum bpf_perf_event_ret  bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size,  			   void **copy_mem, size_t *copy_size, @@ -857,13 +1055,57 @@ bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo,   * user, causing subsequent probes to fail. In this case, the caller may want   * to adjust that limit with setrlimit().   
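A sketch of the post-0.6.0 perf_buffer__new() calling convention (not part of the patch); map_fd is assumed to refer to a BPF_MAP_TYPE_PERF_EVENT_ARRAY map and the callbacks are illustrative:

#include <bpf/libbpf.h>

static void on_sample(void *ctx, int cpu, void *data, __u32 size)
{
	/* handle one record emitted by the BPF program */
}

static void on_lost(void *ctx, int cpu, __u64 cnt)
{
	/* cnt records were dropped on this CPU */
}

struct perf_buffer *setup_perfbuf(int map_fd)
{
	/* 64 pages per CPU ring; callbacks and ctx are now direct arguments,
	 * opts can stay NULL
	 */
	return perf_buffer__new(map_fd, 64, on_sample, on_lost, NULL, NULL);
}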
*/ -LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, -				    __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_prog_type() instead") +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_map_type() instead")  LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); -LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, -				 enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "use libbpf_probe_bpf_helper() instead") +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, __u32 ifindex); +LIBBPF_DEPRECATED_SINCE(0, 8, "implement your own or use bpftool for feature detection")  LIBBPF_API bool bpf_probe_large_insn_limit(__u32 ifindex); +/** + * @brief **libbpf_probe_bpf_prog_type()** detects if host kernel supports + * BPF programs of a given type. + * @param prog_type BPF program type to detect kernel support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given program type is supported; 0, if given program type is + * not supported; negative error code if feature detection failed or can't be + * performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts); +/** + * @brief **libbpf_probe_bpf_map_type()** detects if host kernel supports + * BPF maps of a given type. + * @param map_type BPF map type to detect kernel support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given map type is supported; 0, if given map type is + * not supported; negative error code if feature detection failed or can't be + * performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. + */ +LIBBPF_API int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts); +/** + * @brief **libbpf_probe_bpf_helper()** detects if host kernel supports the + * use of a given BPF helper from specified BPF program type. + * @param prog_type BPF program type used to check the support of BPF helper + * @param helper_id BPF helper ID (enum bpf_func_id) to check support for + * @param opts reserved for future extensibility, should be NULL + * @return 1, if given combination of program type and helper is supported; 0, + * if the combination is not supported; negative error code if feature + * detection for provided input arguments failed or can't be performed + * + * Make sure the process has required set of CAP_* permissions (or runs as + * root) when performing feature checking. 
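An illustrative caller of the new probe API (not from the patch), relying on its 1/0/negative-errno return convention; the program type checked here is an arbitrary example:

#include <stdio.h>
#include <stdbool.h>
#include <bpf/libbpf.h>

static bool can_load_sk_lookup(void)
{
	int ret = libbpf_probe_bpf_prog_type(BPF_PROG_TYPE_SK_LOOKUP, NULL);

	if (ret < 0)
		fprintf(stderr, "probe failed: %d\n", ret);
	return ret == 1;
}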
+ */ +LIBBPF_API int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, +				       enum bpf_func_id helper_id, const void *opts); +  /*   * Get bpf_prog_info in continuous memory   * @@ -918,12 +1160,15 @@ struct bpf_prog_info_linear {  	__u8			data[];  }; +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")  LIBBPF_API struct bpf_prog_info_linear *  bpf_program__get_prog_info_linear(int fd, __u64 arrays); +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")  LIBBPF_API void  bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); +LIBBPF_DEPRECATED_SINCE(0, 6, "use a custom linear prog_info wrapper")  LIBBPF_API void  bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); @@ -965,11 +1210,11 @@ struct bpf_object_skeleton {  	struct bpf_object **obj;  	int map_cnt; -	int map_skel_sz; /* sizeof(struct bpf_skeleton_map) */ +	int map_skel_sz; /* sizeof(struct bpf_map_skeleton) */  	struct bpf_map_skeleton *maps;  	int prog_cnt; -	int prog_skel_sz; /* sizeof(struct bpf_skeleton_prog) */ +	int prog_skel_sz; /* sizeof(struct bpf_prog_skeleton) */  	struct bpf_prog_skeleton *progs;  }; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index 43580eb47740..529783967793 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -391,14 +391,44 @@ LIBBPF_0.6.0 {  	global:  		bpf_map__map_extra;  		bpf_map__set_map_extra; +		bpf_map_create;  		bpf_object__next_map;  		bpf_object__next_program;  		bpf_object__prev_map;  		bpf_object__prev_program; +		bpf_prog_load_deprecated; +		bpf_prog_load; +		bpf_program__flags;  		bpf_program__insn_cnt;  		bpf_program__insns; +		bpf_program__set_flags;  		btf__add_btf;  		btf__add_decl_tag; +		btf__add_type_tag; +		btf__dedup; +		btf__dedup_deprecated;  		btf__raw_data;  		btf__type_cnt; +		btf_dump__new; +		btf_dump__new_deprecated; +		libbpf_major_version; +		libbpf_minor_version; +		libbpf_version_string; +		perf_buffer__new; +		perf_buffer__new_deprecated; +		perf_buffer__new_raw; +		perf_buffer__new_raw_deprecated;  } LIBBPF_0.5.0; + +LIBBPF_0.7.0 { +	global: +		bpf_btf_load; +		bpf_program__log_buf; +		bpf_program__log_level; +		bpf_program__set_log_buf; +		bpf_program__set_log_level; +		libbpf_probe_bpf_helper; +		libbpf_probe_bpf_map_type; +		libbpf_probe_bpf_prog_type; +		libbpf_set_memlock_rlim_max; +}; diff --git a/tools/lib/bpf/libbpf_common.h b/tools/lib/bpf/libbpf_common.h index aaa1efbf6f51..000e37798ff2 100644 --- a/tools/lib/bpf/libbpf_common.h +++ b/tools/lib/bpf/libbpf_common.h @@ -40,6 +40,23 @@  #else  #define __LIBBPF_MARK_DEPRECATED_0_7(X)  #endif +#if __LIBBPF_CURRENT_VERSION_GEQ(0, 8) +#define __LIBBPF_MARK_DEPRECATED_0_8(X) X +#else +#define __LIBBPF_MARK_DEPRECATED_0_8(X) +#endif + +/* This set of internal macros allows to do "function overloading" based on + * number of arguments provided by used in backwards-compatible way during the + * transition to libbpf 1.0 + * It's ugly but necessary evil that will be cleaned up when we get to 1.0. + * See bpf_prog_load() overload for example. + */ +#define ___libbpf_cat(A, B) A ## B +#define ___libbpf_select(NAME, NUM) ___libbpf_cat(NAME, NUM) +#define ___libbpf_nth(_1, _2, _3, _4, _5, _6, N, ...) N +#define ___libbpf_cnt(...) ___libbpf_nth(__VA_ARGS__, 6, 5, 4, 3, 2, 1) +#define ___libbpf_overload(NAME, ...) 
___libbpf_select(NAME, ___libbpf_cnt(__VA_ARGS__))(__VA_ARGS__)  /* Helper macro to declare and initialize libbpf options struct   * @@ -54,7 +71,7 @@   * including any extra padding, it with memset() and then assigns initial   * values provided by users in struct initializer-syntax as varargs.   */ -#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)				    \ +#define LIBBPF_OPTS(TYPE, NAME, ...)					    \  	struct TYPE NAME = ({ 						    \  		memset(&NAME, 0, sizeof(struct TYPE));			    \  		(struct TYPE) {						    \ diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index aeb79e3a8ff9..1565679eb432 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -73,6 +73,8 @@  	BTF_TYPE_ENC(name, BTF_INFO_ENC(BTF_KIND_FLOAT, 0, 0), sz)  #define BTF_TYPE_DECL_TAG_ENC(value, type, component_idx) \  	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_DECL_TAG, 0, 0), type), (component_idx) +#define BTF_TYPE_TYPE_TAG_ENC(value, type) \ +	BTF_TYPE_ENC(value, BTF_INFO_ENC(BTF_KIND_TYPE_TAG, 0, 0), type)  #ifndef likely  #define likely(x) __builtin_expect(!!(x), 1) @@ -167,10 +169,31 @@ static inline void *libbpf_reallocarray(void *ptr, size_t nmemb, size_t size)  	return realloc(ptr, total);  } +/* Copy up to sz - 1 bytes from zero-terminated src string and ensure that dst + * is zero-terminated string no matter what (unless sz == 0, in which case + * it's a no-op). It's conceptually close to FreeBSD's strlcpy(), but differs + * in what is returned. Given this is internal helper, it's trivial to extend + * this, when necessary. Use this instead of strncpy inside libbpf source code. + */ +static inline void libbpf_strlcpy(char *dst, const char *src, size_t sz) +{ +	size_t i; + +	if (sz == 0) +		return; + +	sz--; +	for (i = 0; i < sz && src[i]; i++) +		dst[i] = src[i]; +	dst[i] = '\0'; +} + +__u32 get_kernel_version(void); +  struct btf;  struct btf_type; -struct btf_type *btf_type_by_id(struct btf *btf, __u32 type_id); +struct btf_type *btf_type_by_id(const struct btf *btf, __u32 type_id);  const char *btf_kind_str(const struct btf_type *t);  const struct btf_type *skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id); @@ -270,63 +293,51 @@ static inline bool libbpf_validate_opts(const char *opts,  					(opts)->sz - __off);		      \  }) +enum kern_feature_id { +	/* v4.14: kernel support for program & map names. */ +	FEAT_PROG_NAME, +	/* v5.2: kernel support for global data sections. 
*/ +	FEAT_GLOBAL_DATA, +	/* BTF support */ +	FEAT_BTF, +	/* BTF_KIND_FUNC and BTF_KIND_FUNC_PROTO support */ +	FEAT_BTF_FUNC, +	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */ +	FEAT_BTF_DATASEC, +	/* BTF_FUNC_GLOBAL is supported */ +	FEAT_BTF_GLOBAL_FUNC, +	/* BPF_F_MMAPABLE is supported for arrays */ +	FEAT_ARRAY_MMAP, +	/* kernel support for expected_attach_type in BPF_PROG_LOAD */ +	FEAT_EXP_ATTACH_TYPE, +	/* bpf_probe_read_{kernel,user}[_str] helpers */ +	FEAT_PROBE_READ_KERN, +	/* BPF_PROG_BIND_MAP is supported */ +	FEAT_PROG_BIND_MAP, +	/* Kernel support for module BTFs */ +	FEAT_MODULE_BTF, +	/* BTF_KIND_FLOAT support */ +	FEAT_BTF_FLOAT, +	/* BPF perf link support */ +	FEAT_PERF_LINK, +	/* BTF_KIND_DECL_TAG support */ +	FEAT_BTF_DECL_TAG, +	/* BTF_KIND_TYPE_TAG support */ +	FEAT_BTF_TYPE_TAG, +	/* memcg-based accounting for BPF maps and progs */ +	FEAT_MEMCG_ACCOUNT, +	__FEAT_CNT, +}; + +int probe_memcg_account(void); +bool kernel_supports(const struct bpf_object *obj, enum kern_feature_id feat_id); +int bump_rlimit_memlock(void);  int parse_cpu_mask_str(const char *s, bool **mask, int *mask_sz);  int parse_cpu_mask_file(const char *fcpu, bool **mask, int *mask_sz);  int libbpf__load_raw_btf(const char *raw_types, size_t types_len,  			 const char *str_sec, size_t str_len); - -struct bpf_prog_load_params { -	enum bpf_prog_type prog_type; -	enum bpf_attach_type expected_attach_type; -	const char *name; -	const struct bpf_insn *insns; -	size_t insn_cnt; -	const char *license; -	__u32 kern_version; -	__u32 attach_prog_fd; -	__u32 attach_btf_obj_fd; -	__u32 attach_btf_id; -	__u32 prog_ifindex; -	__u32 prog_btf_fd; -	__u32 prog_flags; - -	__u32 func_info_rec_size; -	const void *func_info; -	__u32 func_info_cnt; - -	__u32 line_info_rec_size; -	const void *line_info; -	__u32 line_info_cnt; - -	__u32 log_level; -	char *log_buf; -	size_t log_buf_sz; -	int *fd_array; -}; - -int libbpf__bpf_prog_load(const struct bpf_prog_load_params *load_attr); - -struct bpf_create_map_params { -	const char *name; -	enum bpf_map_type map_type; -	__u32 map_flags; -	__u32 key_size; -	__u32 value_size; -	__u32 max_entries; -	__u32 numa_node; -	__u32 btf_fd; -	__u32 btf_key_type_id; -	__u32 btf_value_type_id; -	__u32 map_ifindex; -	union { -		__u32 inner_map_fd; -		__u32 btf_vmlinux_value_type_id; -	}; -	__u64 map_extra; -}; - -int libbpf__bpf_create_map_xattr(const struct bpf_create_map_params *create_attr); +int btf_load_into_kernel(struct btf *btf, char *log_buf, size_t log_sz, __u32 log_level);  struct btf *btf_get_from_fd(int btf_fd, struct btf *base_btf);  void btf_get_kernel_prefix_kind(enum bpf_attach_type attach_type, diff --git a/tools/lib/bpf/libbpf_legacy.h b/tools/lib/bpf/libbpf_legacy.h index 5ba5c9beccfa..79131f761a27 100644 --- a/tools/lib/bpf/libbpf_legacy.h +++ b/tools/lib/bpf/libbpf_legacy.h @@ -45,7 +45,6 @@ enum libbpf_strict_mode {  	 * (positive) error code.  	 */  	LIBBPF_STRICT_DIRECT_ERRS = 0x02, -  	/*  	 * Enforce strict BPF program section (SEC()) names.  	 * E.g., while prefiously SEC("xdp_whatever") or SEC("perf_event_blah") were @@ -63,12 +62,24 @@ enum libbpf_strict_mode {  	 * Clients can maintain it on their own if it is valuable for them.  	 */  	LIBBPF_STRICT_NO_OBJECT_LIST = 0x08, +	/* +	 * Automatically bump RLIMIT_MEMLOCK using setrlimit() before the +	 * first BPF program or map creation operation. This is done only if +	 * kernel is too old to support memcg-based memory accounting for BPF +	 * subsystem. 
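A sketch of opting in to this behavior from an application (not part of the patch):

#include <bpf/libbpf.h>
#include <bpf/libbpf_legacy.h>

int main(void)
{
	/* must run before the first program load, map creation or object load */
	libbpf_set_strict_mode(LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK);

	/* ... open and load BPF objects as usual ... */
	return 0;
}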
By default, RLIMIT_MEMLOCK limit is set to RLIM_INFINITY, +	 * but it can be overriden with libbpf_set_memlock_rlim_max() API. +	 * Note that libbpf_set_memlock_rlim_max() needs to be called before +	 * the very first bpf_prog_load(), bpf_map_create() or bpf_object__load() +	 * operation. +	 */ +	LIBBPF_STRICT_AUTO_RLIMIT_MEMLOCK = 0x10,  	__LIBBPF_STRICT_LAST,  };  LIBBPF_API int libbpf_set_strict_mode(enum libbpf_strict_mode mode); +#define DECLARE_LIBBPF_OPTS LIBBPF_OPTS  #ifdef __cplusplus  } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 68f2dbf364aa..97b06cede56f 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -48,41 +48,65 @@ static int get_vendor_id(int ifindex)  	return strtol(buf, NULL, 0);  } -static int get_kernel_version(void) +static int probe_prog_load(enum bpf_prog_type prog_type, +			   const struct bpf_insn *insns, size_t insns_cnt, +			   char *log_buf, size_t log_buf_sz, +			   __u32 ifindex)  { -	int version, subversion, patchlevel; -	struct utsname utsn; - -	/* Return 0 on failure, and attempt to probe with empty kversion */ -	if (uname(&utsn)) -		return 0; - -	if (sscanf(utsn.release, "%d.%d.%d", -		   &version, &subversion, &patchlevel) != 3) -		return 0; - -	return (version << 16) + (subversion << 8) + patchlevel; -} - -static void -probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, -	   size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) -{ -	struct bpf_load_program_attr xattr = {}; -	int fd; +	LIBBPF_OPTS(bpf_prog_load_opts, opts, +		.log_buf = log_buf, +		.log_size = log_buf_sz, +		.log_level = log_buf ? 1 : 0, +		.prog_ifindex = ifindex, +	); +	int fd, err, exp_err = 0; +	const char *exp_msg = NULL; +	char buf[4096];  	switch (prog_type) {  	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: -		xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; +		opts.expected_attach_type = BPF_CGROUP_INET4_CONNECT;  		break;  	case BPF_PROG_TYPE_CGROUP_SOCKOPT: -		xattr.expected_attach_type = BPF_CGROUP_GETSOCKOPT; +		opts.expected_attach_type = BPF_CGROUP_GETSOCKOPT;  		break;  	case BPF_PROG_TYPE_SK_LOOKUP: -		xattr.expected_attach_type = BPF_SK_LOOKUP; +		opts.expected_attach_type = BPF_SK_LOOKUP;  		break;  	case BPF_PROG_TYPE_KPROBE: -		xattr.kern_version = get_kernel_version(); +		opts.kern_version = get_kernel_version(); +		break; +	case BPF_PROG_TYPE_LIRC_MODE2: +		opts.expected_attach_type = BPF_LIRC_MODE2; +		break; +	case BPF_PROG_TYPE_TRACING: +	case BPF_PROG_TYPE_LSM: +		opts.log_buf = buf; +		opts.log_size = sizeof(buf); +		opts.log_level = 1; +		if (prog_type == BPF_PROG_TYPE_TRACING) +			opts.expected_attach_type = BPF_TRACE_FENTRY; +		else +			opts.expected_attach_type = BPF_MODIFY_RETURN; +		opts.attach_btf_id = 1; + +		exp_err = -EINVAL; +		exp_msg = "attach_btf_id 1 is not a function"; +		break; +	case BPF_PROG_TYPE_EXT: +		opts.log_buf = buf; +		opts.log_size = sizeof(buf); +		opts.log_level = 1; +		opts.attach_btf_id = 1; + +		exp_err = -EINVAL; +		exp_msg = "Cannot replace kernel functions"; +		break; +	case BPF_PROG_TYPE_SYSCALL: +		opts.prog_flags = BPF_F_SLEEPABLE; +		break; +	case BPF_PROG_TYPE_STRUCT_OPS: +		exp_err = -524; /* -ENOTSUPP */  		break;  	case BPF_PROG_TYPE_UNSPEC:  	case BPF_PROG_TYPE_SOCKET_FILTER: @@ -103,27 +127,42 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,  	case BPF_PROG_TYPE_RAW_TRACEPOINT:  	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:  	case BPF_PROG_TYPE_LWT_SEG6LOCAL: -	case 
BPF_PROG_TYPE_LIRC_MODE2:  	case BPF_PROG_TYPE_SK_REUSEPORT:  	case BPF_PROG_TYPE_FLOW_DISSECTOR:  	case BPF_PROG_TYPE_CGROUP_SYSCTL: -	case BPF_PROG_TYPE_TRACING: -	case BPF_PROG_TYPE_STRUCT_OPS: -	case BPF_PROG_TYPE_EXT: -	case BPF_PROG_TYPE_LSM: -	default:  		break; +	default: +		return -EOPNOTSUPP;  	} -	xattr.prog_type = prog_type; -	xattr.insns = insns; -	xattr.insns_cnt = insns_cnt; -	xattr.license = "GPL"; -	xattr.prog_ifindex = ifindex; - -	fd = bpf_load_program_xattr(&xattr, buf, buf_len); +	fd = bpf_prog_load(prog_type, NULL, "GPL", insns, insns_cnt, &opts); +	err = -errno;  	if (fd >= 0)  		close(fd); +	if (exp_err) { +		if (fd >= 0 || err != exp_err) +			return 0; +		if (exp_msg && !strstr(buf, exp_msg)) +			return 0; +		return 1; +	} +	return fd >= 0 ? 1 : 0; +} + +int libbpf_probe_bpf_prog_type(enum bpf_prog_type prog_type, const void *opts) +{ +	struct bpf_insn insns[] = { +		BPF_MOV64_IMM(BPF_REG_0, 0), +		BPF_EXIT_INSN() +	}; +	const size_t insn_cnt = ARRAY_SIZE(insns); +	int ret; + +	if (opts) +		return libbpf_err(-EINVAL); + +	ret = probe_prog_load(prog_type, insns, insn_cnt, NULL, 0, 0); +	return libbpf_err(ret);  }  bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) @@ -133,12 +172,16 @@ bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex)  		BPF_EXIT_INSN()  	}; +	/* prefer libbpf_probe_bpf_prog_type() unless offload is requested */ +	if (ifindex == 0) +		return libbpf_probe_bpf_prog_type(prog_type, NULL) == 1; +  	if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS)  		/* nfp returns -EINVAL on exit(0) with TC offload */  		insns[0].imm = 2;  	errno = 0; -	probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); +	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex);  	return errno != EINVAL && errno != EOPNOTSUPP;  } @@ -166,7 +209,7 @@ int libbpf__load_raw_btf(const char *raw_types, size_t types_len,  	memcpy(raw_btf + hdr.hdr_len, raw_types, hdr.type_len);  	memcpy(raw_btf + hdr.hdr_len + hdr.type_len, str_sec, hdr.str_len); -	btf_fd = bpf_load_btf(raw_btf, btf_len, NULL, 0, false); +	btf_fd = bpf_btf_load(raw_btf, btf_len, NULL);  	free(raw_btf);  	return btf_fd; @@ -199,17 +242,18 @@ static int load_local_storage_btf(void)  				     strs, sizeof(strs));  } -bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +static int probe_map_create(enum bpf_map_type map_type, __u32 ifindex)  { -	int key_size, value_size, max_entries, map_flags; +	LIBBPF_OPTS(bpf_map_create_opts, opts); +	int key_size, value_size, max_entries;  	__u32 btf_key_type_id = 0, btf_value_type_id = 0; -	struct bpf_create_map_attr attr = {}; -	int fd = -1, btf_fd = -1, fd_inner; +	int fd = -1, btf_fd = -1, fd_inner = -1, exp_err = 0, err; + +	opts.map_ifindex = ifindex;  	key_size	= sizeof(__u32);  	value_size	= sizeof(__u32);  	max_entries	= 1; -	map_flags	= 0;  	switch (map_type) {  	case BPF_MAP_TYPE_STACK_TRACE: @@ -218,7 +262,7 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)  	case BPF_MAP_TYPE_LPM_TRIE:  		key_size	= sizeof(__u64);  		value_size	= sizeof(__u64); -		map_flags	= BPF_F_NO_PREALLOC; +		opts.map_flags	= BPF_F_NO_PREALLOC;  		break;  	case BPF_MAP_TYPE_CGROUP_STORAGE:  	case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: @@ -237,17 +281,25 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)  		btf_value_type_id = 3;  		value_size = 8;  		max_entries = 0; -		map_flags = BPF_F_NO_PREALLOC; +		opts.map_flags = BPF_F_NO_PREALLOC;  		btf_fd = load_local_storage_btf();  		if 
(btf_fd < 0) -			return false; +			return btf_fd;  		break;  	case BPF_MAP_TYPE_RINGBUF:  		key_size = 0;  		value_size = 0;  		max_entries = 4096;  		break; -	case BPF_MAP_TYPE_UNSPEC: +	case BPF_MAP_TYPE_STRUCT_OPS: +		/* we'll get -ENOTSUPP for invalid BTF type ID for struct_ops */ +		opts.btf_vmlinux_value_type_id = 1; +		exp_err = -524; /* -ENOTSUPP */ +		break; +	case BPF_MAP_TYPE_BLOOM_FILTER: +		key_size = 0; +		max_entries = 1; +		break;  	case BPF_MAP_TYPE_HASH:  	case BPF_MAP_TYPE_ARRAY:  	case BPF_MAP_TYPE_PROG_ARRAY: @@ -266,9 +318,10 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)  	case BPF_MAP_TYPE_XSKMAP:  	case BPF_MAP_TYPE_SOCKHASH:  	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: -	case BPF_MAP_TYPE_STRUCT_OPS: -	default:  		break; +	case BPF_MAP_TYPE_UNSPEC: +	default: +		return -EOPNOTSUPP;  	}  	if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || @@ -277,37 +330,102 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)  		 * map-in-map for offload  		 */  		if (ifindex) -			return false; +			goto cleanup; -		fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, -					  sizeof(__u32), sizeof(__u32), 1, 0); +		fd_inner = bpf_map_create(BPF_MAP_TYPE_HASH, NULL, +					  sizeof(__u32), sizeof(__u32), 1, NULL);  		if (fd_inner < 0) -			return false; -		fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), -					   fd_inner, 1, 0); -		close(fd_inner); -	} else { -		/* Note: No other restriction on map type probes for offload */ -		attr.map_type = map_type; -		attr.key_size = key_size; -		attr.value_size = value_size; -		attr.max_entries = max_entries; -		attr.map_flags = map_flags; -		attr.map_ifindex = ifindex; -		if (btf_fd >= 0) { -			attr.btf_fd = btf_fd; -			attr.btf_key_type_id = btf_key_type_id; -			attr.btf_value_type_id = btf_value_type_id; -		} +			goto cleanup; + +		opts.inner_map_fd = fd_inner; +	} -		fd = bpf_create_map_xattr(&attr); +	if (btf_fd >= 0) { +		opts.btf_fd = btf_fd; +		opts.btf_key_type_id = btf_key_type_id; +		opts.btf_value_type_id = btf_value_type_id;  	} + +	fd = bpf_map_create(map_type, NULL, key_size, value_size, max_entries, &opts); +	err = -errno; + +cleanup:  	if (fd >= 0)  		close(fd); +	if (fd_inner >= 0) +		close(fd_inner);  	if (btf_fd >= 0)  		close(btf_fd); -	return fd >= 0; +	if (exp_err) +		return fd < 0 && err == exp_err ? 1 : 0; +	else +		return fd >= 0 ? 
1 : 0; +} + +int libbpf_probe_bpf_map_type(enum bpf_map_type map_type, const void *opts) +{ +	int ret; + +	if (opts) +		return libbpf_err(-EINVAL); + +	ret = probe_map_create(map_type, 0); +	return libbpf_err(ret); +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ +	return probe_map_create(map_type, ifindex) == 1; +} + +int libbpf_probe_bpf_helper(enum bpf_prog_type prog_type, enum bpf_func_id helper_id, +			    const void *opts) +{ +	struct bpf_insn insns[] = { +		BPF_EMIT_CALL((__u32)helper_id), +		BPF_EXIT_INSN(), +	}; +	const size_t insn_cnt = ARRAY_SIZE(insns); +	char buf[4096]; +	int ret; + +	if (opts) +		return libbpf_err(-EINVAL); + +	/* we can't successfully load all prog types to check for BPF helper +	 * support, so bail out with -EOPNOTSUPP error +	 */ +	switch (prog_type) { +	case BPF_PROG_TYPE_TRACING: +	case BPF_PROG_TYPE_EXT: +	case BPF_PROG_TYPE_LSM: +	case BPF_PROG_TYPE_STRUCT_OPS: +		return -EOPNOTSUPP; +	default: +		break; +	} + +	buf[0] = '\0'; +	ret = probe_prog_load(prog_type, insns, insn_cnt, buf, sizeof(buf), 0); +	if (ret < 0) +		return libbpf_err(ret); + +	/* If BPF verifier doesn't recognize BPF helper ID (enum bpf_func_id) +	 * at all, it will emit something like "invalid func unknown#181". +	 * If BPF verifier recognizes BPF helper but it's not supported for +	 * given BPF program type, it will emit "unknown func bpf_sys_bpf#166". +	 * In both cases, provided combination of BPF program type and BPF +	 * helper is not supported by the kernel. +	 * In all other cases, probe_prog_load() above will either succeed (e.g., +	 * because BPF helper happens to accept no input arguments or it +	 * accepts one input argument and initial PTR_TO_CTX is fine for +	 * that), or we'll get some more specific BPF verifier error about +	 * some unsatisfied conditions. 
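For completeness, a sketch of how a caller might consume this helper probe (not from the patch); the socket filter program type is an arbitrary choice:

static int have_coarse_ns_helper(void)
{
	return libbpf_probe_bpf_helper(BPF_PROG_TYPE_SOCKET_FILTER,
				       BPF_FUNC_ktime_get_coarse_ns, NULL) == 1;
}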
+	 */ +	if (ret == 0 && (strstr(buf, "invalid func ") || strstr(buf, "unknown func "))) +		return 0; +	return 1; /* assume supported */  }  bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, @@ -320,8 +438,7 @@ bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type,  	char buf[4096] = {};  	bool res; -	probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), -		   ifindex); +	probe_prog_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), ifindex);  	res = !grep(buf, "invalid func ") && !grep(buf, "unknown func ");  	if (ifindex) { @@ -353,8 +470,8 @@ bool bpf_probe_large_insn_limit(__u32 ifindex)  	insns[BPF_MAXINSNS] = BPF_EXIT_INSN();  	errno = 0; -	probe_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, -		   ifindex); +	probe_prog_load(BPF_PROG_TYPE_SCHED_CLS, insns, ARRAY_SIZE(insns), NULL, 0, +			ifindex);  	return errno != E2BIG && errno != EINVAL;  } diff --git a/tools/lib/bpf/libbpf_version.h b/tools/lib/bpf/libbpf_version.h index dd56d76f291c..0fefefc3500b 100644 --- a/tools/lib/bpf/libbpf_version.h +++ b/tools/lib/bpf/libbpf_version.h @@ -4,6 +4,6 @@  #define __LIBBPF_VERSION_H  #define LIBBPF_MAJOR_VERSION 0 -#define LIBBPF_MINOR_VERSION 6 +#define LIBBPF_MINOR_VERSION 7  #endif /* __LIBBPF_VERSION_H */ diff --git a/tools/lib/bpf/linker.c b/tools/lib/bpf/linker.c index f677dccdeae4..9aa016fb55aa 100644 --- a/tools/lib/bpf/linker.c +++ b/tools/lib/bpf/linker.c @@ -210,6 +210,7 @@ void bpf_linker__free(struct bpf_linker *linker)  	}  	free(linker->secs); +	free(linker->glob_syms);  	free(linker);  } @@ -1999,7 +2000,7 @@ add_sym:  static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *obj)  {  	struct src_sec *src_symtab = &obj->secs[obj->symtab_sec_idx]; -	struct dst_sec *dst_symtab = &linker->secs[linker->symtab_sec_idx]; +	struct dst_sec *dst_symtab;  	int i, err;  	for (i = 1; i < obj->sec_cnt; i++) { @@ -2032,6 +2033,9 @@ static int linker_append_elf_relos(struct bpf_linker *linker, struct src_obj *ob  			return -1;  		} +		/* add_dst_sec() above could have invalidated linker->secs */ +		dst_symtab = &linker->secs[linker->symtab_sec_idx]; +  		/* shdr->sh_link points to SYMTAB */  		dst_sec->shdr->sh_link = linker->symtab_sec_idx; @@ -2650,6 +2654,7 @@ static int emit_elf_data_sec(struct bpf_linker *linker, const char *sec_name,  static int finalize_btf(struct bpf_linker *linker)  { +	LIBBPF_OPTS(btf_dedup_opts, opts);  	struct btf *btf = linker->btf;  	const void *raw_data;  	int i, j, id, err; @@ -2686,7 +2691,8 @@ static int finalize_btf(struct bpf_linker *linker)  		return err;  	} -	err = btf__dedup(linker->btf, linker->btf_ext, NULL); +	opts.btf_ext = linker->btf_ext; +	err = btf__dedup(linker->btf, &opts);  	if (err) {  		pr_warn("BTF dedup failed: %d\n", err);  		return err; diff --git a/tools/lib/bpf/relo_core.c b/tools/lib/bpf/relo_core.c index b5b8956a1be8..910865e29edc 100644 --- a/tools/lib/bpf/relo_core.c +++ b/tools/lib/bpf/relo_core.c @@ -1,6 +1,60 @@  // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)  /* Copyright (c) 2019 Facebook */ +#ifdef __KERNEL__ +#include <linux/bpf.h> +#include <linux/btf.h> +#include <linux/string.h> +#include <linux/bpf_verifier.h> +#include "relo_core.h" + +static const char *btf_kind_str(const struct btf_type *t) +{ +	return btf_type_str(t); +} + +static bool is_ldimm64_insn(struct bpf_insn *insn) +{ +	return insn->code == (BPF_LD | BPF_IMM | BPF_DW); +} + +static const struct btf_type * +skip_mods_and_typedefs(const struct btf 
*btf, u32 id, u32 *res_id) +{ +	return btf_type_skip_modifiers(btf, id, res_id); +} + +static const char *btf__name_by_offset(const struct btf *btf, u32 offset) +{ +	return btf_name_by_offset(btf, offset); +} + +static s64 btf__resolve_size(const struct btf *btf, u32 type_id) +{ +	const struct btf_type *t; +	int size; + +	t = btf_type_by_id(btf, type_id); +	t = btf_resolve_size(btf, t, &size); +	if (IS_ERR(t)) +		return PTR_ERR(t); +	return size; +} + +enum libbpf_print_level { +	LIBBPF_WARN, +	LIBBPF_INFO, +	LIBBPF_DEBUG, +}; + +#undef pr_warn +#undef pr_info +#undef pr_debug +#define pr_warn(fmt, log, ...)	bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define pr_info(fmt, log, ...)	bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define pr_debug(fmt, log, ...)	bpf_log((void *)log, fmt, "", ##__VA_ARGS__) +#define libbpf_print(level, fmt, ...)	bpf_log((void *)prog_name, fmt, ##__VA_ARGS__) +#else  #include <stdio.h>  #include <string.h>  #include <errno.h> @@ -12,33 +66,7 @@  #include "btf.h"  #include "str_error.h"  #include "libbpf_internal.h" - -#define BPF_CORE_SPEC_MAX_LEN 64 - -/* represents BPF CO-RE field or array element accessor */ -struct bpf_core_accessor { -	__u32 type_id;		/* struct/union type or array element type */ -	__u32 idx;		/* field index or array index */ -	const char *name;	/* field name or NULL for array accessor */ -}; - -struct bpf_core_spec { -	const struct btf *btf; -	/* high-level spec: named fields and array indices only */ -	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; -	/* original unresolved (no skip_mods_or_typedefs) root type ID */ -	__u32 root_type_id; -	/* CO-RE relocation kind */ -	enum bpf_core_relo_kind relo_kind; -	/* high-level spec length */ -	int len; -	/* raw, low-level spec: 1-to-1 with accessor spec string */ -	int raw_spec[BPF_CORE_SPEC_MAX_LEN]; -	/* raw spec length */ -	int raw_len; -	/* field bit offset represented by spec */ -	__u32 bit_offset; -}; +#endif  static bool is_flex_arr(const struct btf *btf,  			const struct bpf_core_accessor *acc, @@ -51,25 +79,25 @@ static bool is_flex_arr(const struct btf *btf,  		return false;  	/* has to be the last member of enclosing struct */ -	t = btf__type_by_id(btf, acc->type_id); +	t = btf_type_by_id(btf, acc->type_id);  	return acc->idx == btf_vlen(t) - 1;  }  static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)  {  	switch (kind) { -	case BPF_FIELD_BYTE_OFFSET: return "byte_off"; -	case BPF_FIELD_BYTE_SIZE: return "byte_sz"; -	case BPF_FIELD_EXISTS: return "field_exists"; -	case BPF_FIELD_SIGNED: return "signed"; -	case BPF_FIELD_LSHIFT_U64: return "lshift_u64"; -	case BPF_FIELD_RSHIFT_U64: return "rshift_u64"; -	case BPF_TYPE_ID_LOCAL: return "local_type_id"; -	case BPF_TYPE_ID_TARGET: return "target_type_id"; -	case BPF_TYPE_EXISTS: return "type_exists"; -	case BPF_TYPE_SIZE: return "type_size"; -	case BPF_ENUMVAL_EXISTS: return "enumval_exists"; -	case BPF_ENUMVAL_VALUE: return "enumval_value"; +	case BPF_CORE_FIELD_BYTE_OFFSET: return "byte_off"; +	case BPF_CORE_FIELD_BYTE_SIZE: return "byte_sz"; +	case BPF_CORE_FIELD_EXISTS: return "field_exists"; +	case BPF_CORE_FIELD_SIGNED: return "signed"; +	case BPF_CORE_FIELD_LSHIFT_U64: return "lshift_u64"; +	case BPF_CORE_FIELD_RSHIFT_U64: return "rshift_u64"; +	case BPF_CORE_TYPE_ID_LOCAL: return "local_type_id"; +	case BPF_CORE_TYPE_ID_TARGET: return "target_type_id"; +	case BPF_CORE_TYPE_EXISTS: return "type_exists"; +	case BPF_CORE_TYPE_SIZE: return "type_size"; +	case BPF_CORE_ENUMVAL_EXISTS: return "enumval_exists"; +	
case BPF_CORE_ENUMVAL_VALUE: return "enumval_value";  	default: return "unknown";  	}  } @@ -77,12 +105,12 @@ static const char *core_relo_kind_str(enum bpf_core_relo_kind kind)  static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)  {  	switch (kind) { -	case BPF_FIELD_BYTE_OFFSET: -	case BPF_FIELD_BYTE_SIZE: -	case BPF_FIELD_EXISTS: -	case BPF_FIELD_SIGNED: -	case BPF_FIELD_LSHIFT_U64: -	case BPF_FIELD_RSHIFT_U64: +	case BPF_CORE_FIELD_BYTE_OFFSET: +	case BPF_CORE_FIELD_BYTE_SIZE: +	case BPF_CORE_FIELD_EXISTS: +	case BPF_CORE_FIELD_SIGNED: +	case BPF_CORE_FIELD_LSHIFT_U64: +	case BPF_CORE_FIELD_RSHIFT_U64:  		return true;  	default:  		return false; @@ -92,10 +120,10 @@ static bool core_relo_is_field_based(enum bpf_core_relo_kind kind)  static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)  {  	switch (kind) { -	case BPF_TYPE_ID_LOCAL: -	case BPF_TYPE_ID_TARGET: -	case BPF_TYPE_EXISTS: -	case BPF_TYPE_SIZE: +	case BPF_CORE_TYPE_ID_LOCAL: +	case BPF_CORE_TYPE_ID_TARGET: +	case BPF_CORE_TYPE_EXISTS: +	case BPF_CORE_TYPE_SIZE:  		return true;  	default:  		return false; @@ -105,8 +133,8 @@ static bool core_relo_is_type_based(enum bpf_core_relo_kind kind)  static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)  {  	switch (kind) { -	case BPF_ENUMVAL_EXISTS: -	case BPF_ENUMVAL_VALUE: +	case BPF_CORE_ENUMVAL_EXISTS: +	case BPF_CORE_ENUMVAL_VALUE:  		return true;  	default:  		return false; @@ -150,7 +178,7 @@ static bool core_relo_is_enumval_based(enum bpf_core_relo_kind kind)   * Enum value-based relocations (ENUMVAL_EXISTS/ENUMVAL_VALUE) use access   * string to specify enumerator's value index that need to be relocated.   */ -static int bpf_core_parse_spec(const struct btf *btf, +static int bpf_core_parse_spec(const char *prog_name, const struct btf *btf,  			       __u32 type_id,  			       const char *spec_str,  			       enum bpf_core_relo_kind relo_kind, @@ -272,8 +300,8 @@ static int bpf_core_parse_spec(const struct btf *btf,  				return sz;  			spec->bit_offset += access_idx * sz * 8;  		} else { -			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", -				type_id, spec_str, i, id, btf_kind_str(t)); +			pr_warn("prog '%s': relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %s\n", +				prog_name, type_id, spec_str, i, id, btf_kind_str(t));  			return -EINVAL;  		}  	} @@ -346,8 +374,6 @@ recur:  		targ_id = btf_array(targ_type)->type;  		goto recur;  	default: -		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", -			btf_kind(local_type), local_id, targ_id);  		return 0;  	}  } @@ -388,7 +414,7 @@ static int bpf_core_match_member(const struct btf *local_btf,  		return 0;  	local_id = local_acc->type_id; -	local_type = btf__type_by_id(local_btf, local_id); +	local_type = btf_type_by_id(local_btf, local_id);  	local_member = btf_members(local_type) + local_acc->idx;  	local_name = btf__name_by_offset(local_btf, local_member->name_off); @@ -571,7 +597,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,  	*field_sz = 0; -	if (relo->kind == BPF_FIELD_EXISTS) { +	if (relo->kind == BPF_CORE_FIELD_EXISTS) {  		*val = spec ? 
1 : 0;  		return 0;  	} @@ -580,11 +606,11 @@ static int bpf_core_calc_field_relo(const char *prog_name,  		return -EUCLEAN; /* request instruction poisoning */  	acc = &spec->spec[spec->len - 1]; -	t = btf__type_by_id(spec->btf, acc->type_id); +	t = btf_type_by_id(spec->btf, acc->type_id);  	/* a[n] accessor needs special handling */  	if (!acc->name) { -		if (relo->kind == BPF_FIELD_BYTE_OFFSET) { +		if (relo->kind == BPF_CORE_FIELD_BYTE_OFFSET) {  			*val = spec->bit_offset / 8;  			/* remember field size for load/store mem size */  			sz = btf__resolve_size(spec->btf, acc->type_id); @@ -592,7 +618,7 @@ static int bpf_core_calc_field_relo(const char *prog_name,  				return -EINVAL;  			*field_sz = sz;  			*type_id = acc->type_id; -		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) { +		} else if (relo->kind == BPF_CORE_FIELD_BYTE_SIZE) {  			sz = btf__resolve_size(spec->btf, acc->type_id);  			if (sz < 0)  				return -EINVAL; @@ -644,36 +670,36 @@ static int bpf_core_calc_field_relo(const char *prog_name,  		*validate = !bitfield;  	switch (relo->kind) { -	case BPF_FIELD_BYTE_OFFSET: +	case BPF_CORE_FIELD_BYTE_OFFSET:  		*val = byte_off;  		if (!bitfield) {  			*field_sz = byte_sz;  			*type_id = field_type_id;  		}  		break; -	case BPF_FIELD_BYTE_SIZE: +	case BPF_CORE_FIELD_BYTE_SIZE:  		*val = byte_sz;  		break; -	case BPF_FIELD_SIGNED: +	case BPF_CORE_FIELD_SIGNED:  		/* enums will be assumed unsigned */  		*val = btf_is_enum(mt) ||  		       (btf_int_encoding(mt) & BTF_INT_SIGNED);  		if (validate)  			*validate = true; /* signedness is never ambiguous */  		break; -	case BPF_FIELD_LSHIFT_U64: +	case BPF_CORE_FIELD_LSHIFT_U64:  #if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__  		*val = 64 - (bit_off + bit_sz - byte_off  * 8);  #else  		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8);  #endif  		break; -	case BPF_FIELD_RSHIFT_U64: +	case BPF_CORE_FIELD_RSHIFT_U64:  		*val = 64 - bit_sz;  		if (validate)  			*validate = true; /* right shift is never ambiguous */  		break; -	case BPF_FIELD_EXISTS: +	case BPF_CORE_FIELD_EXISTS:  	default:  		return -EOPNOTSUPP;  	} @@ -683,10 +709,14 @@ static int bpf_core_calc_field_relo(const char *prog_name,  static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,  				   const struct bpf_core_spec *spec, -				   __u32 *val) +				   __u32 *val, bool *validate)  {  	__s64 sz; +	/* by default, always check expected value in bpf_insn */ +	if (validate) +		*validate = true; +  	/* type-based relos return zero when target type is not found */  	if (!spec) {  		*val = 0; @@ -694,20 +724,25 @@ static int bpf_core_calc_type_relo(const struct bpf_core_relo *relo,  	}  	switch (relo->kind) { -	case BPF_TYPE_ID_TARGET: +	case BPF_CORE_TYPE_ID_TARGET:  		*val = spec->root_type_id; +		/* type ID, embedded in bpf_insn, might change during linking, +		 * so enforcing it is pointless +		 */ +		if (validate) +			*validate = false;  		break; -	case BPF_TYPE_EXISTS: +	case BPF_CORE_TYPE_EXISTS:  		*val = 1;  		break; -	case BPF_TYPE_SIZE: +	case BPF_CORE_TYPE_SIZE:  		sz = btf__resolve_size(spec->btf, spec->root_type_id);  		if (sz < 0)  			return -EINVAL;  		*val = sz;  		break; -	case BPF_TYPE_ID_LOCAL: -	/* BPF_TYPE_ID_LOCAL is handled specially and shouldn't get here */ +	case BPF_CORE_TYPE_ID_LOCAL: +	/* BPF_CORE_TYPE_ID_LOCAL is handled specially and shouldn't get here */  	default:  		return -EOPNOTSUPP;  	} @@ -723,13 +758,13 @@ static int bpf_core_calc_enumval_relo(const struct bpf_core_relo *relo,  	const struct btf_enum *e;  	switch (relo->kind) { 
-	case BPF_ENUMVAL_EXISTS: +	case BPF_CORE_ENUMVAL_EXISTS:  		*val = spec ? 1 : 0;  		break; -	case BPF_ENUMVAL_VALUE: +	case BPF_CORE_ENUMVAL_VALUE:  		if (!spec)  			return -EUCLEAN; /* request instruction poisoning */ -		t = btf__type_by_id(spec->btf, spec->spec[0].type_id); +		t = btf_type_by_id(spec->btf, spec->spec[0].type_id);  		e = btf_enum(t) + spec->spec[0].idx;  		*val = e->val;  		break; @@ -805,8 +840,8 @@ static int bpf_core_calc_relo(const char *prog_name,  		if (res->orig_sz != res->new_sz) {  			const struct btf_type *orig_t, *new_t; -			orig_t = btf__type_by_id(local_spec->btf, res->orig_type_id); -			new_t = btf__type_by_id(targ_spec->btf, res->new_type_id); +			orig_t = btf_type_by_id(local_spec->btf, res->orig_type_id); +			new_t = btf_type_by_id(targ_spec->btf, res->new_type_id);  			/* There are two use cases in which it's safe to  			 * adjust load/store's mem size: @@ -835,8 +870,8 @@ static int bpf_core_calc_relo(const char *prog_name,  			res->fail_memsz_adjust = true;  		}  	} else if (core_relo_is_type_based(relo->kind)) { -		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val); -		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val); +		err = bpf_core_calc_type_relo(relo, local_spec, &res->orig_val, &res->validate); +		err = err ?: bpf_core_calc_type_relo(relo, targ_spec, &res->new_val, NULL);  	} else if (core_relo_is_enumval_based(relo->kind)) {  		err = bpf_core_calc_enumval_relo(relo, local_spec, &res->orig_val);  		err = err ?: bpf_core_calc_enumval_relo(relo, targ_spec, &res->new_val); @@ -1045,7 +1080,7 @@ poison:   * [<type-id>] (<type-name>) + <raw-spec> => <offset>@<spec>,   * where <spec> is a C-syntax view of recorded field access, e.g.: x.a[3].b   */ -static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec) +static void bpf_core_dump_spec(const char *prog_name, int level, const struct bpf_core_spec *spec)  {  	const struct btf_type *t;  	const struct btf_enum *e; @@ -1054,7 +1089,7 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)  	int i;  	type_id = spec->root_type_id; -	t = btf__type_by_id(spec->btf, type_id); +	t = btf_type_by_id(spec->btf, type_id);  	s = btf__name_by_offset(spec->btf, t->name_off);  	libbpf_print(level, "[%u] %s %s", type_id, btf_kind_str(t), str_is_empty(s) ? 
"<anon>" : s); @@ -1147,9 +1182,12 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  			     const struct bpf_core_relo *relo,  			     int relo_idx,  			     const struct btf *local_btf, -			     struct bpf_core_cand_list *cands) +			     struct bpf_core_cand_list *cands, +			     struct bpf_core_spec *specs_scratch)  { -	struct bpf_core_spec local_spec, cand_spec, targ_spec = {}; +	struct bpf_core_spec *local_spec = &specs_scratch[0]; +	struct bpf_core_spec *cand_spec = &specs_scratch[1]; +	struct bpf_core_spec *targ_spec = &specs_scratch[2];  	struct bpf_core_relo_res cand_res, targ_res;  	const struct btf_type *local_type;  	const char *local_name; @@ -1158,10 +1196,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  	int i, j, err;  	local_id = relo->type_id; -	local_type = btf__type_by_id(local_btf, local_id); -	if (!local_type) -		return -EINVAL; - +	local_type = btf_type_by_id(local_btf, local_id);  	local_name = btf__name_by_offset(local_btf, local_type->name_off);  	if (!local_name)  		return -EINVAL; @@ -1170,7 +1205,8 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  	if (str_is_empty(spec_str))  		return -EINVAL; -	err = bpf_core_parse_spec(local_btf, local_id, spec_str, relo->kind, &local_spec); +	err = bpf_core_parse_spec(prog_name, local_btf, local_id, spec_str, +				  relo->kind, local_spec);  	if (err) {  		pr_warn("prog '%s': relo #%d: parsing [%d] %s %s + %s failed: %d\n",  			prog_name, relo_idx, local_id, btf_kind_str(local_type), @@ -1181,15 +1217,17 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  	pr_debug("prog '%s': relo #%d: kind <%s> (%d), spec is ", prog_name,  		 relo_idx, core_relo_kind_str(relo->kind), relo->kind); -	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec); +	bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, local_spec);  	libbpf_print(LIBBPF_DEBUG, "\n");  	/* TYPE_ID_LOCAL relo is special and doesn't need candidate search */ -	if (relo->kind == BPF_TYPE_ID_LOCAL) { -		targ_res.validate = true; +	if (relo->kind == BPF_CORE_TYPE_ID_LOCAL) { +		/* bpf_insn's imm value could get out of sync during linking */ +		memset(&targ_res, 0, sizeof(targ_res)); +		targ_res.validate = false;  		targ_res.poison = false; -		targ_res.orig_val = local_spec.root_type_id; -		targ_res.new_val = local_spec.root_type_id; +		targ_res.orig_val = local_spec->root_type_id; +		targ_res.new_val = local_spec->root_type_id;  		goto patch_insn;  	} @@ -1200,40 +1238,39 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  		return -EOPNOTSUPP;  	} -  	for (i = 0, j = 0; i < cands->len; i++) { -		err = bpf_core_spec_match(&local_spec, cands->cands[i].btf, -					  cands->cands[i].id, &cand_spec); +		err = bpf_core_spec_match(local_spec, cands->cands[i].btf, +					  cands->cands[i].id, cand_spec);  		if (err < 0) {  			pr_warn("prog '%s': relo #%d: error matching candidate #%d ",  				prog_name, relo_idx, i); -			bpf_core_dump_spec(LIBBPF_WARN, &cand_spec); +			bpf_core_dump_spec(prog_name, LIBBPF_WARN, cand_spec);  			libbpf_print(LIBBPF_WARN, ": %d\n", err);  			return err;  		}  		pr_debug("prog '%s': relo #%d: %s candidate #%d ", prog_name,  			 relo_idx, err == 0 ? 
"non-matching" : "matching", i); -		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec); +		bpf_core_dump_spec(prog_name, LIBBPF_DEBUG, cand_spec);  		libbpf_print(LIBBPF_DEBUG, "\n");  		if (err == 0)  			continue; -		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, &cand_spec, &cand_res); +		err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, cand_spec, &cand_res);  		if (err)  			return err;  		if (j == 0) {  			targ_res = cand_res; -			targ_spec = cand_spec; -		} else if (cand_spec.bit_offset != targ_spec.bit_offset) { +			*targ_spec = *cand_spec; +		} else if (cand_spec->bit_offset != targ_spec->bit_offset) {  			/* if there are many field relo candidates, they  			 * should all resolve to the same bit offset  			 */  			pr_warn("prog '%s': relo #%d: field offset ambiguity: %u != %u\n", -				prog_name, relo_idx, cand_spec.bit_offset, -				targ_spec.bit_offset); +				prog_name, relo_idx, cand_spec->bit_offset, +				targ_spec->bit_offset);  			return -EINVAL;  		} else if (cand_res.poison != targ_res.poison || cand_res.new_val != targ_res.new_val) {  			/* all candidates should result in the same relocation @@ -1251,7 +1288,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  	}  	/* -	 * For BPF_FIELD_EXISTS relo or when used BPF program has field +	 * For BPF_CORE_FIELD_EXISTS relo or when used BPF program has field  	 * existence checks or kernel version/config checks, it's expected  	 * that we might not find any candidates. In this case, if field  	 * wasn't found in any candidate, the list of candidates shouldn't @@ -1277,7 +1314,7 @@ int bpf_core_apply_relo_insn(const char *prog_name, struct bpf_insn *insn,  			 prog_name, relo_idx);  		/* calculate single target relo result explicitly */ -		err = bpf_core_calc_relo(prog_name, relo, relo_idx, &local_spec, NULL, &targ_res); +		err = bpf_core_calc_relo(prog_name, relo, relo_idx, local_spec, NULL, &targ_res);  		if (err)  			return err;  	} diff --git a/tools/lib/bpf/relo_core.h b/tools/lib/bpf/relo_core.h index 3b9f8f18346c..17799819ad7c 100644 --- a/tools/lib/bpf/relo_core.h +++ b/tools/lib/bpf/relo_core.h @@ -4,81 +4,10 @@  #ifndef __RELO_CORE_H  #define __RELO_CORE_H -/* bpf_core_relo_kind encodes which aspect of captured field/type/enum value - * has to be adjusted by relocations. - */ -enum bpf_core_relo_kind { -	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */ -	BPF_FIELD_BYTE_SIZE = 1,	/* field size in bytes */ -	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */ -	BPF_FIELD_SIGNED = 3,		/* field signedness (0 - unsigned, 1 - signed) */ -	BPF_FIELD_LSHIFT_U64 = 4,	/* bitfield-specific left bitshift */ -	BPF_FIELD_RSHIFT_U64 = 5,	/* bitfield-specific right bitshift */ -	BPF_TYPE_ID_LOCAL = 6,		/* type ID in local BPF object */ -	BPF_TYPE_ID_TARGET = 7,		/* type ID in target kernel */ -	BPF_TYPE_EXISTS = 8,		/* type existence in target kernel */ -	BPF_TYPE_SIZE = 9,		/* type size in bytes */ -	BPF_ENUMVAL_EXISTS = 10,	/* enum value existence in target kernel */ -	BPF_ENUMVAL_VALUE = 11,		/* enum value integer value */ -}; - -/* The minimum bpf_core_relo checked by the loader - * - * CO-RE relocation captures the following data: - * - insn_off - instruction offset (in bytes) within a BPF program that needs - *   its insn->imm field to be relocated with actual field info; - * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - *   type or field; - * - access_str_off - offset into corresponding .BTF string section. 
String - *   interpretation depends on specific relocation kind: - *     - for field-based relocations, string encodes an accessed field using - *     a sequence of field and array indices, separated by colon (:). It's - *     conceptually very close to LLVM's getelementptr ([0]) instruction's - *     arguments for identifying offset to a field. - *     - for type-based relocations, strings is expected to be just "0"; - *     - for enum value-based relocations, string contains an index of enum - *     value within its enum type; - * - * Example to provide a better feel. - * - *   struct sample { - *       int a; - *       struct { - *           int b[10]; - *       }; - *   }; - * - *   struct sample *s = ...; - *   int x = &s->a;     // encoded as "0:0" (a is field #0) - *   int y = &s->b[5];  // encoded as "0:1:0:5" (anon struct is field #1, - *                      // b is field #0 inside anon struct, accessing elem #5) - *   int z = &s[10]->b; // encoded as "10:1" (ptr is used as an array) - * - * type_id for all relocs in this example  will capture BTF type id of - * `struct sample`. - * - * Such relocation is emitted when using __builtin_preserve_access_index() - * Clang built-in, passing expression that captures field address, e.g.: - * - * bpf_probe_read(&dst, sizeof(dst), - *		  __builtin_preserve_access_index(&src->a.b.c)); - * - * In this case Clang will emit field relocation recording necessary data to - * be able to find offset of embedded `a.b.c` field within `src` struct. - * - *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction - */ -struct bpf_core_relo { -	__u32   insn_off; -	__u32   type_id; -	__u32   access_str_off; -	enum bpf_core_relo_kind kind; -}; +#include <linux/bpf.h>  struct bpf_core_cand {  	const struct btf *btf; -	const struct btf_type *t; -	const char *name;  	__u32 id;  }; @@ -88,11 +17,39 @@ struct bpf_core_cand_list {  	int len;  }; +#define BPF_CORE_SPEC_MAX_LEN 64 + +/* represents BPF CO-RE field or array element accessor */ +struct bpf_core_accessor { +	__u32 type_id;		/* struct/union type or array element type */ +	__u32 idx;		/* field index or array index */ +	const char *name;	/* field name or NULL for array accessor */ +}; + +struct bpf_core_spec { +	const struct btf *btf; +	/* high-level spec: named fields and array indices only */ +	struct bpf_core_accessor spec[BPF_CORE_SPEC_MAX_LEN]; +	/* original unresolved (no skip_mods_or_typedefs) root type ID */ +	__u32 root_type_id; +	/* CO-RE relocation kind */ +	enum bpf_core_relo_kind relo_kind; +	/* high-level spec length */ +	int len; +	/* raw, low-level spec: 1-to-1 with accessor spec string */ +	int raw_spec[BPF_CORE_SPEC_MAX_LEN]; +	/* raw spec length */ +	int raw_len; +	/* field bit offset represented by spec */ +	__u32 bit_offset; +}; +  int bpf_core_apply_relo_insn(const char *prog_name,  			     struct bpf_insn *insn, int insn_idx,  			     const struct bpf_core_relo *relo, int relo_idx,  			     const struct btf *local_btf, -			     struct bpf_core_cand_list *cands); +			     struct bpf_core_cand_list *cands, +			     struct bpf_core_spec *specs_scratch);  int bpf_core_types_are_compat(const struct btf *local_btf, __u32 local_id,  			      const struct btf *targ_btf, __u32 targ_id); diff --git a/tools/lib/bpf/skel_internal.h b/tools/lib/bpf/skel_internal.h index 9cf66702fa8d..0b84d8e6b72a 100644 --- a/tools/lib/bpf/skel_internal.h +++ b/tools/lib/bpf/skel_internal.h @@ -7,6 +7,16 @@  #include <sys/syscall.h>  #include <sys/mman.h> +#ifndef __NR_bpf +# if defined(__mips__) && 
defined(_ABIO32) +#  define __NR_bpf 4355 +# elif defined(__mips__) && defined(_ABIN32) +#  define __NR_bpf 6319 +# elif defined(__mips__) && defined(_ABI64) +#  define __NR_bpf 5315 +# endif +#endif +  /* This file is a base header for auto-generated *.lskel.h files.   * Its contents will change and may become part of auto-generation in the future.   * @@ -65,8 +75,7 @@ static inline int bpf_load_and_run(struct bpf_load_and_run_opts *opts)  	int map_fd = -1, prog_fd = -1, key = 0, err;  	union bpf_attr attr; -	map_fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, -				     opts->data_sz, 1, 0); +	map_fd = bpf_map_create(BPF_MAP_TYPE_ARRAY, "__loader.map", 4, opts->data_sz, 1, NULL);  	if (map_fd < 0) {  		opts->errstr = "failed to create loader map";  		err = -errno; diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index 81f8fbc85e70..edafe56664f3 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -35,6 +35,11 @@  #include "libbpf_internal.h"  #include "xsk.h" +/* entire xsk.h and xsk.c is going away in libbpf 1.0, so ignore all internal + * uses of deprecated APIs + */ +#pragma GCC diagnostic ignored "-Wdeprecated-declarations" +  #ifndef SOL_XDP   #define SOL_XDP 283  #endif @@ -364,8 +369,6 @@ int xsk_umem__create_v0_0_2(struct xsk_umem **umem_ptr, void *umem_area,  static enum xsk_prog get_xsk_prog(void)  {  	enum xsk_prog detected = XSK_PROG_FALLBACK; -	struct bpf_load_program_attr prog_attr; -	struct bpf_create_map_attr map_attr;  	__u32 size_out, retval, duration;  	char data_in = 0, data_out;  	struct bpf_insn insns[] = { @@ -375,27 +378,15 @@ static enum xsk_prog get_xsk_prog(void)  		BPF_EMIT_CALL(BPF_FUNC_redirect_map),  		BPF_EXIT_INSN(),  	}; -	int prog_fd, map_fd, ret; - -	memset(&map_attr, 0, sizeof(map_attr)); -	map_attr.map_type = BPF_MAP_TYPE_XSKMAP; -	map_attr.key_size = sizeof(int); -	map_attr.value_size = sizeof(int); -	map_attr.max_entries = 1; +	int prog_fd, map_fd, ret, insn_cnt = ARRAY_SIZE(insns); -	map_fd = bpf_create_map_xattr(&map_attr); +	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, NULL, sizeof(int), sizeof(int), 1, NULL);  	if (map_fd < 0)  		return detected;  	insns[0].imm = map_fd; -	memset(&prog_attr, 0, sizeof(prog_attr)); -	prog_attr.prog_type = BPF_PROG_TYPE_XDP; -	prog_attr.insns = insns; -	prog_attr.insns_cnt = ARRAY_SIZE(insns); -	prog_attr.license = "GPL"; - -	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); +	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);  	if (prog_fd < 0) {  		close(map_fd);  		return detected; @@ -495,10 +486,13 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  	};  	struct bpf_insn *progs[] = {prog, prog_redirect_flags};  	enum xsk_prog option = get_xsk_prog(); +	LIBBPF_OPTS(bpf_prog_load_opts, opts, +		.log_buf = log_buf, +		.log_size = log_buf_size, +	); -	prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, progs[option], insns_cnt[option], -				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf, -				   log_buf_size); +	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "LGPL-2.1 or BSD-2-Clause", +				progs[option], insns_cnt[option], &opts);  	if (prog_fd < 0) {  		pr_warn("BPF log buffer:\n%s", log_buf);  		return prog_fd; @@ -554,8 +548,7 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)  		return -errno;  	ifr.ifr_data = (void *)&channels; -	memcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ - 1); -	ifr.ifr_name[IFNAMSIZ - 1] = '\0'; +	libbpf_strlcpy(ifr.ifr_name, ctx->ifname, IFNAMSIZ);  	err = ioctl(fd, SIOCETHTOOL, &ifr);  	if (err && errno != EOPNOTSUPP) { 
 		ret = -errno; @@ -590,8 +583,8 @@ static int xsk_create_bpf_maps(struct xsk_socket *xsk)  	if (max_queues < 0)  		return max_queues; -	fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", -				 sizeof(int), sizeof(int), max_queues, 0); +	fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map", +			    sizeof(int), sizeof(int), max_queues, NULL);  	if (fd < 0)  		return fd; @@ -725,14 +718,12 @@ static int xsk_link_lookup(int ifindex, __u32 *prog_id, int *link_fd)  static bool xsk_probe_bpf_link(void)  { -	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts, -			    .flags = XDP_FLAGS_SKB_MODE); -	struct bpf_load_program_attr prog_attr; +	LIBBPF_OPTS(bpf_link_create_opts, opts, .flags = XDP_FLAGS_SKB_MODE);  	struct bpf_insn insns[2] = {  		BPF_MOV64_IMM(BPF_REG_0, XDP_PASS),  		BPF_EXIT_INSN()  	}; -	int prog_fd, link_fd = -1; +	int prog_fd, link_fd = -1, insn_cnt = ARRAY_SIZE(insns);  	int ifindex_lo = 1;  	bool ret = false;  	int err; @@ -744,13 +735,7 @@ static bool xsk_probe_bpf_link(void)  	if (link_fd >= 0)  		return true; -	memset(&prog_attr, 0, sizeof(prog_attr)); -	prog_attr.prog_type = BPF_PROG_TYPE_XDP; -	prog_attr.insns = insns; -	prog_attr.insns_cnt = ARRAY_SIZE(insns); -	prog_attr.license = "GPL"; - -	prog_fd = bpf_load_program_xattr(&prog_attr, NULL, 0); +	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL", insns, insn_cnt, NULL);  	if (prog_fd < 0)  		return ret; @@ -782,8 +767,7 @@ static int xsk_create_xsk_struct(int ifindex, struct xsk_socket *xsk)  	}  	ctx->ifindex = ifindex; -	memcpy(ctx->ifname, ifname, IFNAMSIZ -1); -	ctx->ifname[IFNAMSIZ - 1] = 0; +	libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);  	xsk->ctx = ctx;  	xsk->ctx->has_bpf_link = xsk_probe_bpf_link(); @@ -965,8 +949,7 @@ static struct xsk_ctx *xsk_create_ctx(struct xsk_socket *xsk,  	ctx->refcount = 1;  	ctx->umem = umem;  	ctx->queue_id = queue_id; -	memcpy(ctx->ifname, ifname, IFNAMSIZ - 1); -	ctx->ifname[IFNAMSIZ - 1] = '\0'; +	libbpf_strlcpy(ctx->ifname, ifname, IFNAMSIZ);  	ctx->fill = fill;  	ctx->comp = comp; diff --git a/tools/lib/perf/Documentation/libperf.txt b/tools/lib/perf/Documentation/libperf.txt index 63ae5e0195ce..32c5051c24eb 100644 --- a/tools/lib/perf/Documentation/libperf.txt +++ b/tools/lib/perf/Documentation/libperf.txt @@ -48,6 +48,7 @@ SYNOPSIS    int perf_cpu_map__nr(const struct perf_cpu_map *cpus);    bool perf_cpu_map__empty(const struct perf_cpu_map *map);    int perf_cpu_map__max(struct perf_cpu_map *map); +  bool perf_cpu_map__has(const struct perf_cpu_map *map, int cpu);    #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)  -- @@ -135,16 +136,16 @@ SYNOPSIS    int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,                         struct perf_thread_map *threads);    void perf_evsel__close(struct perf_evsel *evsel); -  void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +  void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);    int perf_evsel__mmap(struct perf_evsel *evsel, int pages);    void perf_evsel__munmap(struct perf_evsel *evsel); -  void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); -  int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +  void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); +  int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,                         struct perf_counts_values *count);    int perf_evsel__enable(struct perf_evsel *evsel); -  int perf_evsel__enable_cpu(struct perf_evsel *evsel, 
int cpu); +  int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);    int perf_evsel__disable(struct perf_evsel *evsel); -  int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); +  int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);    struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);    struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);    struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); diff --git a/tools/lib/perf/cpumap.c b/tools/lib/perf/cpumap.c index adaad3dddf6e..ee66760f1e63 100644 --- a/tools/lib/perf/cpumap.c +++ b/tools/lib/perf/cpumap.c @@ -10,15 +10,24 @@  #include <ctype.h>  #include <limits.h> -struct perf_cpu_map *perf_cpu_map__dummy_new(void) +static struct perf_cpu_map *perf_cpu_map__alloc(int nr_cpus)  { -	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(int)); +	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + sizeof(struct perf_cpu) * nr_cpus);  	if (cpus != NULL) { -		cpus->nr = 1; -		cpus->map[0] = -1; +		cpus->nr = nr_cpus;  		refcount_set(&cpus->refcnt, 1); +  	} +	return cpus; +} + +struct perf_cpu_map *perf_cpu_map__dummy_new(void) +{ +	struct perf_cpu_map *cpus = perf_cpu_map__alloc(1); + +	if (cpus) +		cpus->map[0].cpu = -1;  	return cpus;  } @@ -54,15 +63,12 @@ static struct perf_cpu_map *cpu_map__default_new(void)  	if (nr_cpus < 0)  		return NULL; -	cpus = malloc(sizeof(*cpus) + nr_cpus * sizeof(int)); +	cpus = perf_cpu_map__alloc(nr_cpus);  	if (cpus != NULL) {  		int i;  		for (i = 0; i < nr_cpus; ++i) -			cpus->map[i] = i; - -		cpus->nr = nr_cpus; -		refcount_set(&cpus->refcnt, 1); +			cpus->map[i].cpu = i;  	}  	return cpus; @@ -73,31 +79,32 @@ struct perf_cpu_map *perf_cpu_map__default_new(void)  	return cpu_map__default_new();  } -static int cmp_int(const void *a, const void *b) + +static int cmp_cpu(const void *a, const void *b)  { -	return *(const int *)a - *(const int*)b; +	const struct perf_cpu *cpu_a = a, *cpu_b = b; + +	return cpu_a->cpu - cpu_b->cpu;  } -static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) +static struct perf_cpu_map *cpu_map__trim_new(int nr_cpus, const struct perf_cpu *tmp_cpus)  { -	size_t payload_size = nr_cpus * sizeof(int); -	struct perf_cpu_map *cpus = malloc(sizeof(*cpus) + payload_size); +	size_t payload_size = nr_cpus * sizeof(struct perf_cpu); +	struct perf_cpu_map *cpus = perf_cpu_map__alloc(nr_cpus);  	int i, j;  	if (cpus != NULL) {  		memcpy(cpus->map, tmp_cpus, payload_size); -		qsort(cpus->map, nr_cpus, sizeof(int), cmp_int); +		qsort(cpus->map, nr_cpus, sizeof(struct perf_cpu), cmp_cpu);  		/* Remove dups */  		j = 0;  		for (i = 0; i < nr_cpus; i++) { -			if (i == 0 || cpus->map[i] != cpus->map[i - 1]) -				cpus->map[j++] = cpus->map[i]; +			if (i == 0 || cpus->map[i].cpu != cpus->map[i - 1].cpu) +				cpus->map[j++].cpu = cpus->map[i].cpu;  		}  		cpus->nr = j;  		assert(j <= nr_cpus); -		refcount_set(&cpus->refcnt, 1);  	} -  	return cpus;  } @@ -105,7 +112,7 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)  {  	struct perf_cpu_map *cpus = NULL;  	int nr_cpus = 0; -	int *tmp_cpus = NULL, *tmp; +	struct perf_cpu *tmp_cpus = NULL, *tmp;  	int max_entries = 0;  	int n, cpu, prev;  	char sep; @@ -124,24 +131,24 @@ struct perf_cpu_map *perf_cpu_map__read(FILE *file)  			if (new_max >= max_entries) {  				max_entries = new_max + MAX_NR_CPUS / 2; -				tmp = realloc(tmp_cpus, max_entries * sizeof(int)); +				tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));  				if 
(tmp == NULL)  					goto out_free_tmp;  				tmp_cpus = tmp;  			}  			while (++prev < cpu) -				tmp_cpus[nr_cpus++] = prev; +				tmp_cpus[nr_cpus++].cpu = prev;  		}  		if (nr_cpus == max_entries) {  			max_entries += MAX_NR_CPUS; -			tmp = realloc(tmp_cpus, max_entries * sizeof(int)); +			tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));  			if (tmp == NULL)  				goto out_free_tmp;  			tmp_cpus = tmp;  		} -		tmp_cpus[nr_cpus++] = cpu; +		tmp_cpus[nr_cpus++].cpu = cpu;  		if (n == 2 && sep == '-')  			prev = cpu;  		else @@ -179,7 +186,7 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)  	unsigned long start_cpu, end_cpu = 0;  	char *p = NULL;  	int i, nr_cpus = 0; -	int *tmp_cpus = NULL, *tmp; +	struct perf_cpu *tmp_cpus = NULL, *tmp;  	int max_entries = 0;  	if (!cpu_list) @@ -220,17 +227,17 @@ struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list)  		for (; start_cpu <= end_cpu; start_cpu++) {  			/* check for duplicates */  			for (i = 0; i < nr_cpus; i++) -				if (tmp_cpus[i] == (int)start_cpu) +				if (tmp_cpus[i].cpu == (int)start_cpu)  					goto invalid;  			if (nr_cpus == max_entries) {  				max_entries += MAX_NR_CPUS; -				tmp = realloc(tmp_cpus, max_entries * sizeof(int)); +				tmp = realloc(tmp_cpus, max_entries * sizeof(struct perf_cpu));  				if (tmp == NULL)  					goto invalid;  				tmp_cpus = tmp;  			} -			tmp_cpus[nr_cpus++] = (int)start_cpu; +			tmp_cpus[nr_cpus++].cpu = (int)start_cpu;  		}  		if (*p)  			++p; @@ -250,12 +257,16 @@ out:  	return cpus;  } -int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx) +struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx)  { +	struct perf_cpu result = { +		.cpu = -1 +	}; +  	if (cpus && idx < cpus->nr)  		return cpus->map[idx]; -	return -1; +	return result;  }  int perf_cpu_map__nr(const struct perf_cpu_map *cpus) @@ -265,21 +276,26 @@ int perf_cpu_map__nr(const struct perf_cpu_map *cpus)  bool perf_cpu_map__empty(const struct perf_cpu_map *map)  { -	return map ? map->map[0] == -1 : true; +	return map ? map->map[0].cpu == -1 : true;  } -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu) +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu)  { -	int low = 0, high = cpus->nr; +	int low, high; +	if (!cpus) +		return -1; + +	low = 0; +	high = cpus->nr;  	while (low < high) { -		int idx = (low + high) / 2, -		    cpu_at_idx = cpus->map[idx]; +		int idx = (low + high) / 2; +		struct perf_cpu cpu_at_idx = cpus->map[idx]; -		if (cpu_at_idx == cpu) +		if (cpu_at_idx.cpu == cpu.cpu)  			return idx; -		if (cpu_at_idx > cpu) +		if (cpu_at_idx.cpu > cpu.cpu)  			high = idx;  		else  			low = idx + 1; @@ -288,10 +304,19 @@ int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu)  	return -1;  } -int perf_cpu_map__max(struct perf_cpu_map *map) +bool perf_cpu_map__has(const struct perf_cpu_map *cpus, struct perf_cpu cpu)  { +	return perf_cpu_map__idx(cpus, cpu) != -1; +} + +struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map) +{ +	struct perf_cpu result = { +		.cpu = -1 +	}; +  	// cpu_map__trim_new() qsort()s it, cpu_map__default_new() sorts it as well. -	return map->nr > 0 ? map->map[map->nr - 1] : -1; +	return map->nr > 0 ? 
map->map[map->nr - 1] : result;  }  /* @@ -305,7 +330,7 @@ int perf_cpu_map__max(struct perf_cpu_map *map)  struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,  					 struct perf_cpu_map *other)  { -	int *tmp_cpus; +	struct perf_cpu *tmp_cpus;  	int tmp_len;  	int i, j, k;  	struct perf_cpu_map *merged; @@ -319,19 +344,19 @@ struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,  	if (!other)  		return orig;  	if (orig->nr == other->nr && -	    !memcmp(orig->map, other->map, orig->nr * sizeof(int))) +	    !memcmp(orig->map, other->map, orig->nr * sizeof(struct perf_cpu)))  		return orig;  	tmp_len = orig->nr + other->nr; -	tmp_cpus = malloc(tmp_len * sizeof(int)); +	tmp_cpus = malloc(tmp_len * sizeof(struct perf_cpu));  	if (!tmp_cpus)  		return NULL;  	/* Standard merge algorithm from wikipedia */  	i = j = k = 0;  	while (i < orig->nr && j < other->nr) { -		if (orig->map[i] <= other->map[j]) { -			if (orig->map[i] == other->map[j]) +		if (orig->map[i].cpu <= other->map[j].cpu) { +			if (orig->map[i].cpu == other->map[j].cpu)  				j++;  			tmp_cpus[k++] = orig->map[i++];  		} else diff --git a/tools/lib/perf/evlist.c b/tools/lib/perf/evlist.c index e37dfad31383..9a770bfdc804 100644 --- a/tools/lib/perf/evlist.c +++ b/tools/lib/perf/evlist.c @@ -407,7 +407,7 @@ perf_evlist__mmap_cb_get(struct perf_evlist *evlist, bool overwrite, int idx)  static int  perf_evlist__mmap_cb_mmap(struct perf_mmap *map, struct perf_mmap_param *mp, -			  int output, int cpu) +			  int output, struct perf_cpu cpu)  {  	return perf_mmap__mmap(map, mp, output, cpu);  } @@ -426,7 +426,7 @@ mmap_per_evsel(struct perf_evlist *evlist, struct perf_evlist_mmap_ops *ops,  	       int idx, struct perf_mmap_param *mp, int cpu_idx,  	       int thread, int *_output, int *_output_overwrite)  { -	int evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx); +	struct perf_cpu evlist_cpu = perf_cpu_map__cpu(evlist->cpus, cpu_idx);  	struct perf_evsel *evsel;  	int revent; @@ -643,14 +643,14 @@ perf_evlist__next_mmap(struct perf_evlist *evlist, struct perf_mmap *map,  	return overwrite ? evlist->mmap_ovw_first : evlist->mmap_first;  } -void __perf_evlist__set_leader(struct list_head *list) +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader)  { -	struct perf_evsel *evsel, *leader; +	struct perf_evsel *first, *last, *evsel; -	leader = list_entry(list->next, struct perf_evsel, node); -	evsel = list_entry(list->prev, struct perf_evsel, node); +	first = list_first_entry(list, struct perf_evsel, node); +	last = list_last_entry(list, struct perf_evsel, node); -	leader->nr_members = evsel->idx - leader->idx + 1; +	leader->nr_members = last->idx - first->idx + 1;  	__perf_evlist__for_each_entry(list, evsel)  		evsel->leader = leader; @@ -659,7 +659,10 @@ void __perf_evlist__set_leader(struct list_head *list)  void perf_evlist__set_leader(struct perf_evlist *evlist)  {  	if (evlist->nr_entries) { +		struct perf_evsel *first = list_entry(evlist->entries.next, +						struct perf_evsel, node); +  		evlist->nr_groups = evlist->nr_entries > 1 ? 
1 : 0; -		__perf_evlist__set_leader(&evlist->entries); +		__perf_evlist__set_leader(&evlist->entries, first);  	}  } diff --git a/tools/lib/perf/evsel.c b/tools/lib/perf/evsel.c index 8441e3e1aaac..7ea86a44eae5 100644 --- a/tools/lib/perf/evsel.c +++ b/tools/lib/perf/evsel.c @@ -43,18 +43,22 @@ void perf_evsel__delete(struct perf_evsel *evsel)  	free(evsel);  } -#define FD(e, x, y) ((int *) xyarray__entry(e->fd, x, y)) -#define MMAP(e, x, y) (e->mmap ? ((struct perf_mmap *) xyarray__entry(e->mmap, x, y)) : NULL) +#define FD(_evsel, _cpu_map_idx, _thread)				\ +	((int *)xyarray__entry(_evsel->fd, _cpu_map_idx, _thread)) +#define MMAP(_evsel, _cpu_map_idx, _thread)				\ +	(_evsel->mmap ? ((struct perf_mmap *) xyarray__entry(_evsel->mmap, _cpu_map_idx, _thread)) \ +		      : NULL)  int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads)  {  	evsel->fd = xyarray__new(ncpus, nthreads, sizeof(int));  	if (evsel->fd) { -		int cpu, thread; -		for (cpu = 0; cpu < ncpus; cpu++) { +		int idx, thread; + +		for (idx = 0; idx < ncpus; idx++) {  			for (thread = 0; thread < nthreads; thread++) { -				int *fd = FD(evsel, cpu, thread); +				int *fd = FD(evsel, idx, thread);  				if (fd)  					*fd = -1; @@ -74,13 +78,13 @@ static int perf_evsel__alloc_mmap(struct perf_evsel *evsel, int ncpus, int nthre  static int  sys_perf_event_open(struct perf_event_attr *attr, -		    pid_t pid, int cpu, int group_fd, +		    pid_t pid, struct perf_cpu cpu, int group_fd,  		    unsigned long flags)  { -	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags); +	return syscall(__NR_perf_event_open, attr, pid, cpu.cpu, group_fd, flags);  } -static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *group_fd) +static int get_group_fd(struct perf_evsel *evsel, int cpu_map_idx, int thread, int *group_fd)  {  	struct perf_evsel *leader = evsel->leader;  	int *fd; @@ -97,7 +101,7 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou  	if (!leader->fd)  		return -ENOTCONN; -	fd = FD(leader, cpu, thread); +	fd = FD(leader, cpu_map_idx, thread);  	if (fd == NULL || *fd == -1)  		return -EBADF; @@ -109,7 +113,8 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread, int *grou  int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,  		     struct perf_thread_map *threads)  { -	int cpu, thread, err = 0; +	struct perf_cpu cpu; +	int idx, thread, err = 0;  	if (cpus == NULL) {  		static struct perf_cpu_map *empty_cpu_map; @@ -139,21 +144,21 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,  	    perf_evsel__alloc_fd(evsel, cpus->nr, threads->nr) < 0)  		return -ENOMEM; -	for (cpu = 0; cpu < cpus->nr; cpu++) { +	perf_cpu_map__for_each_cpu(cpu, idx, cpus) {  		for (thread = 0; thread < threads->nr; thread++) {  			int fd, group_fd, *evsel_fd; -			evsel_fd = FD(evsel, cpu, thread); +			evsel_fd = FD(evsel, idx, thread);  			if (evsel_fd == NULL)  				return -EINVAL; -			err = get_group_fd(evsel, cpu, thread, &group_fd); +			err = get_group_fd(evsel, idx, thread, &group_fd);  			if (err < 0)  				return err;  			fd = sys_perf_event_open(&evsel->attr,  						 threads->map[thread].pid, -						 cpus->map[cpu], group_fd, 0); +						 cpu, group_fd, 0);  			if (fd < 0)  				return -errno; @@ -165,12 +170,12 @@ int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,  	return err;  } -static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu) +static void 
perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu_map_idx)  {  	int thread;  	for (thread = 0; thread < xyarray__max_y(evsel->fd); ++thread) { -		int *fd = FD(evsel, cpu, thread); +		int *fd = FD(evsel, cpu_map_idx, thread);  		if (fd && *fd >= 0) {  			close(*fd); @@ -181,10 +186,8 @@ static void perf_evsel__close_fd_cpu(struct perf_evsel *evsel, int cpu)  void perf_evsel__close_fd(struct perf_evsel *evsel)  { -	int cpu; - -	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) -		perf_evsel__close_fd_cpu(evsel, cpu); +	for (int idx = 0; idx < xyarray__max_x(evsel->fd); idx++) +		perf_evsel__close_fd_cpu(evsel, idx);  }  void perf_evsel__free_fd(struct perf_evsel *evsel) @@ -202,29 +205,29 @@ void perf_evsel__close(struct perf_evsel *evsel)  	perf_evsel__free_fd(evsel);  } -void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu) +void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx)  {  	if (evsel->fd == NULL)  		return; -	perf_evsel__close_fd_cpu(evsel, cpu); +	perf_evsel__close_fd_cpu(evsel, cpu_map_idx);  }  void perf_evsel__munmap(struct perf_evsel *evsel)  { -	int cpu, thread; +	int idx, thread;  	if (evsel->fd == NULL || evsel->mmap == NULL)  		return; -	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { +	for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {  		for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { -			int *fd = FD(evsel, cpu, thread); +			int *fd = FD(evsel, idx, thread);  			if (fd == NULL || *fd < 0)  				continue; -			perf_mmap__munmap(MMAP(evsel, cpu, thread)); +			perf_mmap__munmap(MMAP(evsel, idx, thread));  		}  	} @@ -234,7 +237,7 @@ void perf_evsel__munmap(struct perf_evsel *evsel)  int perf_evsel__mmap(struct perf_evsel *evsel, int pages)  { -	int ret, cpu, thread; +	int ret, idx, thread;  	struct perf_mmap_param mp = {  		.prot = PROT_READ | PROT_WRITE,  		.mask = (pages * page_size) - 1, @@ -246,15 +249,16 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)  	if (perf_evsel__alloc_mmap(evsel, xyarray__max_x(evsel->fd), xyarray__max_y(evsel->fd)) < 0)  		return -ENOMEM; -	for (cpu = 0; cpu < xyarray__max_x(evsel->fd); cpu++) { +	for (idx = 0; idx < xyarray__max_x(evsel->fd); idx++) {  		for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) { -			int *fd = FD(evsel, cpu, thread); +			int *fd = FD(evsel, idx, thread);  			struct perf_mmap *map; +			struct perf_cpu cpu = perf_cpu_map__cpu(evsel->cpus, idx);  			if (fd == NULL || *fd < 0)  				continue; -			map = MMAP(evsel, cpu, thread); +			map = MMAP(evsel, idx, thread);  			perf_mmap__init(map, NULL, false, NULL);  			ret = perf_mmap__mmap(map, &mp, *fd, cpu); @@ -268,14 +272,14 @@ int perf_evsel__mmap(struct perf_evsel *evsel, int pages)  	return 0;  } -void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread) +void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread)  { -	int *fd = FD(evsel, cpu, thread); +	int *fd = FD(evsel, cpu_map_idx, thread); -	if (fd == NULL || *fd < 0 || MMAP(evsel, cpu, thread) == NULL) +	if (fd == NULL || *fd < 0 || MMAP(evsel, cpu_map_idx, thread) == NULL)  		return NULL; -	return MMAP(evsel, cpu, thread)->base; +	return MMAP(evsel, cpu_map_idx, thread)->base;  }  int perf_evsel__read_size(struct perf_evsel *evsel) @@ -303,19 +307,19 @@ int perf_evsel__read_size(struct perf_evsel *evsel)  	return size;  } -int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,  		     struct 
perf_counts_values *count)  {  	size_t size = perf_evsel__read_size(evsel); -	int *fd = FD(evsel, cpu, thread); +	int *fd = FD(evsel, cpu_map_idx, thread);  	memset(count, 0, sizeof(*count));  	if (fd == NULL || *fd < 0)  		return -EINVAL; -	if (MMAP(evsel, cpu, thread) && -	    !perf_mmap__read_self(MMAP(evsel, cpu, thread), count)) +	if (MMAP(evsel, cpu_map_idx, thread) && +	    !perf_mmap__read_self(MMAP(evsel, cpu_map_idx, thread), count))  		return 0;  	if (readn(*fd, count->values, size) <= 0) @@ -326,13 +330,13 @@ int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread,  static int perf_evsel__run_ioctl(struct perf_evsel *evsel,  				 int ioc,  void *arg, -				 int cpu) +				 int cpu_map_idx)  {  	int thread;  	for (thread = 0; thread < xyarray__max_y(evsel->fd); thread++) {  		int err; -		int *fd = FD(evsel, cpu, thread); +		int *fd = FD(evsel, cpu_map_idx, thread);  		if (fd == NULL || *fd < 0)  			return -1; @@ -346,9 +350,9 @@ static int perf_evsel__run_ioctl(struct perf_evsel *evsel,  	return 0;  } -int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx)  { -	return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu); +	return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_ENABLE, NULL, cpu_map_idx);  }  int perf_evsel__enable(struct perf_evsel *evsel) @@ -361,9 +365,9 @@ int perf_evsel__enable(struct perf_evsel *evsel)  	return err;  } -int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu) +int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx)  { -	return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu); +	return perf_evsel__run_ioctl(evsel, PERF_EVENT_IOC_DISABLE, NULL, cpu_map_idx);  }  int perf_evsel__disable(struct perf_evsel *evsel) @@ -431,3 +435,22 @@ void perf_evsel__free_id(struct perf_evsel *evsel)  	zfree(&evsel->id);  	evsel->ids = 0;  } + +void perf_counts_values__scale(struct perf_counts_values *count, +			       bool scale, __s8 *pscaled) +{ +	s8 scaled = 0; + +	if (scale) { +		if (count->run == 0) { +			scaled = -1; +			count->val = 0; +		} else if (count->run < count->ena) { +			scaled = 1; +			count->val = (u64)((double)count->val * count->ena / count->run); +		} +	} + +	if (pscaled) +		*pscaled = scaled; +} diff --git a/tools/lib/perf/include/internal/cpumap.h b/tools/lib/perf/include/internal/cpumap.h index 840d4032587b..581f9ffb4237 100644 --- a/tools/lib/perf/include/internal/cpumap.h +++ b/tools/lib/perf/include/internal/cpumap.h @@ -4,16 +4,30 @@  #include <linux/refcount.h> +/** A wrapper around a CPU to avoid confusion with the perf_cpu_map's map's indices. */ +struct perf_cpu { +	int cpu; +}; + +/** + * A sized, reference counted, sorted array of integers representing CPU + * numbers. This is commonly used to capture which CPUs a PMU is associated + * with. The indices into the cpumap are frequently used as they avoid having + * gaps if CPU numbers were used. For events associated with a pid, rather than + * a CPU, a single dummy map with an entry of -1 is used. + */  struct perf_cpu_map {  	refcount_t	refcnt; +	/** Length of the map array. */  	int		nr; -	int		map[]; +	/** The CPU values. 
*/ +	struct perf_cpu	map[];  };  #ifndef MAX_NR_CPUS  #define MAX_NR_CPUS	2048  #endif -int perf_cpu_map__idx(struct perf_cpu_map *cpus, int cpu); +int perf_cpu_map__idx(const struct perf_cpu_map *cpus, struct perf_cpu cpu);  #endif /* __LIBPERF_INTERNAL_CPUMAP_H */ diff --git a/tools/lib/perf/include/internal/evlist.h b/tools/lib/perf/include/internal/evlist.h index f366dbad6a88..4cefade540bd 100644 --- a/tools/lib/perf/include/internal/evlist.h +++ b/tools/lib/perf/include/internal/evlist.h @@ -4,6 +4,7 @@  #include <linux/list.h>  #include <api/fd/array.h> +#include <internal/cpumap.h>  #include <internal/evsel.h>  #define PERF_EVLIST__HLIST_BITS 8 @@ -36,7 +37,7 @@ typedef void  typedef struct perf_mmap*  (*perf_evlist_mmap__cb_get_t)(struct perf_evlist*, bool, int);  typedef int -(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, int); +(*perf_evlist_mmap__cb_mmap_t)(struct perf_mmap*, struct perf_mmap_param*, int, struct perf_cpu);  struct perf_evlist_mmap_ops {  	perf_evlist_mmap__cb_idx_t	idx; @@ -127,5 +128,5 @@ int perf_evlist__id_add_fd(struct perf_evlist *evlist,  void perf_evlist__reset_id_hash(struct perf_evlist *evlist); -void __perf_evlist__set_leader(struct list_head *list); +void __perf_evlist__set_leader(struct list_head *list, struct perf_evsel *leader);  #endif /* __LIBPERF_INTERNAL_EVLIST_H */ diff --git a/tools/lib/perf/include/internal/evsel.h b/tools/lib/perf/include/internal/evsel.h index 1f3eacbad2e8..cfc9ebd7968e 100644 --- a/tools/lib/perf/include/internal/evsel.h +++ b/tools/lib/perf/include/internal/evsel.h @@ -6,8 +6,8 @@  #include <linux/perf_event.h>  #include <stdbool.h>  #include <sys/types.h> +#include <internal/cpumap.h> -struct perf_cpu_map;  struct perf_thread_map;  struct xyarray; @@ -27,7 +27,7 @@ struct perf_sample_id {  	* queue number.  	*/  	int			 idx; -	int			 cpu; +	struct perf_cpu		 cpu;  	pid_t			 tid;  	/* Holds total ID period value for PERF_SAMPLE_READ processing. 
*/ diff --git a/tools/lib/perf/include/internal/mmap.h b/tools/lib/perf/include/internal/mmap.h index 5e3422f40ed5..5a062af8e9d8 100644 --- a/tools/lib/perf/include/internal/mmap.h +++ b/tools/lib/perf/include/internal/mmap.h @@ -6,6 +6,7 @@  #include <linux/refcount.h>  #include <linux/types.h>  #include <stdbool.h> +#include <internal/cpumap.h>  /* perf sample has 16 bits size limit */  #define PERF_SAMPLE_MAX_SIZE (1 << 16) @@ -24,7 +25,7 @@ struct perf_mmap {  	void			*base;  	int			 mask;  	int			 fd; -	int			 cpu; +	struct perf_cpu		 cpu;  	refcount_t		 refcnt;  	u64			 prev;  	u64			 start; @@ -46,7 +47,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map);  void perf_mmap__init(struct perf_mmap *map, struct perf_mmap *prev,  		     bool overwrite, libperf_unmap_cb_t unmap_cb);  int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, -		    int fd, int cpu); +		    int fd, struct perf_cpu cpu);  void perf_mmap__munmap(struct perf_mmap *map);  void perf_mmap__get(struct perf_mmap *map);  void perf_mmap__put(struct perf_mmap *map); diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 7c27766ea0bf..15b8faafd615 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,11 +3,10 @@  #define __LIBPERF_CPUMAP_H  #include <perf/core.h> +#include <perf/cpumap.h>  #include <stdio.h>  #include <stdbool.h> -struct perf_cpu_map; -  LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__default_new(void);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); @@ -16,10 +15,11 @@ LIBPERF_API struct perf_cpu_map *perf_cpu_map__get(struct perf_cpu_map *map);  LIBPERF_API struct perf_cpu_map *perf_cpu_map__merge(struct perf_cpu_map *orig,  						     struct perf_cpu_map *other);  LIBPERF_API void perf_cpu_map__put(struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx); +LIBPERF_API struct perf_cpu perf_cpu_map__cpu(const struct perf_cpu_map *cpus, int idx);  LIBPERF_API int perf_cpu_map__nr(const struct perf_cpu_map *cpus);  LIBPERF_API bool perf_cpu_map__empty(const struct perf_cpu_map *map); -LIBPERF_API int perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API struct perf_cpu perf_cpu_map__max(struct perf_cpu_map *map); +LIBPERF_API bool perf_cpu_map__has(const struct perf_cpu_map *map, struct perf_cpu cpu);  #define perf_cpu_map__for_each_cpu(cpu, idx, cpus)		\  	for ((idx) = 0, (cpu) = perf_cpu_map__cpu(cpus, idx);	\ diff --git a/tools/lib/perf/include/perf/evsel.h b/tools/lib/perf/include/perf/evsel.h index 60eae25076d3..2a9516b42d15 100644 --- a/tools/lib/perf/include/perf/evsel.h +++ b/tools/lib/perf/include/perf/evsel.h @@ -4,6 +4,8 @@  #include <stdint.h>  #include <perf/core.h> +#include <stdbool.h> +#include <linux/types.h>  struct perf_evsel;  struct perf_event_attr; @@ -26,18 +28,20 @@ LIBPERF_API void perf_evsel__delete(struct perf_evsel *evsel);  LIBPERF_API int perf_evsel__open(struct perf_evsel *evsel, struct perf_cpu_map *cpus,  				 struct perf_thread_map *threads);  LIBPERF_API void perf_evsel__close(struct perf_evsel *evsel); -LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API void perf_evsel__close_cpu(struct perf_evsel *evsel, int cpu_map_idx);  LIBPERF_API int perf_evsel__mmap(struct perf_evsel *evsel, int pages);  LIBPERF_API void perf_evsel__munmap(struct perf_evsel *evsel); -LIBPERF_API void 
*perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu, int thread); -LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, +LIBPERF_API void *perf_evsel__mmap_base(struct perf_evsel *evsel, int cpu_map_idx, int thread); +LIBPERF_API int perf_evsel__read(struct perf_evsel *evsel, int cpu_map_idx, int thread,  				 struct perf_counts_values *count);  LIBPERF_API int perf_evsel__enable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__enable_cpu(struct perf_evsel *evsel, int cpu_map_idx);  LIBPERF_API int perf_evsel__disable(struct perf_evsel *evsel); -LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu); +LIBPERF_API int perf_evsel__disable_cpu(struct perf_evsel *evsel, int cpu_map_idx);  LIBPERF_API struct perf_cpu_map *perf_evsel__cpus(struct perf_evsel *evsel);  LIBPERF_API struct perf_thread_map *perf_evsel__threads(struct perf_evsel *evsel);  LIBPERF_API struct perf_event_attr *perf_evsel__attr(struct perf_evsel *evsel); +LIBPERF_API void perf_counts_values__scale(struct perf_counts_values *count, +					   bool scale, __s8 *pscaled);  #endif /* __LIBPERF_EVSEL_H */ diff --git a/tools/lib/perf/libperf.map b/tools/lib/perf/libperf.map index 71468606e8a7..93696affda2e 100644 --- a/tools/lib/perf/libperf.map +++ b/tools/lib/perf/libperf.map @@ -10,6 +10,7 @@ LIBPERF_0.0.1 {  		perf_cpu_map__cpu;  		perf_cpu_map__empty;  		perf_cpu_map__max; +		perf_cpu_map__has;  		perf_thread_map__new_dummy;  		perf_thread_map__set_pid;  		perf_thread_map__comm; @@ -50,6 +51,7 @@ LIBPERF_0.0.1 {  		perf_mmap__read_init;  		perf_mmap__read_done;  		perf_mmap__read_event; +		perf_counts_values__scale;  	local:  		*;  }; diff --git a/tools/lib/perf/mmap.c b/tools/lib/perf/mmap.c index c89dfa5f67b3..f7ee07cb5818 100644 --- a/tools/lib/perf/mmap.c +++ b/tools/lib/perf/mmap.c @@ -32,7 +32,7 @@ size_t perf_mmap__mmap_len(struct perf_mmap *map)  }  int perf_mmap__mmap(struct perf_mmap *map, struct perf_mmap_param *mp, -		    int fd, int cpu) +		    int fd, struct perf_cpu cpu)  {  	map->prev = 0;  	map->mask = mp->mask; @@ -353,8 +353,6 @@ int perf_mmap__read_self(struct perf_mmap *map, struct perf_counts_values *count  		count->ena += delta;  		if (idx)  			count->run += delta; - -		cnt = mul_u64_u64_div64(cnt, count->ena, count->run);  	}  	count->val = cnt; diff --git a/tools/lib/perf/tests/test-evlist.c b/tools/lib/perf/tests/test-evlist.c index ce91a582f0e4..b3479dfa9a1c 100644 --- a/tools/lib/perf/tests/test-evlist.c +++ b/tools/lib/perf/tests/test-evlist.c @@ -21,6 +21,9 @@  #include "tests.h"  #include <internal/evsel.h> +#define EVENT_NUM 15 +#define WAIT_COUNT 100000000UL +  static int libperf_print(enum libperf_print_level level,  			 const char *fmt, va_list ap)  { @@ -331,7 +334,8 @@ static int test_mmap_cpus(void)  	};  	cpu_set_t saved_mask;  	char path[PATH_MAX]; -	int id, err, cpu, tmp; +	int id, err, tmp; +	struct perf_cpu cpu;  	union perf_event *event;  	int count = 0; @@ -374,7 +378,7 @@ static int test_mmap_cpus(void)  		cpu_set_t mask;  		CPU_ZERO(&mask); -		CPU_SET(cpu, &mask); +		CPU_SET(cpu.cpu, &mask);  		err = sched_setaffinity(0, sizeof(mask), &mask);  		__T("sched_setaffinity failed", err == 0); @@ -413,6 +417,159 @@ static int test_mmap_cpus(void)  	return 0;  } +static double display_error(long long average, +			    long long high, +			    long long low, +			    long long expected) +{ +	double error; + +	error = (((double)average - expected) / 
expected) * 100.0; + +	__T_VERBOSE("   Expected: %lld\n", expected); +	__T_VERBOSE("   High: %lld   Low:  %lld   Average:  %lld\n", +		    high, low, average); + +	__T_VERBOSE("   Average Error = %.2f%%\n", error); + +	return error; +} + +static int test_stat_multiplexing(void) +{ +	struct perf_counts_values expected_counts = { .val = 0 }; +	struct perf_counts_values counts[EVENT_NUM] = {{ .val = 0 },}; +	struct perf_thread_map *threads; +	struct perf_evlist *evlist; +	struct perf_evsel *evsel; +	struct perf_event_attr attr = { +		.type	     = PERF_TYPE_HARDWARE, +		.config	     = PERF_COUNT_HW_INSTRUCTIONS, +		.read_format = PERF_FORMAT_TOTAL_TIME_ENABLED | +			       PERF_FORMAT_TOTAL_TIME_RUNNING, +		.disabled    = 1, +	}; +	int err, i, nonzero = 0; +	unsigned long count; +	long long max = 0, min = 0, avg = 0; +	double error = 0.0; +	s8 scaled = 0; + +	/* read for non-multiplexing event count */ +	threads = perf_thread_map__new_dummy(); +	__T("failed to create threads", threads); + +	perf_thread_map__set_pid(threads, 0, 0); + +	evsel = perf_evsel__new(&attr); +	__T("failed to create evsel", evsel); + +	err = perf_evsel__open(evsel, NULL, threads); +	__T("failed to open evsel", err == 0); + +	err = perf_evsel__enable(evsel); +	__T("failed to enable evsel", err == 0); + +	/* wait loop */ +	count = WAIT_COUNT; +	while (count--) +		; + +	perf_evsel__read(evsel, 0, 0, &expected_counts); +	__T("failed to read value for evsel", expected_counts.val != 0); +	__T("failed to read non-multiplexing event count", +	    expected_counts.ena == expected_counts.run); + +	err = perf_evsel__disable(evsel); +	__T("failed to enable evsel", err == 0); + +	perf_evsel__close(evsel); +	perf_evsel__delete(evsel); + +	perf_thread_map__put(threads); + +	/* read for multiplexing event count */ +	threads = perf_thread_map__new_dummy(); +	__T("failed to create threads", threads); + +	perf_thread_map__set_pid(threads, 0, 0); + +	evlist = perf_evlist__new(); +	__T("failed to create evlist", evlist); + +	for (i = 0; i < EVENT_NUM; i++) { +		evsel = perf_evsel__new(&attr); +		__T("failed to create evsel", evsel); + +		perf_evlist__add(evlist, evsel); +	} +	perf_evlist__set_maps(evlist, NULL, threads); + +	err = perf_evlist__open(evlist); +	__T("failed to open evsel", err == 0); + +	perf_evlist__enable(evlist); + +	/* wait loop */ +	count = WAIT_COUNT; +	while (count--) +		; + +	i = 0; +	perf_evlist__for_each_evsel(evlist, evsel) { +		perf_evsel__read(evsel, 0, 0, &counts[i]); +		__T("failed to read value for evsel", counts[i].val != 0); +		i++; +	} + +	perf_evlist__disable(evlist); + +	min = counts[0].val; +	for (i = 0; i < EVENT_NUM; i++) { +		__T_VERBOSE("Event %2d -- Raw count = %lu, run = %lu, enable = %lu\n", +			    i, counts[i].val, counts[i].run, counts[i].ena); + +		perf_counts_values__scale(&counts[i], true, &scaled); +		if (scaled == 1) { +			__T_VERBOSE("\t Scaled count = %lu (%.2lf%%, %lu/%lu)\n", +				    counts[i].val, +				    (double)counts[i].run / (double)counts[i].ena * 100.0, +				    counts[i].run, counts[i].ena); +		} else if (scaled == -1) { +			__T_VERBOSE("\t Not Running\n"); +		} else { +			__T_VERBOSE("\t Not Scaling\n"); +		} + +		if (counts[i].val > max) +			max = counts[i].val; + +		if (counts[i].val < min) +			min = counts[i].val; + +		avg += counts[i].val; + +		if (counts[i].val != 0) +			nonzero++; +	} + +	if (nonzero != 0) +		avg = avg / nonzero; +	else +		avg = 0; + +	error = display_error(avg, max, min, expected_counts.val); + +	__T("Error out of range!", ((error <= 1.0) && (error >= 
-1.0))); + +	perf_evlist__close(evlist); +	perf_evlist__delete(evlist); + +	perf_thread_map__put(threads); + +	return 0; +} +  int test_evlist(int argc, char **argv)  {  	__T_START; @@ -424,6 +581,7 @@ int test_evlist(int argc, char **argv)  	test_stat_thread_enable();  	test_mmap_thread();  	test_mmap_cpus(); +	test_stat_multiplexing();  	__T_END;  	return tests_failed == 0 ? 0 : -1; diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index fe58843d047c..8e24c4c78c7f 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -1367,6 +1367,14 @@ static int field_is_dynamic(struct tep_format_field *field)  	return 0;  } +static int field_is_relative_dynamic(struct tep_format_field *field) +{ +	if (strncmp(field->type, "__rel_loc", 9) == 0) +		return 1; + +	return 0; +} +  static int field_is_long(struct tep_format_field *field)  {  	/* includes long long */ @@ -1622,6 +1630,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field **  			field->flags |= TEP_FIELD_IS_STRING;  		if (field_is_dynamic(field))  			field->flags |= TEP_FIELD_IS_DYNAMIC; +		if (field_is_relative_dynamic(field)) +			field->flags |= TEP_FIELD_IS_DYNAMIC | TEP_FIELD_IS_RELATIVE;  		if (field_is_long(field))  			field->flags |= TEP_FIELD_IS_LONG; @@ -2928,7 +2938,7 @@ process_str(struct tep_event *event __maybe_unused, struct tep_print_arg *arg,  	arg->type = TEP_PRINT_STRING;  	arg->string.string = token; -	arg->string.offset = -1; +	arg->string.field = NULL;  	if (read_expected(TEP_EVENT_DELIM, ")") < 0)  		goto out_err; @@ -2957,7 +2967,7 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar  	arg->type = TEP_PRINT_BITMASK;  	arg->bitmask.bitmask = token; -	arg->bitmask.offset = -1; +	arg->bitmask.field = NULL;  	if (read_expected(TEP_EVENT_DELIM, ")") < 0)  		goto out_err; @@ -3123,19 +3133,23 @@ process_function(struct tep_event *event, struct tep_print_arg *arg,  		free_token(token);  		return process_int_array(event, arg, tok);  	} -	if (strcmp(token, "__get_str") == 0) { +	if (strcmp(token, "__get_str") == 0 || +	    strcmp(token, "__get_rel_str") == 0) {  		free_token(token);  		return process_str(event, arg, tok);  	} -	if (strcmp(token, "__get_bitmask") == 0) { +	if (strcmp(token, "__get_bitmask") == 0 || +	    strcmp(token, "__get_rel_bitmask") == 0) {  		free_token(token);  		return process_bitmask(event, arg, tok);  	} -	if (strcmp(token, "__get_dynamic_array") == 0) { +	if (strcmp(token, "__get_dynamic_array") == 0 || +	    strcmp(token, "__get_rel_dynamic_array") == 0) {  		free_token(token);  		return process_dynamic_array(event, arg, tok);  	} -	if (strcmp(token, "__get_dynamic_array_len") == 0) { +	if (strcmp(token, "__get_dynamic_array_len") == 0 || +	    strcmp(token, "__get_rel_dynamic_array_len") == 0) {  		free_token(token);  		return process_dynamic_array_len(event, arg, tok);  	} @@ -4163,14 +4177,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,  	case TEP_PRINT_STRING: {  		int str_offset; -		if (arg->string.offset == -1) { -			struct tep_format_field *f; +		if (!arg->string.field) +			arg->string.field = tep_find_any_field(event, arg->string.string); +		if (!arg->string.field) +			break; -			f = tep_find_any_field(event, arg->string.string); -			arg->string.offset = f->offset; -		} -		str_offset = data2host4(tep, *(unsigned int *)(data + arg->string.offset)); +		str_offset = data2host4(tep, +				*(unsigned int *)(data + 
arg->string.field->offset));  		str_offset &= 0xffff; +		if (arg->string.field->flags & TEP_FIELD_IS_RELATIVE) +			str_offset += arg->string.field->offset + arg->string.field->size;  		print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset);  		break;  	} @@ -4181,15 +4197,16 @@ static void print_str_arg(struct trace_seq *s, void *data, int size,  		int bitmask_offset;  		int bitmask_size; -		if (arg->bitmask.offset == -1) { -			struct tep_format_field *f; - -			f = tep_find_any_field(event, arg->bitmask.bitmask); -			arg->bitmask.offset = f->offset; -		} -		bitmask_offset = data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); +		if (!arg->bitmask.field) +			arg->bitmask.field = tep_find_any_field(event, arg->bitmask.bitmask); +		if (!arg->bitmask.field) +			break; +		bitmask_offset = data2host4(tep, +				*(unsigned int *)(data + arg->bitmask.field->offset));  		bitmask_size = bitmask_offset >> 16;  		bitmask_offset &= 0xffff; +		if (arg->bitmask.field->flags & TEP_FIELD_IS_RELATIVE) +			bitmask_offset += arg->bitmask.field->offset + arg->bitmask.field->size;  		print_bitmask_to_seq(tep, s, format, len_arg,  				     data + bitmask_offset, bitmask_size);  		break; @@ -5109,6 +5126,8 @@ void tep_print_field(struct trace_seq *s, void *data,  			offset = val;  			len = offset >> 16;  			offset &= 0xffff; +			if (field->flags & TEP_FIELD_IS_RELATIVE) +				offset += field->offset + field->size;  		}  		if (field->flags & TEP_FIELD_IS_STRING &&  		    is_printable_array(data + offset, len)) { @@ -6987,6 +7006,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event,  					 data + offset, field->size);  		*len = offset >> 16;  		offset &= 0xffff; +		if (field->flags & TEP_FIELD_IS_RELATIVE) +			offset += field->offset + field->size;  	} else  		*len = field->size; diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index a67ad9a5b835..41d4f9f6a843 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -125,6 +125,7 @@ enum tep_format_flags {  	TEP_FIELD_IS_LONG	= 32,  	TEP_FIELD_IS_FLAG	= 64,  	TEP_FIELD_IS_SYMBOLIC	= 128, +	TEP_FIELD_IS_RELATIVE	= 256,  };  struct tep_format_field { @@ -153,12 +154,12 @@ struct tep_print_arg_atom {  struct tep_print_arg_string {  	char			*string; -	int			offset; +	struct tep_format_field *field;  };  struct tep_print_arg_bitmask {  	char			*bitmask; -	int			offset; +	struct tep_format_field *field;  };  struct tep_print_arg_field { diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index 368826bb5a57..5df177070d53 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1712,8 +1712,11 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *  		if (arg->str.field->flags & TEP_FIELD_IS_DYNAMIC) {  			addr = *(unsigned int *)val; -			val = record->data + (addr & 0xffff);  			size = addr >> 16; +			addr &= 0xffff; +			if (arg->str.field->flags & TEP_FIELD_IS_RELATIVE) +				addr += arg->str.field->offset + arg->str.field->size; +			val = record->data + addr;  		}  		/* |
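
For reference, a minimal stand-alone sketch (not taken from the patch) of the bpf_map_create()/bpf_prog_load() calling convention that the xsk.c and skel_internal.h hunks above migrate to, using only the signatures shown in this series. The trivial "return XDP_PASS" program, the map parameters, and the log-buffer handling are illustrative assumptions, and loading requires BPF privileges.

/* Hedged sketch: create an XSKMAP and load a trivial XDP program with the
 * new-style libbpf APIs (bpf_map_create, bpf_prog_load, LIBBPF_OPTS).
 * All parameters below are illustrative, not taken from the patch.
 */
#include <stdio.h>
#include <linux/bpf.h>
#include <bpf/bpf.h>

int main(void)
{
	char log_buf[4096] = "";
	LIBBPF_OPTS(bpf_prog_load_opts, opts,
		.log_buf = log_buf,
		.log_size = sizeof(log_buf),
	);
	/* equivalent of BPF_MOV64_IMM(BPF_REG_0, XDP_PASS); BPF_EXIT_INSN() */
	struct bpf_insn insns[] = {
		{ .code = BPF_ALU64 | BPF_MOV | BPF_K, .dst_reg = BPF_REG_0, .imm = XDP_PASS },
		{ .code = BPF_JMP | BPF_EXIT },
	};
	int map_fd, prog_fd;

	/* replaces bpf_create_map()/bpf_create_map_name()/bpf_create_map_xattr() */
	map_fd = bpf_map_create(BPF_MAP_TYPE_XSKMAP, "xsks_map",
				sizeof(int), sizeof(int), 1, NULL);
	if (map_fd < 0)
		return 1;

	/* replaces bpf_load_program()/bpf_load_program_xattr() */
	prog_fd = bpf_prog_load(BPF_PROG_TYPE_XDP, NULL, "GPL",
				insns, sizeof(insns) / sizeof(insns[0]), &opts);
	if (prog_fd < 0)
		fprintf(stderr, "prog load failed, verifier log:\n%s\n", log_buf);

	return prog_fd < 0 ? 1 : 0;
}

The same pattern (opts struct via LIBBPF_OPTS, NULL opts when defaults suffice) is what the converted get_xsk_prog(), xsk_probe_bpf_link() and bpf_load_and_run() call sites above rely on.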