diff options
Diffstat (limited to 'tools/lib')
42 files changed, 5202 insertions, 1351 deletions
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index f81e549ddfdb..fecb78afea3f 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,2 +1,4 @@ libbpf_version.h FEATURE-DUMP.libbpf +test_libbpf +libbpf.so.* diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build index 197b40f5b5c6..ee9d5362f35b 100644 --- a/tools/lib/bpf/Build +++ b/tools/lib/bpf/Build @@ -1 +1 @@ -libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o +libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 34d9c3619c96..8e7c56e9590f 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -3,7 +3,7 @@ BPF_VERSION = 0 BPF_PATCHLEVEL = 0 -BPF_EXTRAVERSION = 1 +BPF_EXTRAVERSION = 2 MAKEFLAGS += --no-print-directory @@ -14,21 +14,6 @@ srctree := $(patsubst %/,%,$(dir $(srctree))) #$(info Determined 'srctree' to be $(srctree)) endif -# Makefiles suck: This macro sets a default value of $(2) for the -# variable named by $(1), unless the variable has been set by -# environment or command line. This is necessary for CC and AR -# because make sets default values, so the simpler ?= approach -# won't work as expected. -define allow-override - $(if $(or $(findstring environment,$(origin $(1))),\ - $(findstring command line,$(origin $(1)))),,\ - $(eval $(1) = $(2))) -endef - -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. -$(call allow-override,CC,$(CROSS_COMPILE)gcc) -$(call allow-override,AR,$(CROSS_COMPILE)ar) - INSTALL = install # Use DESTDIR for installing into a different root directory. @@ -54,7 +39,7 @@ man_dir_SQ = '$(subst ','\'',$(man_dir))' export man_dir man_dir_SQ INSTALL export DESTDIR DESTDIR_SQ -include ../../scripts/Makefile.include +include $(srctree)/tools/scripts/Makefile.include # copy a bit from Linux kbuild @@ -94,8 +79,6 @@ export prefix libdir src obj libdir_SQ = $(subst ','\'',$(libdir)) libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) -LIB_FILE = libbpf.a libbpf.so - VERSION = $(BPF_VERSION) PATCHLEVEL = $(BPF_PATCHLEVEL) EXTRAVERSION = $(BPF_EXTRAVERSION) @@ -103,7 +86,10 @@ EXTRAVERSION = $(BPF_EXTRAVERSION) OBJ = $@ N = -LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) +LIBBPF_VERSION = $(BPF_VERSION).$(BPF_PATCHLEVEL).$(BPF_EXTRAVERSION) + +LIB_TARGET = libbpf.a libbpf.so.$(LIBBPF_VERSION) +LIB_FILE = libbpf.a libbpf.so* # Set compile option CFLAGS ifdef EXTRA_CFLAGS @@ -143,16 +129,18 @@ all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include -BPF_IN := $(OUTPUT)libbpf-in.o -LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) -VERSION_SCRIPT := libbpf.map +BPF_IN := $(OUTPUT)libbpf-in.o +VERSION_SCRIPT := libbpf.map -GLOBAL_SYM_COUNT = $(shell readelf -s $(BPF_IN) | \ +LIB_TARGET := $(addprefix $(OUTPUT),$(LIB_TARGET)) +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) + +GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN) | \ awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {s++} END{print s}') -VERSIONED_SYM_COUNT = $(shell readelf -s $(OUTPUT)libbpf.so | \ +VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \ grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l) -CMD_TARGETS = $(LIB_FILE) +CMD_TARGETS = $(LIB_TARGET) CXX_TEST_TARGET = $(OUTPUT)test_libbpf @@ -162,7 +150,8 @@ endif TARGETS = $(CMD_TARGETS) -all: fixdep all_cmd +all: fixdep + $(Q)$(MAKE) all_cmd all_cmd: $(CMD_TARGETS) check @@ -179,17 +168,24 @@ $(BPF_IN): force elfdep bpfdep @(test -f ../../include/uapi/linux/if_link.h -a -f ../../../include/uapi/linux/if_link.h && ( \ (diff -B ../../include/uapi/linux/if_link.h ../../../include/uapi/linux/if_link.h >/dev/null) || \ echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_link.h' differs from latest version at 'include/uapi/linux/if_link.h'" >&2 )) || true + @(test -f ../../include/uapi/linux/if_xdp.h -a -f ../../../include/uapi/linux/if_xdp.h && ( \ + (diff -B ../../include/uapi/linux/if_xdp.h ../../../include/uapi/linux/if_xdp.h >/dev/null) || \ + echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true $(Q)$(MAKE) $(build)=libbpf -$(OUTPUT)libbpf.so: $(BPF_IN) - $(QUIET_LINK)$(CC) --shared -Wl,--version-script=$(VERSION_SCRIPT) \ - $^ -o $@ +$(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION) + +$(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN) + $(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(VERSION) \ + -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ + @ln -sf $(@F) $(OUTPUT)libbpf.so + @ln -sf $(@F) $(OUTPUT)libbpf.so.$(VERSION) $(OUTPUT)libbpf.a: $(BPF_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ $(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a - $(QUIET_LINK)$(CXX) $^ -lelf -o $@ + $(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ check: check_abi @@ -203,6 +199,12 @@ check_abi: $(OUTPUT)libbpf.so exit 1; \ fi +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + define do_install if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ @@ -211,14 +213,16 @@ define do_install endef install_lib: all_cmd - $(call QUIET_INSTALL, $(LIB_FILE)) \ - $(call do_install,$(LIB_FILE),$(libdir_SQ)) + $(call QUIET_INSTALL, $(LIB_TARGET)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) install_headers: $(call QUIET_INSTALL, headers) \ $(call do_install,bpf.h,$(prefix)/include/bpf,644); \ - $(call do_install,libbpf.h,$(prefix)/include/bpf,644); - $(call do_install,btf.h,$(prefix)/include/bpf,644); + $(call do_install,libbpf.h,$(prefix)/include/bpf,644); \ + $(call do_install,btf.h,$(prefix)/include/bpf,644); \ + $(call do_install,xsk.h,$(prefix)/include/bpf,644); install: install_lib @@ -230,7 +234,7 @@ config-clean: clean: $(call QUIET_CLEAN, libbpf) $(RM) $(TARGETS) $(CXX_TEST_TARGET) \ - *.o *~ *.a *.so .*.d .*.cmd LIBBPF-CFLAGS + *.o *~ *.a *.so *.so.$(VERSION) .*.d .*.cmd LIBBPF-CFLAGS $(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf diff --git a/tools/lib/bpf/README.rst b/tools/lib/bpf/README.rst index 056f38310722..cef7b77eab69 100644 --- a/tools/lib/bpf/README.rst +++ b/tools/lib/bpf/README.rst @@ -9,7 +9,7 @@ described here. It's recommended to follow these conventions whenever a new function or type is added to keep libbpf API clean and consistent. All types and functions provided by libbpf API should have one of the -following prefixes: ``bpf_``, ``btf_``, ``libbpf_``. +following prefixes: ``bpf_``, ``btf_``, ``libbpf_``, ``xsk_``. System call wrappers -------------------- @@ -62,6 +62,19 @@ Auxiliary functions and types that don't fit well in any of categories described above should have ``libbpf_`` prefix, e.g. ``libbpf_get_error`` or ``libbpf_prog_type_by_name``. +AF_XDP functions +------------------- + +AF_XDP functions should have an ``xsk_`` prefix, e.g. +``xsk_umem__get_data`` or ``xsk_umem__create``. The interface consists +of both low-level ring access functions and high-level configuration +functions. These can be mixed and matched. Note that these functions +are not reentrant for performance reasons. + +Please take a look at Documentation/networking/af_xdp.rst in the Linux +kernel source tree on how to use XDP sockets and for some common +mistakes in case you do not get any traffic up to user space. + libbpf ABI ========== @@ -98,6 +111,7 @@ starting from ``0.0.1``. Every time ABI is being changed, e.g. because a new symbol is added or semantic of existing symbol is changed, ABI version should be bumped. +This bump in ABI version is at most once per kernel development cycle. For example, if current state of ``libbpf.map`` is: @@ -132,6 +146,20 @@ For example, if current state of ``libbpf.map`` is: Format of version script and ways to handle ABI changes, including incompatible ones, described in details in [1]. +Stand-alone build +================= + +Under https://github.com/libbpf/libbpf there is a (semi-)automated +mirror of the mainline's version of libbpf for a stand-alone build. + +However, all changes to libbpf's code base must be upstreamed through +the mainline kernel tree. + +License +======= + +libbpf is dual-licensed under LGPL 2.1 and BSD 2-Clause. + Links ===== diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index 3caaa3428774..9cd015574e83 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -22,6 +22,7 @@ */ #include <stdlib.h> +#include <string.h> #include <memory.h> #include <unistd.h> #include <asm/unistd.h> @@ -65,6 +66,17 @@ static inline int sys_bpf(enum bpf_cmd cmd, union bpf_attr *attr, return syscall(__NR_bpf, cmd, attr, size); } +static inline int sys_bpf_prog_load(union bpf_attr *attr, unsigned int size) +{ + int fd; + + do { + fd = sys_bpf(BPF_PROG_LOAD, attr, size); + } while (fd < 0 && errno == EAGAIN); + + return fd; +} + int bpf_create_map_xattr(const struct bpf_create_map_attr *create_attr) { __u32 name_len = create_attr->name ? strlen(create_attr->name) : 0; @@ -203,23 +215,35 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, { void *finfo = NULL, *linfo = NULL; union bpf_attr attr; + __u32 log_level; __u32 name_len; int fd; - if (!load_attr) + if (!load_attr || !log_buf != !log_buf_sz) + return -EINVAL; + + log_level = load_attr->log_level; + if (log_level > 2 || (log_level && !log_buf)) return -EINVAL; name_len = load_attr->name ? strlen(load_attr->name) : 0; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = load_attr->prog_type; attr.expected_attach_type = load_attr->expected_attach_type; attr.insn_cnt = (__u32)load_attr->insns_cnt; attr.insns = ptr_to_u64(load_attr->insns); attr.license = ptr_to_u64(load_attr->license); - attr.log_buf = ptr_to_u64(NULL); - attr.log_size = 0; - attr.log_level = 0; + + attr.log_level = log_level; + if (log_level) { + attr.log_buf = ptr_to_u64(log_buf); + attr.log_size = log_buf_sz; + } else { + attr.log_buf = ptr_to_u64(NULL); + attr.log_size = 0; + } + attr.kern_version = load_attr->kern_version; attr.prog_ifindex = load_attr->prog_ifindex; attr.prog_btf_fd = load_attr->prog_btf_fd; @@ -232,7 +256,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, memcpy(attr.prog_name, load_attr->name, min(name_len, BPF_OBJ_NAME_LEN - 1)); - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) return fd; @@ -269,13 +293,13 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, break; } - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); if (fd >= 0) goto done; } - if (!log_buf || !log_buf_sz) + if (log_level || !log_buf) goto done; /* Try again with log */ @@ -283,7 +307,7 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr, attr.log_size = log_buf_sz; attr.log_level = 1; log_buf[0] = 0; - fd = sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + fd = sys_bpf_prog_load(&attr, sizeof(attr)); done: free(finfo); free(linfo); @@ -316,7 +340,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_type = type; attr.insn_cnt = (__u32)insns_cnt; attr.insns = ptr_to_u64(insns); @@ -328,7 +352,7 @@ int bpf_verify_program(enum bpf_prog_type type, const struct bpf_insn *insns, attr.kern_version = kern_version; attr.prog_flags = prog_flags; - return sys_bpf(BPF_PROG_LOAD, &attr, sizeof(attr)); + return sys_bpf_prog_load(&attr, sizeof(attr)); } int bpf_map_update_elem(int fd, const void *key, const void *value, @@ -336,7 +360,7 @@ int bpf_map_update_elem(int fd, const void *key, const void *value, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -349,10 +373,23 @@ int bpf_map_lookup_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); + attr.map_fd = fd; + attr.key = ptr_to_u64(key); + attr.value = ptr_to_u64(value); + + return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); +} + +int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, __u64 flags) +{ + union bpf_attr attr; + + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); + attr.flags = flags; return sys_bpf(BPF_MAP_LOOKUP_ELEM, &attr, sizeof(attr)); } @@ -361,7 +398,7 @@ int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.value = ptr_to_u64(value); @@ -373,7 +410,7 @@ int bpf_map_delete_elem(int fd, const void *key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); @@ -384,7 +421,7 @@ int bpf_map_get_next_key(int fd, const void *key, void *next_key) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_fd = fd; attr.key = ptr_to_u64(key); attr.next_key = ptr_to_u64(next_key); @@ -396,7 +433,7 @@ int bpf_obj_pin(int fd, const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); attr.bpf_fd = fd; @@ -407,7 +444,7 @@ int bpf_obj_get(const char *pathname) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.pathname = ptr_to_u64((void *)pathname); return sys_bpf(BPF_OBJ_GET, &attr, sizeof(attr)); @@ -418,7 +455,7 @@ int bpf_prog_attach(int prog_fd, int target_fd, enum bpf_attach_type type, { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -431,7 +468,7 @@ int bpf_prog_detach(int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_type = type; @@ -442,7 +479,7 @@ int bpf_prog_detach2(int prog_fd, int target_fd, enum bpf_attach_type type) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.target_fd = target_fd; attr.attach_bpf_fd = prog_fd; attr.attach_type = type; @@ -456,7 +493,7 @@ int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.query.target_fd = target_fd; attr.query.attach_type = type; attr.query.query_flags = query_flags; @@ -477,7 +514,7 @@ int bpf_prog_test_run(int prog_fd, int repeat, void *data, __u32 size, union bpf_attr attr; int ret; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = prog_fd; attr.test.data_in = ptr_to_u64(data); attr.test.data_out = ptr_to_u64(data_out); @@ -502,7 +539,7 @@ int bpf_prog_test_run_xattr(struct bpf_prog_test_run_attr *test_attr) if (!test_attr->data_out && test_attr->data_size_out > 0) return -EINVAL; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.test.prog_fd = test_attr->prog_fd; attr.test.data_in = ptr_to_u64(test_attr->data_in); attr.test.data_out = ptr_to_u64(test_attr->data_out); @@ -522,7 +559,7 @@ int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_PROG_GET_NEXT_ID, &attr, sizeof(attr)); @@ -537,7 +574,7 @@ int bpf_map_get_next_id(__u32 start_id, __u32 *next_id) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.start_id = start_id; err = sys_bpf(BPF_MAP_GET_NEXT_ID, &attr, sizeof(attr)); @@ -551,7 +588,7 @@ int bpf_prog_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.prog_id = id; return sys_bpf(BPF_PROG_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -561,7 +598,7 @@ int bpf_map_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.map_id = id; return sys_bpf(BPF_MAP_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -571,7 +608,7 @@ int bpf_btf_get_fd_by_id(__u32 id) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.btf_id = id; return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr)); @@ -582,7 +619,7 @@ int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len) union bpf_attr attr; int err; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.info.bpf_fd = prog_fd; attr.info.info_len = *info_len; attr.info.info = ptr_to_u64(info); @@ -598,7 +635,7 @@ int bpf_raw_tracepoint_open(const char *name, int prog_fd) { union bpf_attr attr; - bzero(&attr, sizeof(attr)); + memset(&attr, 0, sizeof(attr)); attr.raw_tracepoint.name = ptr_to_u64(name); attr.raw_tracepoint.prog_fd = prog_fd; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 8f09de482839..6ffdd79bea89 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -85,6 +85,7 @@ struct bpf_load_program_attr { __u32 line_info_rec_size; const void *line_info; __u32 line_info_cnt; + __u32 log_level; }; /* Flags to direct loading requirements */ @@ -110,6 +111,8 @@ LIBBPF_API int bpf_map_update_elem(int fd, const void *key, const void *value, __u64 flags); LIBBPF_API int bpf_map_lookup_elem(int fd, const void *key, void *value); +LIBBPF_API int bpf_map_lookup_elem_flags(int fd, const void *key, void *value, + __u64 flags); LIBBPF_API int bpf_map_lookup_and_delete_elem(int fd, const void *key, void *value); LIBBPF_API int bpf_map_delete_elem(int fd, const void *key); diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index d682d3b8f7b9..cf119c9b6f27 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -1,6 +1,7 @@ // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) /* Copyright (c) 2018 Facebook */ +#include <stdio.h> #include <stdlib.h> #include <string.h> #include <unistd.h> @@ -9,12 +10,14 @@ #include <linux/btf.h> #include "btf.h" #include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" -#define elog(fmt, ...) { if (err_log) err_log(fmt, ##__VA_ARGS__); } #define max(a, b) ((a) > (b) ? (a) : (b)) #define min(a, b) ((a) < (b) ? (a) : (b)) -#define BTF_MAX_NR_TYPES 65535 +#define BTF_MAX_NR_TYPES 0x7fffffff +#define BTF_MAX_STR_OFFSET 0x7fffffff #define IS_MODIFIER(k) (((k) == BTF_KIND_TYPEDEF) || \ ((k) == BTF_KIND_VOLATILE) || \ @@ -39,9 +42,8 @@ struct btf { struct btf_ext_info { /* - * info points to a deep copy of the individual info section - * (e.g. func_info and line_info) from the .BTF.ext. - * It does not include the __u32 rec_size. + * info points to the individual info section (e.g. func_info and + * line_info) from the .BTF.ext. It does not include the __u32 rec_size. */ void *info; __u32 rec_size; @@ -49,8 +51,13 @@ struct btf_ext_info { }; struct btf_ext { + union { + struct btf_ext_header *hdr; + void *data; + }; struct btf_ext_info func_info; struct btf_ext_info line_info; + __u32 data_size; }; struct btf_ext_info_sec { @@ -107,54 +114,54 @@ static int btf_add_type(struct btf *btf, struct btf_type *t) return 0; } -static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_hdr(struct btf *btf) { const struct btf_header *hdr = btf->hdr; __u32 meta_left; if (btf->data_size < sizeof(struct btf_header)) { - elog("BTF header not found\n"); + pr_debug("BTF header not found\n"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF magic:%x\n", hdr->magic); + pr_debug("Invalid BTF magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF version:%u\n", hdr->version); + pr_debug("Unsupported BTF version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF flags:%x\n", hdr->flags); return -ENOTSUP; } meta_left = btf->data_size - sizeof(*hdr); if (!meta_left) { - elog("BTF has no data\n"); + pr_debug("BTF has no data\n"); return -EINVAL; } if (meta_left < hdr->type_off) { - elog("Invalid BTF type section offset:%u\n", hdr->type_off); + pr_debug("Invalid BTF type section offset:%u\n", hdr->type_off); return -EINVAL; } if (meta_left < hdr->str_off) { - elog("Invalid BTF string section offset:%u\n", hdr->str_off); + pr_debug("Invalid BTF string section offset:%u\n", hdr->str_off); return -EINVAL; } if (hdr->type_off >= hdr->str_off) { - elog("BTF type section offset >= string section offset. No type?\n"); + pr_debug("BTF type section offset >= string section offset. No type?\n"); return -EINVAL; } if (hdr->type_off & 0x02) { - elog("BTF type section is not aligned to 4 bytes\n"); + pr_debug("BTF type section is not aligned to 4 bytes\n"); return -EINVAL; } @@ -163,15 +170,15 @@ static int btf_parse_hdr(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_parse_str_sec(struct btf *btf) { const struct btf_header *hdr = btf->hdr; const char *start = btf->nohdr_data + hdr->str_off; const char *end = start + btf->hdr->str_len; - if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_NAME_OFFSET || + if (!hdr->str_len || hdr->str_len - 1 > BTF_MAX_STR_OFFSET || start[0] || end[-1]) { - elog("Invalid BTF string section\n"); + pr_debug("Invalid BTF string section\n"); return -EINVAL; } @@ -180,7 +187,38 @@ static int btf_parse_str_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } -static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) +static int btf_type_size(struct btf_type *t) +{ + int base_size = sizeof(struct btf_type); + __u16 vlen = BTF_INFO_VLEN(t->info); + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + return base_size; + case BTF_KIND_INT: + return base_size + sizeof(__u32); + case BTF_KIND_ENUM: + return base_size + vlen * sizeof(struct btf_enum); + case BTF_KIND_ARRAY: + return base_size + sizeof(struct btf_array); + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + return base_size + vlen * sizeof(struct btf_member); + case BTF_KIND_FUNC_PROTO: + return base_size + vlen * sizeof(struct btf_param); + default: + pr_debug("Unsupported BTF_KIND:%u\n", BTF_INFO_KIND(t->info)); + return -EINVAL; + } +} + +static int btf_parse_type_sec(struct btf *btf) { struct btf_header *hdr = btf->hdr; void *nohdr_data = btf->nohdr_data; @@ -189,41 +227,13 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) while (next_type < end_type) { struct btf_type *t = next_type; - __u16 vlen = BTF_INFO_VLEN(t->info); + int type_size; int err; - next_type += sizeof(*t); - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - next_type += sizeof(int); - break; - case BTF_KIND_ARRAY: - next_type += sizeof(struct btf_array); - break; - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - next_type += vlen * sizeof(struct btf_member); - break; - case BTF_KIND_ENUM: - next_type += vlen * sizeof(struct btf_enum); - break; - case BTF_KIND_FUNC_PROTO: - next_type += vlen * sizeof(struct btf_param); - break; - case BTF_KIND_FUNC: - case BTF_KIND_TYPEDEF: - case BTF_KIND_PTR: - case BTF_KIND_FWD: - case BTF_KIND_VOLATILE: - case BTF_KIND_CONST: - case BTF_KIND_RESTRICT: - break; - default: - elog("Unsupported BTF_KIND:%u\n", - BTF_INFO_KIND(t->info)); - return -EINVAL; - } - + type_size = btf_type_size(t); + if (type_size < 0) + return type_size; + next_type += type_size; err = btf_add_type(btf, t); if (err) return err; @@ -232,6 +242,11 @@ static int btf_parse_type_sec(struct btf *btf, btf_print_fn_t err_log) return 0; } +__u32 btf__get_nr_types(const struct btf *btf) +{ + return btf->nr_types; +} + const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 type_id) { if (type_id > btf->nr_types) @@ -250,21 +265,6 @@ static bool btf_type_is_void_or_null(const struct btf_type *t) return !t || btf_type_is_void(t); } -static __s64 btf_type_size(const struct btf_type *t) -{ - switch (BTF_INFO_KIND(t->info)) { - case BTF_KIND_INT: - case BTF_KIND_STRUCT: - case BTF_KIND_UNION: - case BTF_KIND_ENUM: - return t->size; - case BTF_KIND_PTR: - return sizeof(void *); - default: - return -EINVAL; - } -} - #define MAX_RESOLVE_DEPTH 32 __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) @@ -278,11 +278,16 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) t = btf__type_by_id(btf, type_id); for (i = 0; i < MAX_RESOLVE_DEPTH && !btf_type_is_void_or_null(t); i++) { - size = btf_type_size(t); - if (size >= 0) - break; - switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_ENUM: + size = t->size; + goto done; + case BTF_KIND_PTR: + size = sizeof(void *); + goto done; case BTF_KIND_TYPEDEF: case BTF_KIND_VOLATILE: case BTF_KIND_CONST: @@ -306,6 +311,7 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id) if (size < 0) return -EINVAL; +done: if (nelems && size > UINT32_MAX / nelems) return -E2BIG; @@ -363,10 +369,8 @@ void btf__free(struct btf *btf) free(btf); } -struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf *btf__new(__u8 *data, __u32 size) { - __u32 log_buf_size = 0; - char *log_buf = NULL; struct btf *btf; int err; @@ -376,16 +380,6 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) btf->fd = -1; - if (err_log) { - log_buf = malloc(BPF_LOG_BUF_SIZE); - if (!log_buf) { - err = -ENOMEM; - goto done; - } - *log_buf = 0; - log_buf_size = BPF_LOG_BUF_SIZE; - } - btf->data = malloc(size); if (!btf->data) { err = -ENOMEM; @@ -395,30 +389,17 @@ struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log) memcpy(btf->data, data, size); btf->data_size = size; - btf->fd = bpf_load_btf(btf->data, btf->data_size, - log_buf, log_buf_size, false); - - if (btf->fd == -1) { - err = -errno; - elog("Error loading BTF: %s(%d)\n", strerror(errno), errno); - if (log_buf && *log_buf) - elog("%s\n", log_buf); - goto done; - } - - err = btf_parse_hdr(btf, err_log); + err = btf_parse_hdr(btf); if (err) goto done; - err = btf_parse_str_sec(btf, err_log); + err = btf_parse_str_sec(btf); if (err) goto done; - err = btf_parse_type_sec(btf, err_log); + err = btf_parse_type_sec(btf); done: - free(log_buf); - if (err) { btf__free(btf); return ERR_PTR(err); @@ -427,11 +408,47 @@ done: return btf; } +int btf__load(struct btf *btf) +{ + __u32 log_buf_size = BPF_LOG_BUF_SIZE; + char *log_buf = NULL; + int err = 0; + + if (btf->fd >= 0) + return -EEXIST; + + log_buf = malloc(log_buf_size); + if (!log_buf) + return -ENOMEM; + + *log_buf = 0; + + btf->fd = bpf_load_btf(btf->data, btf->data_size, + log_buf, log_buf_size, false); + if (btf->fd < 0) { + err = -errno; + pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); + if (*log_buf) + pr_warning("%s\n", log_buf); + goto done; + } + +done: + free(log_buf); + return err; +} + int btf__fd(const struct btf *btf) { return btf->fd; } +const void *btf__get_raw_data(const struct btf *btf, __u32 *size) +{ + *size = btf->data_size; + return btf->data; +} + const char *btf__name_by_offset(const struct btf *btf, __u32 offset) { if (offset < btf->hdr->str_len) @@ -467,7 +484,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - bzero(ptr, last_size); + memset(ptr, 0, last_size); btf_info.btf = ptr_to_u64(ptr); err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); @@ -481,7 +498,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } ptr = temp_ptr; - bzero(ptr, last_size); + memset(ptr, 0, last_size); btf_info.btf = ptr_to_u64(ptr); err = bpf_obj_get_info_by_fd(btf_fd, &btf_info, &len); } @@ -491,7 +508,7 @@ int btf__get_from_id(__u32 id, struct btf **btf) goto exit_free; } - *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size, NULL); + *btf = btf__new((__u8 *)(long)btf_info.btf, btf_info.btf_size); if (IS_ERR(*btf)) { err = PTR_ERR(*btf); *btf = NULL; @@ -504,7 +521,79 @@ exit_free: return err; } -struct btf_ext_sec_copy_param { +int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id) +{ + const struct btf_type *container_type; + const struct btf_member *key, *value; + const size_t max_name = 256; + char container_name[max_name]; + __s64 key_size, value_size; + __s32 container_id; + + if (snprintf(container_name, max_name, "____btf_map_%s", map_name) == + max_name) { + pr_warning("map:%s length of '____btf_map_%s' is too long\n", + map_name, map_name); + return -EINVAL; + } + + container_id = btf__find_by_name(btf, container_name); + if (container_id < 0) { + pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", + map_name, container_name); + return container_id; + } + + container_type = btf__type_by_id(btf, container_id); + if (!container_type) { + pr_warning("map:%s cannot find BTF type for container_id:%u\n", + map_name, container_id); + return -EINVAL; + } + + if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || + BTF_INFO_VLEN(container_type->info) < 2) { + pr_warning("map:%s container_name:%s is an invalid container struct\n", + map_name, container_name); + return -EINVAL; + } + + key = (struct btf_member *)(container_type + 1); + value = key + 1; + + key_size = btf__resolve_size(btf, key->type); + if (key_size < 0) { + pr_warning("map:%s invalid BTF key_type_size\n", map_name); + return key_size; + } + + if (expected_key_size != key_size) { + pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", + map_name, (__u32)key_size, expected_key_size); + return -EINVAL; + } + + value_size = btf__resolve_size(btf, value->type); + if (value_size < 0) { + pr_warning("map:%s invalid BTF value_type_size\n", map_name); + return value_size; + } + + if (expected_value_size != value_size) { + pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", + map_name, (__u32)value_size, expected_value_size); + return -EINVAL; + } + + *key_type_id = key->type; + *value_type_id = value->type; + + return 0; +} + +struct btf_ext_sec_setup_param { __u32 off; __u32 len; __u32 min_rec_size; @@ -512,41 +601,33 @@ struct btf_ext_sec_copy_param { const char *desc; }; -static int btf_ext_copy_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - struct btf_ext_sec_copy_param *ext_sec, - btf_print_fn_t err_log) +static int btf_ext_setup_info(struct btf_ext *btf_ext, + struct btf_ext_sec_setup_param *ext_sec) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; const struct btf_ext_info_sec *sinfo; struct btf_ext_info *ext_info; __u32 info_left, record_size; /* The start of the info sec (including the __u32 record_size). */ - const void *info; - - /* data and data_size do not include btf_ext_header from now on */ - data = data + hdr->hdr_len; - data_size -= hdr->hdr_len; + void *info; if (ext_sec->off & 0x03) { - elog(".BTF.ext %s section is not aligned to 4 bytes\n", + pr_debug(".BTF.ext %s section is not aligned to 4 bytes\n", ext_sec->desc); return -EINVAL; } - if (data_size < ext_sec->off || - ext_sec->len > data_size - ext_sec->off) { - elog("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", - ext_sec->desc, ext_sec->off, ext_sec->len); + info = btf_ext->data + btf_ext->hdr->hdr_len + ext_sec->off; + info_left = ext_sec->len; + + if (btf_ext->data + btf_ext->data_size < info + ext_sec->len) { + pr_debug("%s section (off:%u len:%u) is beyond the end of the ELF section .BTF.ext\n", + ext_sec->desc, ext_sec->off, ext_sec->len); return -EINVAL; } - info = data + ext_sec->off; - info_left = ext_sec->len; - /* At least a record size */ if (info_left < sizeof(__u32)) { - elog(".BTF.ext %s record size not found\n", ext_sec->desc); + pr_debug(".BTF.ext %s record size not found\n", ext_sec->desc); return -EINVAL; } @@ -554,8 +635,8 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, record_size = *(__u32 *)info; if (record_size < ext_sec->min_rec_size || record_size & 0x03) { - elog("%s section in .BTF.ext has invalid record size %u\n", - ext_sec->desc, record_size); + pr_debug("%s section in .BTF.ext has invalid record size %u\n", + ext_sec->desc, record_size); return -EINVAL; } @@ -564,7 +645,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, /* If no records, return failure now so .BTF.ext won't be used. */ if (!info_left) { - elog("%s section in .BTF.ext has no records", ext_sec->desc); + pr_debug("%s section in .BTF.ext has no records", ext_sec->desc); return -EINVAL; } @@ -574,14 +655,14 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, __u32 num_records; if (info_left < sec_hdrlen) { - elog("%s section header is not found in .BTF.ext\n", + pr_debug("%s section header is not found in .BTF.ext\n", ext_sec->desc); return -EINVAL; } num_records = sinfo->num_info; if (num_records == 0) { - elog("%s section has incorrect num_records in .BTF.ext\n", + pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } @@ -589,7 +670,7 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, total_record_size = sec_hdrlen + (__u64)num_records * record_size; if (info_left < total_record_size) { - elog("%s section has incorrect num_records in .BTF.ext\n", + pr_debug("%s section has incorrect num_records in .BTF.ext\n", ext_sec->desc); return -EINVAL; } @@ -601,74 +682,64 @@ static int btf_ext_copy_info(struct btf_ext *btf_ext, ext_info = ext_sec->ext_info; ext_info->len = ext_sec->len - sizeof(__u32); ext_info->rec_size = record_size; - ext_info->info = malloc(ext_info->len); - if (!ext_info->info) - return -ENOMEM; - memcpy(ext_info->info, info + sizeof(__u32), ext_info->len); + ext_info->info = info + sizeof(__u32); return 0; } -static int btf_ext_copy_func_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_setup_func_info(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - struct btf_ext_sec_copy_param param = { - .off = hdr->func_info_off, - .len = hdr->func_info_len, + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->func_info_off, + .len = btf_ext->hdr->func_info_len, .min_rec_size = sizeof(struct bpf_func_info_min), .ext_info = &btf_ext->func_info, .desc = "func_info" }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); + return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_copy_line_info(struct btf_ext *btf_ext, - __u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_setup_line_info(struct btf_ext *btf_ext) { - const struct btf_ext_header *hdr = (struct btf_ext_header *)data; - struct btf_ext_sec_copy_param param = { - .off = hdr->line_info_off, - .len = hdr->line_info_len, + struct btf_ext_sec_setup_param param = { + .off = btf_ext->hdr->line_info_off, + .len = btf_ext->hdr->line_info_len, .min_rec_size = sizeof(struct bpf_line_info_min), .ext_info = &btf_ext->line_info, .desc = "line_info", }; - return btf_ext_copy_info(btf_ext, data, data_size, ¶m, err_log); + return btf_ext_setup_info(btf_ext, ¶m); } -static int btf_ext_parse_hdr(__u8 *data, __u32 data_size, - btf_print_fn_t err_log) +static int btf_ext_parse_hdr(__u8 *data, __u32 data_size) { const struct btf_ext_header *hdr = (struct btf_ext_header *)data; if (data_size < offsetof(struct btf_ext_header, func_info_off) || data_size < hdr->hdr_len) { - elog("BTF.ext header not found"); + pr_debug("BTF.ext header not found"); return -EINVAL; } if (hdr->magic != BTF_MAGIC) { - elog("Invalid BTF.ext magic:%x\n", hdr->magic); + pr_debug("Invalid BTF.ext magic:%x\n", hdr->magic); return -EINVAL; } if (hdr->version != BTF_VERSION) { - elog("Unsupported BTF.ext version:%u\n", hdr->version); + pr_debug("Unsupported BTF.ext version:%u\n", hdr->version); return -ENOTSUP; } if (hdr->flags) { - elog("Unsupported BTF.ext flags:%x\n", hdr->flags); + pr_debug("Unsupported BTF.ext flags:%x\n", hdr->flags); return -ENOTSUP; } if (data_size == hdr->hdr_len) { - elog("BTF.ext has no data\n"); + pr_debug("BTF.ext has no data\n"); return -EINVAL; } @@ -679,18 +750,16 @@ void btf_ext__free(struct btf_ext *btf_ext) { if (!btf_ext) return; - - free(btf_ext->func_info.info); - free(btf_ext->line_info.info); + free(btf_ext->data); free(btf_ext); } -struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) +struct btf_ext *btf_ext__new(__u8 *data, __u32 size) { struct btf_ext *btf_ext; int err; - err = btf_ext_parse_hdr(data, size, err_log); + err = btf_ext_parse_hdr(data, size); if (err) return ERR_PTR(err); @@ -698,13 +767,23 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) if (!btf_ext) return ERR_PTR(-ENOMEM); - err = btf_ext_copy_func_info(btf_ext, data, size, err_log); - if (err) { - btf_ext__free(btf_ext); - return ERR_PTR(err); + btf_ext->data_size = size; + btf_ext->data = malloc(size); + if (!btf_ext->data) { + err = -ENOMEM; + goto done; } + memcpy(btf_ext->data, data, size); + + err = btf_ext_setup_func_info(btf_ext); + if (err) + goto done; - err = btf_ext_copy_line_info(btf_ext, data, size, err_log); + err = btf_ext_setup_line_info(btf_ext); + if (err) + goto done; + +done: if (err) { btf_ext__free(btf_ext); return ERR_PTR(err); @@ -713,6 +792,12 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log) return btf_ext; } +const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, __u32 *size) +{ + *size = btf_ext->data_size; + return btf_ext->data; +} + static int btf_ext_reloc_info(const struct btf *btf, const struct btf_ext_info *ext_info, const char *sec_name, __u32 insns_cnt, @@ -761,7 +846,8 @@ static int btf_ext_reloc_info(const struct btf *btf, return -ENOENT; } -int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ext, +int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **func_info, __u32 *cnt) { @@ -769,7 +855,8 @@ int btf_ext__reloc_func_info(const struct btf *btf, const struct btf_ext *btf_ex insns_cnt, func_info, cnt); } -int btf_ext__reloc_line_info(const struct btf *btf, const struct btf_ext *btf_ext, +int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, const char *sec_name, __u32 insns_cnt, void **line_info, __u32 *cnt) { @@ -786,3 +873,1806 @@ __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext) { return btf_ext->line_info.rec_size; } + +struct btf_dedup; + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); +static void btf_dedup_free(struct btf_dedup *d); +static int btf_dedup_strings(struct btf_dedup *d); +static int btf_dedup_prim_types(struct btf_dedup *d); +static int btf_dedup_struct_types(struct btf_dedup *d); +static int btf_dedup_ref_types(struct btf_dedup *d); +static int btf_dedup_compact_types(struct btf_dedup *d); +static int btf_dedup_remap_types(struct btf_dedup *d); + +/* + * Deduplicate BTF types and strings. + * + * BTF dedup algorithm takes as an input `struct btf` representing `.BTF` ELF + * section with all BTF type descriptors and string data. It overwrites that + * memory in-place with deduplicated types and strings without any loss of + * information. If optional `struct btf_ext` representing '.BTF.ext' ELF section + * is provided, all the strings referenced from .BTF.ext section are honored + * and updated to point to the right offsets after deduplication. + * + * If function returns with error, type/string data might be garbled and should + * be discarded. + * + * More verbose and detailed description of both problem btf_dedup is solving, + * as well as solution could be found at: + * https://facebookmicrosites.github.io/bpf/blog/2018/11/14/btf-enhancement.html + * + * Problem description and justification + * ===================================== + * + * BTF type information is typically emitted either as a result of conversion + * from DWARF to BTF or directly by compiler. In both cases, each compilation + * unit contains information about a subset of all the types that are used + * in an application. These subsets are frequently overlapping and contain a lot + * of duplicated information when later concatenated together into a single + * binary. This algorithm ensures that each unique type is represented by single + * BTF type descriptor, greatly reducing resulting size of BTF data. + * + * Compilation unit isolation and subsequent duplication of data is not the only + * problem. The same type hierarchy (e.g., struct and all the type that struct + * references) in different compilation units can be represented in BTF to + * various degrees of completeness (or, rather, incompleteness) due to + * struct/union forward declarations. + * + * Let's take a look at an example, that we'll use to better understand the + * problem (and solution). Suppose we have two compilation units, each using + * same `struct S`, but each of them having incomplete type information about + * struct's fields: + * + * // CU #1: + * struct S; + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * // CU #2: + * struct S; + * struct A; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * In case of CU #1, BTF data will know only that `struct B` exist (but no + * more), but will know the complete type information about `struct A`. While + * for CU #2, it will know full type information about `struct B`, but will + * only know about forward declaration of `struct A` (in BTF terms, it will + * have `BTF_KIND_FWD` type descriptor with name `B`). + * + * This compilation unit isolation means that it's possible that there is no + * single CU with complete type information describing structs `S`, `A`, and + * `B`. Also, we might get tons of duplicated and redundant type information. + * + * Additional complication we need to keep in mind comes from the fact that + * types, in general, can form graphs containing cycles, not just DAGs. + * + * While algorithm does deduplication, it also merges and resolves type + * information (unless disabled throught `struct btf_opts`), whenever possible. + * E.g., in the example above with two compilation units having partial type + * information for structs `A` and `B`, the output of algorithm will emit + * a single copy of each BTF type that describes structs `A`, `B`, and `S` + * (as well as type information for `int` and pointers), as if they were defined + * in a single compilation unit as: + * + * struct A { + * int a; + * struct A* self; + * struct S* parent; + * }; + * struct B { + * int b; + * struct B* self; + * struct S* parent; + * }; + * struct S { + * struct A* a_ptr; + * struct B* b_ptr; + * }; + * + * Algorithm summary + * ================= + * + * Algorithm completes its work in 6 separate passes: + * + * 1. Strings deduplication. + * 2. Primitive types deduplication (int, enum, fwd). + * 3. Struct/union types deduplication. + * 4. Reference types deduplication (pointers, typedefs, arrays, funcs, func + * protos, and const/volatile/restrict modifiers). + * 5. Types compaction. + * 6. Types remapping. + * + * Algorithm determines canonical type descriptor, which is a single + * representative type for each truly unique type. This canonical type is the + * one that will go into final deduplicated BTF type information. For + * struct/unions, it is also the type that algorithm will merge additional type + * information into (while resolving FWDs), as it discovers it from data in + * other CUs. Each input BTF type eventually gets either mapped to itself, if + * that type is canonical, or to some other type, if that type is equivalent + * and was chosen as canonical representative. This mapping is stored in + * `btf_dedup->map` array. This map is also used to record STRUCT/UNION that + * FWD type got resolved to. + * + * To facilitate fast discovery of canonical types, we also maintain canonical + * index (`btf_dedup->dedup_table`), which maps type descriptor's signature hash + * (i.e., hashed kind, name, size, fields, etc) into a list of canonical types + * that match that signature. With sufficiently good choice of type signature + * hashing function, we can limit number of canonical types for each unique type + * signature to a very small number, allowing to find canonical type for any + * duplicated type very quickly. + * + * Struct/union deduplication is the most critical part and algorithm for + * deduplicating structs/unions is described in greater details in comments for + * `btf_dedup_is_equiv` function. + */ +int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = btf_dedup_new(btf, btf_ext, opts); + int err; + + if (IS_ERR(d)) { + pr_debug("btf_dedup_new failed: %ld", PTR_ERR(d)); + return -EINVAL; + } + + err = btf_dedup_strings(d); + if (err < 0) { + pr_debug("btf_dedup_strings failed:%d\n", err); + goto done; + } + err = btf_dedup_prim_types(d); + if (err < 0) { + pr_debug("btf_dedup_prim_types failed:%d\n", err); + goto done; + } + err = btf_dedup_struct_types(d); + if (err < 0) { + pr_debug("btf_dedup_struct_types failed:%d\n", err); + goto done; + } + err = btf_dedup_ref_types(d); + if (err < 0) { + pr_debug("btf_dedup_ref_types failed:%d\n", err); + goto done; + } + err = btf_dedup_compact_types(d); + if (err < 0) { + pr_debug("btf_dedup_compact_types failed:%d\n", err); + goto done; + } + err = btf_dedup_remap_types(d); + if (err < 0) { + pr_debug("btf_dedup_remap_types failed:%d\n", err); + goto done; + } + +done: + btf_dedup_free(d); + return err; +} + +#define BTF_DEDUP_TABLE_DEFAULT_SIZE (1 << 14) +#define BTF_DEDUP_TABLE_MAX_SIZE_LOG 31 +#define BTF_UNPROCESSED_ID ((__u32)-1) +#define BTF_IN_PROGRESS_ID ((__u32)-2) + +struct btf_dedup_node { + struct btf_dedup_node *next; + __u32 type_id; +}; + +struct btf_dedup { + /* .BTF section to be deduped in-place */ + struct btf *btf; + /* + * Optional .BTF.ext section. When provided, any strings referenced + * from it will be taken into account when deduping strings + */ + struct btf_ext *btf_ext; + /* + * This is a map from any type's signature hash to a list of possible + * canonical representative type candidates. Hash collisions are + * ignored, so even types of various kinds can share same list of + * candidates, which is fine because we rely on subsequent + * btf_xxx_equal() checks to authoritatively verify type equality. + */ + struct btf_dedup_node **dedup_table; + /* Canonical types map */ + __u32 *map; + /* Hypothetical mapping, used during type graph equivalence checks */ + __u32 *hypot_map; + __u32 *hypot_list; + size_t hypot_cnt; + size_t hypot_cap; + /* Various option modifying behavior of algorithm */ + struct btf_dedup_opts opts; +}; + +struct btf_str_ptr { + const char *str; + __u32 new_off; + bool used; +}; + +struct btf_str_ptrs { + struct btf_str_ptr *ptrs; + const char *data; + __u32 cnt; + __u32 cap; +}; + +static inline __u32 hash_combine(__u32 h, __u32 value) +{ +/* 2^31 + 2^29 - 2^25 + 2^22 - 2^19 - 2^16 + 1 */ +#define GOLDEN_RATIO_PRIME 0x9e370001UL + return h * 37 + value * GOLDEN_RATIO_PRIME; +#undef GOLDEN_RATIO_PRIME +} + +#define for_each_dedup_cand(d, hash, node) \ + for (node = d->dedup_table[hash & (d->opts.dedup_table_size - 1)]; \ + node; \ + node = node->next) + +static int btf_dedup_table_add(struct btf_dedup *d, __u32 hash, __u32 type_id) +{ + struct btf_dedup_node *node = malloc(sizeof(struct btf_dedup_node)); + int bucket = hash & (d->opts.dedup_table_size - 1); + + if (!node) + return -ENOMEM; + node->type_id = type_id; + node->next = d->dedup_table[bucket]; + d->dedup_table[bucket] = node; + return 0; +} + +static int btf_dedup_hypot_map_add(struct btf_dedup *d, + __u32 from_id, __u32 to_id) +{ + if (d->hypot_cnt == d->hypot_cap) { + __u32 *new_list; + + d->hypot_cap += max(16, d->hypot_cap / 2); + new_list = realloc(d->hypot_list, sizeof(__u32) * d->hypot_cap); + if (!new_list) + return -ENOMEM; + d->hypot_list = new_list; + } + d->hypot_list[d->hypot_cnt++] = from_id; + d->hypot_map[from_id] = to_id; + return 0; +} + +static void btf_dedup_clear_hypot_map(struct btf_dedup *d) +{ + int i; + + for (i = 0; i < d->hypot_cnt; i++) + d->hypot_map[d->hypot_list[i]] = BTF_UNPROCESSED_ID; + d->hypot_cnt = 0; +} + +static void btf_dedup_table_free(struct btf_dedup *d) +{ + struct btf_dedup_node *head, *tmp; + int i; + + if (!d->dedup_table) + return; + + for (i = 0; i < d->opts.dedup_table_size; i++) { + while (d->dedup_table[i]) { + tmp = d->dedup_table[i]; + d->dedup_table[i] = tmp->next; + free(tmp); + } + + head = d->dedup_table[i]; + while (head) { + tmp = head; + head = head->next; + free(tmp); + } + } + + free(d->dedup_table); + d->dedup_table = NULL; +} + +static void btf_dedup_free(struct btf_dedup *d) +{ + btf_dedup_table_free(d); + + free(d->map); + d->map = NULL; + + free(d->hypot_map); + d->hypot_map = NULL; + + free(d->hypot_list); + d->hypot_list = NULL; + + free(d); +} + +/* Find closest power of two >= to size, capped at 2^max_size_log */ +static __u32 roundup_pow2_max(__u32 size, int max_size_log) +{ + int i; + + for (i = 0; i < max_size_log && (1U << i) < size; i++) + ; + return 1U << i; +} + + +static struct btf_dedup *btf_dedup_new(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts) +{ + struct btf_dedup *d = calloc(1, sizeof(struct btf_dedup)); + int i, err = 0; + __u32 sz; + + if (!d) + return ERR_PTR(-ENOMEM); + + d->opts.dont_resolve_fwds = opts && opts->dont_resolve_fwds; + sz = opts && opts->dedup_table_size ? opts->dedup_table_size + : BTF_DEDUP_TABLE_DEFAULT_SIZE; + sz = roundup_pow2_max(sz, BTF_DEDUP_TABLE_MAX_SIZE_LOG); + d->opts.dedup_table_size = sz; + + d->btf = btf; + d->btf_ext = btf_ext; + + d->dedup_table = calloc(d->opts.dedup_table_size, + sizeof(struct btf_dedup_node *)); + if (!d->dedup_table) { + err = -ENOMEM; + goto done; + } + + d->map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->map) { + err = -ENOMEM; + goto done; + } + /* special BTF "void" type is made canonical immediately */ + d->map[0] = 0; + for (i = 1; i <= btf->nr_types; i++) + d->map[i] = BTF_UNPROCESSED_ID; + + d->hypot_map = malloc(sizeof(__u32) * (1 + btf->nr_types)); + if (!d->hypot_map) { + err = -ENOMEM; + goto done; + } + for (i = 0; i <= btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + +done: + if (err) { + btf_dedup_free(d); + return ERR_PTR(err); + } + + return d; +} + +typedef int (*str_off_fn_t)(__u32 *str_off_ptr, void *ctx); + +/* + * Iterate over all possible places in .BTF and .BTF.ext that can reference + * string and pass pointer to it to a provided callback `fn`. + */ +static int btf_for_each_str_off(struct btf_dedup *d, str_off_fn_t fn, void *ctx) +{ + void *line_data_cur, *line_data_end; + int i, j, r, rec_size; + struct btf_type *t; + + for (i = 1; i <= d->btf->nr_types; i++) { + t = d->btf->types[i]; + r = fn(&t->name_off, ctx); + if (r) + return r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *m = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_ENUM: { + struct btf_enum *m = (struct btf_enum *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + case BTF_KIND_FUNC_PROTO: { + struct btf_param *m = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (j = 0; j < vlen; j++) { + r = fn(&m->name_off, ctx); + if (r) + return r; + m++; + } + break; + } + default: + break; + } + } + + if (!d->btf_ext) + return 0; + + line_data_cur = d->btf_ext->line_info.info; + line_data_end = d->btf_ext->line_info.info + d->btf_ext->line_info.len; + rec_size = d->btf_ext->line_info.rec_size; + + while (line_data_cur < line_data_end) { + struct btf_ext_info_sec *sec = line_data_cur; + struct bpf_line_info_min *line_info; + __u32 num_info = sec->num_info; + + r = fn(&sec->sec_name_off, ctx); + if (r) + return r; + + line_data_cur += sizeof(struct btf_ext_info_sec); + for (i = 0; i < num_info; i++) { + line_info = line_data_cur; + r = fn(&line_info->file_name_off, ctx); + if (r) + return r; + r = fn(&line_info->line_off, ctx); + if (r) + return r; + line_data_cur += rec_size; + } + } + + return 0; +} + +static int str_sort_by_content(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + return strcmp(p1->str, p2->str); +} + +static int str_sort_by_offset(const void *a1, const void *a2) +{ + const struct btf_str_ptr *p1 = a1; + const struct btf_str_ptr *p2 = a2; + + if (p1->str != p2->str) + return p1->str < p2->str ? -1 : 1; + return 0; +} + +static int btf_dedup_str_ptr_cmp(const void *str_ptr, const void *pelem) +{ + const struct btf_str_ptr *p = pelem; + + if (str_ptr != p->str) + return (const char *)str_ptr < p->str ? -1 : 1; + return 0; +} + +static int btf_str_mark_as_used(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + s->used = true; + return 0; +} + +static int btf_str_remap_offset(__u32 *str_off_ptr, void *ctx) +{ + struct btf_str_ptrs *strs; + struct btf_str_ptr *s; + + if (*str_off_ptr == 0) + return 0; + + strs = ctx; + s = bsearch(strs->data + *str_off_ptr, strs->ptrs, strs->cnt, + sizeof(struct btf_str_ptr), btf_dedup_str_ptr_cmp); + if (!s) + return -EINVAL; + *str_off_ptr = s->new_off; + return 0; +} + +/* + * Dedup string and filter out those that are not referenced from either .BTF + * or .BTF.ext (if provided) sections. + * + * This is done by building index of all strings in BTF's string section, + * then iterating over all entities that can reference strings (e.g., type + * names, struct field names, .BTF.ext line info, etc) and marking corresponding + * strings as used. After that all used strings are deduped and compacted into + * sequential blob of memory and new offsets are calculated. Then all the string + * references are iterated again and rewritten using new offsets. + */ +static int btf_dedup_strings(struct btf_dedup *d) +{ + const struct btf_header *hdr = d->btf->hdr; + char *start = (char *)d->btf->nohdr_data + hdr->str_off; + char *end = start + d->btf->hdr->str_len; + char *p = start, *tmp_strs = NULL; + struct btf_str_ptrs strs = { + .cnt = 0, + .cap = 0, + .ptrs = NULL, + .data = start, + }; + int i, j, err = 0, grp_idx; + bool grp_used; + + /* build index of all strings */ + while (p < end) { + if (strs.cnt + 1 > strs.cap) { + struct btf_str_ptr *new_ptrs; + + strs.cap += max(strs.cnt / 2, 16); + new_ptrs = realloc(strs.ptrs, + sizeof(strs.ptrs[0]) * strs.cap); + if (!new_ptrs) { + err = -ENOMEM; + goto done; + } + strs.ptrs = new_ptrs; + } + + strs.ptrs[strs.cnt].str = p; + strs.ptrs[strs.cnt].used = false; + + p += strlen(p) + 1; + strs.cnt++; + } + + /* temporary storage for deduplicated strings */ + tmp_strs = malloc(d->btf->hdr->str_len); + if (!tmp_strs) { + err = -ENOMEM; + goto done; + } + + /* mark all used strings */ + strs.ptrs[0].used = true; + err = btf_for_each_str_off(d, btf_str_mark_as_used, &strs); + if (err) + goto done; + + /* sort strings by context, so that we can identify duplicates */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_content); + + /* + * iterate groups of equal strings and if any instance in a group was + * referenced, emit single instance and remember new offset + */ + p = tmp_strs; + grp_idx = 0; + grp_used = strs.ptrs[0].used; + /* iterate past end to avoid code duplication after loop */ + for (i = 1; i <= strs.cnt; i++) { + /* + * when i == strs.cnt, we want to skip string comparison and go + * straight to handling last group of strings (otherwise we'd + * need to handle last group after the loop w/ duplicated code) + */ + if (i < strs.cnt && + !strcmp(strs.ptrs[i].str, strs.ptrs[grp_idx].str)) { + grp_used = grp_used || strs.ptrs[i].used; + continue; + } + + /* + * this check would have been required after the loop to handle + * last group of strings, but due to <= condition in a loop + * we avoid that duplication + */ + if (grp_used) { + int new_off = p - tmp_strs; + __u32 len = strlen(strs.ptrs[grp_idx].str); + + memmove(p, strs.ptrs[grp_idx].str, len + 1); + for (j = grp_idx; j < i; j++) + strs.ptrs[j].new_off = new_off; + p += len + 1; + } + + if (i < strs.cnt) { + grp_idx = i; + grp_used = strs.ptrs[i].used; + } + } + + /* replace original strings with deduped ones */ + d->btf->hdr->str_len = p - tmp_strs; + memmove(start, tmp_strs, d->btf->hdr->str_len); + end = start + d->btf->hdr->str_len; + + /* restore original order for further binary search lookups */ + qsort(strs.ptrs, strs.cnt, sizeof(strs.ptrs[0]), str_sort_by_offset); + + /* remap string offsets */ + err = btf_for_each_str_off(d, btf_str_remap_offset, &strs); + if (err) + goto done; + + d->btf->hdr->str_len = end - start; + +done: + free(tmp_strs); + free(strs.ptrs); + return err; +} + +static __u32 btf_hash_common(struct btf_type *t) +{ + __u32 h; + + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info); + h = hash_combine(h, t->size); + return h; +} + +static bool btf_equal_common(struct btf_type *t1, struct btf_type *t2) +{ + return t1->name_off == t2->name_off && + t1->info == t2->info && + t1->size == t2->size; +} + +/* Calculate type signature hash of INT. */ +static __u32 btf_hash_int(struct btf_type *t) +{ + __u32 info = *(__u32 *)(t + 1); + __u32 h; + + h = btf_hash_common(t); + h = hash_combine(h, info); + return h; +} + +/* Check structural equality of two INTs. */ +static bool btf_equal_int(struct btf_type *t1, struct btf_type *t2) +{ + __u32 info1, info2; + + if (!btf_equal_common(t1, t2)) + return false; + info1 = *(__u32 *)(t1 + 1); + info2 = *(__u32 *)(t2 + 1); + return info1 == info2; +} + +/* Calculate type signature hash of ENUM. */ +static __u32 btf_hash_enum(struct btf_type *t) +{ + __u32 h; + + /* don't hash vlen and enum members to support enum fwd resolving */ + h = hash_combine(0, t->name_off); + h = hash_combine(h, t->info & ~0xffff); + h = hash_combine(h, t->size); + return h; +} + +/* Check structural equality of two ENUMs. */ +static bool btf_equal_enum(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_enum *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_enum *)(t1 + 1); + m2 = (struct btf_enum *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->val != m2->val) + return false; + m1++; + m2++; + } + return true; +} + +static inline bool btf_is_enum_fwd(struct btf_type *t) +{ + return BTF_INFO_KIND(t->info) == BTF_KIND_ENUM && + BTF_INFO_VLEN(t->info) == 0; +} + +static bool btf_compat_enum(struct btf_type *t1, struct btf_type *t2) +{ + if (!btf_is_enum_fwd(t1) && !btf_is_enum_fwd(t2)) + return btf_equal_enum(t1, t2); + /* ignore vlen when comparing */ + return t1->name_off == t2->name_off && + (t1->info & ~0xffff) == (t2->info & ~0xffff) && + t1->size == t2->size; +} + +/* + * Calculate type signature hash of STRUCT/UNION, ignoring referenced type IDs, + * as referenced type IDs equivalence is established separately during type + * graph equivalence check algorithm. + */ +static __u32 btf_hash_struct(struct btf_type *t) +{ + struct btf_member *member = (struct btf_member *)(t + 1); + __u32 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->offset); + /* no hashing of referenced type ID, it can be unresolved yet */ + member++; + } + return h; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_shallow_equal_struct(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_member *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_member *)(t1 + 1); + m2 = (struct btf_member *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->offset != m2->offset) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Calculate type signature hash of ARRAY, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static __u32 btf_hash_array(struct btf_type *t) +{ + struct btf_array *info = (struct btf_array *)(t + 1); + __u32 h = btf_hash_common(t); + + h = hash_combine(h, info->type); + h = hash_combine(h, info->index_type); + h = hash_combine(h, info->nelems); + return h; +} + +/* + * Check exact equality of two ARRAYs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * ARRAY to potential canonical representative. + */ +static bool btf_equal_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->type == info2->type && + info1->index_type == info2->index_type && + info1->nelems == info2->nelems; +} + +/* + * Check structural compatibility of two ARRAYs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static bool btf_compat_array(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_array *info1, *info2; + + if (!btf_equal_common(t1, t2)) + return false; + + info1 = (struct btf_array *)(t1 + 1); + info2 = (struct btf_array *)(t2 + 1); + return info1->nelems == info2->nelems; +} + +/* + * Calculate type signature hash of FUNC_PROTO, including referenced type IDs, + * under assumption that they were already resolved to canonical type IDs and + * are not going to change. + */ +static inline __u32 btf_hash_fnproto(struct btf_type *t) +{ + struct btf_param *member = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + __u32 h = btf_hash_common(t); + int i; + + for (i = 0; i < vlen; i++) { + h = hash_combine(h, member->name_off); + h = hash_combine(h, member->type); + member++; + } + return h; +} + +/* + * Check exact equality of two FUNC_PROTOs, taking into account referenced + * type IDs, under assumption that they were already resolved to canonical + * type IDs and are not going to change. + * This function is called during reference types deduplication to compare + * FUNC_PROTO to potential canonical representative. + */ +static inline bool btf_equal_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + if (!btf_equal_common(t1, t2)) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off || m1->type != m2->type) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Check structural compatibility of two FUNC_PROTOs, ignoring referenced type + * IDs. This check is performed during type graph equivalence check and + * referenced types equivalence is checked separately. + */ +static inline bool btf_compat_fnproto(struct btf_type *t1, struct btf_type *t2) +{ + struct btf_param *m1, *m2; + __u16 vlen; + int i; + + /* skip return type ID */ + if (t1->name_off != t2->name_off || t1->info != t2->info) + return false; + + vlen = BTF_INFO_VLEN(t1->info); + m1 = (struct btf_param *)(t1 + 1); + m2 = (struct btf_param *)(t2 + 1); + for (i = 0; i < vlen; i++) { + if (m1->name_off != m2->name_off) + return false; + m1++; + m2++; + } + return true; +} + +/* + * Deduplicate primitive types, that can't reference other types, by calculating + * their type signature hash and comparing them with any possible canonical + * candidate. If no canonical candidate matches, type itself is marked as + * canonical and is added into `btf_dedup->dedup_table` as another candidate. + */ +static int btf_dedup_prim_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + struct btf_type *cand; + struct btf_dedup_node *cand_node; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u32 h; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_ARRAY: + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: + case BTF_KIND_FUNC: + case BTF_KIND_FUNC_PROTO: + return 0; + + case BTF_KIND_INT: + h = btf_hash_int(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_int(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ENUM: + h = btf_hash_enum(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_enum(t, cand)) { + new_id = cand_node->type_id; + break; + } + if (d->opts.dont_resolve_fwds) + continue; + if (btf_compat_enum(t, cand)) { + if (btf_is_enum_fwd(t)) { + /* resolve fwd to full enum */ + new_id = cand_node->type_id; + break; + } + /* resolve canonical enum fwd to full enum */ + d->map[cand_node->type_id] = type_id; + } + } + break; + + case BTF_KIND_FWD: + h = btf_hash_common(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_prim_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_prim_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Check whether type is already mapped into canonical one (could be to itself). + */ +static inline bool is_type_mapped(struct btf_dedup *d, uint32_t type_id) +{ + return d->map[type_id] <= BTF_MAX_NR_TYPES; +} + +/* + * Resolve type ID into its canonical type ID, if any; otherwise return original + * type ID. If type is FWD and is resolved into STRUCT/UNION already, follow + * STRUCT/UNION link and resolve it into canonical type ID as well. + */ +static inline __u32 resolve_type_id(struct btf_dedup *d, __u32 type_id) +{ + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + return type_id; +} + +/* + * Resolve FWD to underlying STRUCT/UNION, if any; otherwise return original + * type ID. + */ +static uint32_t resolve_fwd_id(struct btf_dedup *d, uint32_t type_id) +{ + __u32 orig_type_id = type_id; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + while (is_type_mapped(d, type_id) && d->map[type_id] != type_id) + type_id = d->map[type_id]; + + if (BTF_INFO_KIND(d->btf->types[type_id]->info) != BTF_KIND_FWD) + return type_id; + + return orig_type_id; +} + + +static inline __u16 btf_fwd_kind(struct btf_type *t) +{ + return BTF_INFO_KFLAG(t->info) ? BTF_KIND_UNION : BTF_KIND_STRUCT; +} + +/* + * Check equivalence of BTF type graph formed by candidate struct/union (we'll + * call it "candidate graph" in this description for brevity) to a type graph + * formed by (potential) canonical struct/union ("canonical graph" for brevity + * here, though keep in mind that not all types in canonical graph are + * necessarily canonical representatives themselves, some of them might be + * duplicates or its uniqueness might not have been established yet). + * Returns: + * - >0, if type graphs are equivalent; + * - 0, if not equivalent; + * - <0, on error. + * + * Algorithm performs side-by-side DFS traversal of both type graphs and checks + * equivalence of BTF types at each step. If at any point BTF types in candidate + * and canonical graphs are not compatible structurally, whole graphs are + * incompatible. If types are structurally equivalent (i.e., all information + * except referenced type IDs is exactly the same), a mapping from `canon_id` to + * a `cand_id` is recored in hypothetical mapping (`btf_dedup->hypot_map`). + * If a type references other types, then those referenced types are checked + * for equivalence recursively. + * + * During DFS traversal, if we find that for current `canon_id` type we + * already have some mapping in hypothetical map, we check for two possible + * situations: + * - `canon_id` is mapped to exactly the same type as `cand_id`. This will + * happen when type graphs have cycles. In this case we assume those two + * types are equivalent. + * - `canon_id` is mapped to different type. This is contradiction in our + * hypothetical mapping, because same graph in canonical graph corresponds + * to two different types in candidate graph, which for equivalent type + * graphs shouldn't happen. This condition terminates equivalence check + * with negative result. + * + * If type graphs traversal exhausts types to check and find no contradiction, + * then type graphs are equivalent. + * + * When checking types for equivalence, there is one special case: FWD types. + * If FWD type resolution is allowed and one of the types (either from canonical + * or candidate graph) is FWD and other is STRUCT/UNION (depending on FWD's kind + * flag) and their names match, hypothetical mapping is updated to point from + * FWD to STRUCT/UNION. If graphs will be determined as equivalent successfully, + * this mapping will be used to record FWD -> STRUCT/UNION mapping permanently. + * + * Technically, this could lead to incorrect FWD to STRUCT/UNION resolution, + * if there are two exactly named (or anonymous) structs/unions that are + * compatible structurally, one of which has FWD field, while other is concrete + * STRUCT/UNION, but according to C sources they are different structs/unions + * that are referencing different types with the same name. This is extremely + * unlikely to happen, but btf_dedup API allows to disable FWD resolution if + * this logic is causing problems. + * + * Doing FWD resolution means that both candidate and/or canonical graphs can + * consists of portions of the graph that come from multiple compilation units. + * This is due to the fact that types within single compilation unit are always + * deduplicated and FWDs are already resolved, if referenced struct/union + * definiton is available. So, if we had unresolved FWD and found corresponding + * STRUCT/UNION, they will be from different compilation units. This + * consequently means that when we "link" FWD to corresponding STRUCT/UNION, + * type graph will likely have at least two different BTF types that describe + * same type (e.g., most probably there will be two different BTF types for the + * same 'int' primitive type) and could even have "overlapping" parts of type + * graph that describe same subset of types. + * + * This in turn means that our assumption that each type in canonical graph + * must correspond to exactly one type in candidate graph might not hold + * anymore and will make it harder to detect contradictions using hypothetical + * map. To handle this problem, we allow to follow FWD -> STRUCT/UNION + * resolution only in canonical graph. FWDs in candidate graphs are never + * resolved. To see why it's OK, let's check all possible situations w.r.t. FWDs + * that can occur: + * - Both types in canonical and candidate graphs are FWDs. If they are + * structurally equivalent, then they can either be both resolved to the + * same STRUCT/UNION or not resolved at all. In both cases they are + * equivalent and there is no need to resolve FWD on candidate side. + * - Both types in canonical and candidate graphs are concrete STRUCT/UNION, + * so nothing to resolve as well, algorithm will check equivalence anyway. + * - Type in canonical graph is FWD, while type in candidate is concrete + * STRUCT/UNION. In this case candidate graph comes from single compilation + * unit, so there is exactly one BTF type for each unique C type. After + * resolving FWD into STRUCT/UNION, there might be more than one BTF type + * in canonical graph mapping to single BTF type in candidate graph, but + * because hypothetical mapping maps from canonical to candidate types, it's + * alright, and we still maintain the property of having single `canon_id` + * mapping to single `cand_id` (there could be two different `canon_id` + * mapped to the same `cand_id`, but it's not contradictory). + * - Type in canonical graph is concrete STRUCT/UNION, while type in candidate + * graph is FWD. In this case we are just going to check compatibility of + * STRUCT/UNION and corresponding FWD, and if they are compatible, we'll + * assume that whatever STRUCT/UNION FWD resolves to must be equivalent to + * a concrete STRUCT/UNION from canonical graph. If the rest of type graphs + * turn out equivalent, we'll re-resolve FWD to concrete STRUCT/UNION from + * canonical graph. + */ +static int btf_dedup_is_equiv(struct btf_dedup *d, __u32 cand_id, + __u32 canon_id) +{ + struct btf_type *cand_type; + struct btf_type *canon_type; + __u32 hypot_type_id; + __u16 cand_kind; + __u16 canon_kind; + int i, eq; + + /* if both resolve to the same canonical, they must be equivalent */ + if (resolve_type_id(d, cand_id) == resolve_type_id(d, canon_id)) + return 1; + + canon_id = resolve_fwd_id(d, canon_id); + + hypot_type_id = d->hypot_map[canon_id]; + if (hypot_type_id <= BTF_MAX_NR_TYPES) + return hypot_type_id == cand_id; + + if (btf_dedup_hypot_map_add(d, canon_id, cand_id)) + return -ENOMEM; + + cand_type = d->btf->types[cand_id]; + canon_type = d->btf->types[canon_id]; + cand_kind = BTF_INFO_KIND(cand_type->info); + canon_kind = BTF_INFO_KIND(canon_type->info); + + if (cand_type->name_off != canon_type->name_off) + return 0; + + /* FWD <--> STRUCT/UNION equivalence check, if enabled */ + if (!d->opts.dont_resolve_fwds + && (cand_kind == BTF_KIND_FWD || canon_kind == BTF_KIND_FWD) + && cand_kind != canon_kind) { + __u16 real_kind; + __u16 fwd_kind; + + if (cand_kind == BTF_KIND_FWD) { + real_kind = canon_kind; + fwd_kind = btf_fwd_kind(cand_type); + } else { + real_kind = cand_kind; + fwd_kind = btf_fwd_kind(canon_type); + } + return fwd_kind == real_kind; + } + + if (cand_kind != canon_kind) + return 0; + + switch (cand_kind) { + case BTF_KIND_INT: + return btf_equal_int(cand_type, canon_type); + + case BTF_KIND_ENUM: + if (d->opts.dont_resolve_fwds) + return btf_equal_enum(cand_type, canon_type); + else + return btf_compat_enum(cand_type, canon_type); + + case BTF_KIND_FWD: + return btf_equal_common(cand_type, canon_type); + + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + if (cand_type->info != canon_type->info) + return 0; + return btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + + case BTF_KIND_ARRAY: { + struct btf_array *cand_arr, *canon_arr; + + if (!btf_compat_array(cand_type, canon_type)) + return 0; + cand_arr = (struct btf_array *)(cand_type + 1); + canon_arr = (struct btf_array *)(canon_type + 1); + eq = btf_dedup_is_equiv(d, + cand_arr->index_type, canon_arr->index_type); + if (eq <= 0) + return eq; + return btf_dedup_is_equiv(d, cand_arr->type, canon_arr->type); + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *cand_m, *canon_m; + __u16 vlen; + + if (!btf_shallow_equal_struct(cand_type, canon_type)) + return 0; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_m = (struct btf_member *)(cand_type + 1); + canon_m = (struct btf_member *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_m->type, canon_m->type); + if (eq <= 0) + return eq; + cand_m++; + canon_m++; + } + + return 1; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *cand_p, *canon_p; + __u16 vlen; + + if (!btf_compat_fnproto(cand_type, canon_type)) + return 0; + eq = btf_dedup_is_equiv(d, cand_type->type, canon_type->type); + if (eq <= 0) + return eq; + vlen = BTF_INFO_VLEN(cand_type->info); + cand_p = (struct btf_param *)(cand_type + 1); + canon_p = (struct btf_param *)(canon_type + 1); + for (i = 0; i < vlen; i++) { + eq = btf_dedup_is_equiv(d, cand_p->type, canon_p->type); + if (eq <= 0) + return eq; + cand_p++; + canon_p++; + } + return 1; + } + + default: + return -EINVAL; + } + return 0; +} + +/* + * Use hypothetical mapping, produced by successful type graph equivalence + * check, to augment existing struct/union canonical mapping, where possible. + * + * If BTF_KIND_FWD resolution is allowed, this mapping is also used to record + * FWD -> STRUCT/UNION correspondence as well. FWD resolution is bidirectional: + * it doesn't matter if FWD type was part of canonical graph or candidate one, + * we are recording the mapping anyway. As opposed to carefulness required + * for struct/union correspondence mapping (described below), for FWD resolution + * it's not important, as by the time that FWD type (reference type) will be + * deduplicated all structs/unions will be deduped already anyway. + * + * Recording STRUCT/UNION mapping is purely a performance optimization and is + * not required for correctness. It needs to be done carefully to ensure that + * struct/union from candidate's type graph is not mapped into corresponding + * struct/union from canonical type graph that itself hasn't been resolved into + * canonical representative. The only guarantee we have is that canonical + * struct/union was determined as canonical and that won't change. But any + * types referenced through that struct/union fields could have been not yet + * resolved, so in case like that it's too early to establish any kind of + * correspondence between structs/unions. + * + * No canonical correspondence is derived for primitive types (they are already + * deduplicated completely already anyway) or reference types (they rely on + * stability of struct/union canonical relationship for equivalence checks). + */ +static void btf_dedup_merge_hypot_map(struct btf_dedup *d) +{ + __u32 cand_type_id, targ_type_id; + __u16 t_kind, c_kind; + __u32 t_id, c_id; + int i; + + for (i = 0; i < d->hypot_cnt; i++) { + cand_type_id = d->hypot_list[i]; + targ_type_id = d->hypot_map[cand_type_id]; + t_id = resolve_type_id(d, targ_type_id); + c_id = resolve_type_id(d, cand_type_id); + t_kind = BTF_INFO_KIND(d->btf->types[t_id]->info); + c_kind = BTF_INFO_KIND(d->btf->types[c_id]->info); + /* + * Resolve FWD into STRUCT/UNION. + * It's ok to resolve FWD into STRUCT/UNION that's not yet + * mapped to canonical representative (as opposed to + * STRUCT/UNION <--> STRUCT/UNION mapping logic below), because + * eventually that struct is going to be mapped and all resolved + * FWDs will automatically resolve to correct canonical + * representative. This will happen before ref type deduping, + * which critically depends on stability of these mapping. This + * stability is not a requirement for STRUCT/UNION equivalence + * checks, though. + */ + if (t_kind != BTF_KIND_FWD && c_kind == BTF_KIND_FWD) + d->map[c_id] = t_id; + else if (t_kind == BTF_KIND_FWD && c_kind != BTF_KIND_FWD) + d->map[t_id] = c_id; + + if ((t_kind == BTF_KIND_STRUCT || t_kind == BTF_KIND_UNION) && + c_kind != BTF_KIND_FWD && + is_type_mapped(d, c_id) && + !is_type_mapped(d, t_id)) { + /* + * as a perf optimization, we can map struct/union + * that's part of type graph we just verified for + * equivalence. We can do that for struct/union that has + * canonical representative only, though. + */ + d->map[t_id] = c_id; + } + } +} + +/* + * Deduplicate struct/union types. + * + * For each struct/union type its type signature hash is calculated, taking + * into account type's name, size, number, order and names of fields, but + * ignoring type ID's referenced from fields, because they might not be deduped + * completely until after reference types deduplication phase. This type hash + * is used to iterate over all potential canonical types, sharing same hash. + * For each canonical candidate we check whether type graphs that they form + * (through referenced types in fields and so on) are equivalent using algorithm + * implemented in `btf_dedup_is_equiv`. If such equivalence is found and + * BTF_KIND_FWD resolution is allowed, then hypothetical mapping + * (btf_dedup->hypot_map) produced by aforementioned type graph equivalence + * algorithm is used to record FWD -> STRUCT/UNION mapping. It's also used to + * potentially map other structs/unions to their canonical representatives, + * if such relationship hasn't yet been established. This speeds up algorithm + * by eliminating some of the duplicate work. + * + * If no matching canonical representative was found, struct/union is marked + * as canonical for itself and is added into btf_dedup->dedup_table hash map + * for further look ups. + */ +static int btf_dedup_struct_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *cand_type, *t; + /* if we don't find equivalent type, then we are canonical */ + __u32 new_id = type_id; + __u16 kind; + __u32 h; + + /* already deduped or is in process of deduping (loop detected) */ + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return 0; + + t = d->btf->types[type_id]; + kind = BTF_INFO_KIND(t->info); + + if (kind != BTF_KIND_STRUCT && kind != BTF_KIND_UNION) + return 0; + + h = btf_hash_struct(t); + for_each_dedup_cand(d, h, cand_node) { + int eq; + + /* + * Even though btf_dedup_is_equiv() checks for + * btf_shallow_equal_struct() internally when checking two + * structs (unions) for equivalence, we need to guard here + * from picking matching FWD type as a dedup candidate. + * This can happen due to hash collision. In such case just + * relying on btf_dedup_is_equiv() would lead to potentially + * creating a loop (FWD -> STRUCT and STRUCT -> FWD), because + * FWD and compatible STRUCT/UNION are considered equivalent. + */ + cand_type = d->btf->types[cand_node->type_id]; + if (!btf_shallow_equal_struct(t, cand_type)) + continue; + + btf_dedup_clear_hypot_map(d); + eq = btf_dedup_is_equiv(d, type_id, cand_node->type_id); + if (eq < 0) + return eq; + if (!eq) + continue; + new_id = cand_node->type_id; + btf_dedup_merge_hypot_map(d); + break; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return 0; +} + +static int btf_dedup_struct_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_struct_type(d, i); + if (err) + return err; + } + return 0; +} + +/* + * Deduplicate reference type. + * + * Once all primitive and struct/union types got deduplicated, we can easily + * deduplicate all other (reference) BTF types. This is done in two steps: + * + * 1. Resolve all referenced type IDs into their canonical type IDs. This + * resolution can be done either immediately for primitive or struct/union types + * (because they were deduped in previous two phases) or recursively for + * reference types. Recursion will always terminate at either primitive or + * struct/union type, at which point we can "unwind" chain of reference types + * one by one. There is no danger of encountering cycles because in C type + * system the only way to form type cycle is through struct/union, so any chain + * of reference types, even those taking part in a type cycle, will inevitably + * reach struct/union at some point. + * + * 2. Once all referenced type IDs are resolved into canonical ones, BTF type + * becomes "stable", in the sense that no further deduplication will cause + * any changes to it. With that, it's now possible to calculate type's signature + * hash (this time taking into account referenced type IDs) and loop over all + * potential canonical representatives. If no match was found, current type + * will become canonical representative of itself and will be added into + * btf_dedup->dedup_table as another possible canonical representative. + */ +static int btf_dedup_ref_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_dedup_node *cand_node; + struct btf_type *t, *cand; + /* if we don't find equivalent type, then we are representative type */ + __u32 new_id = type_id; + int ref_type_id; + __u32 h; + + if (d->map[type_id] == BTF_IN_PROGRESS_ID) + return -ELOOP; + if (d->map[type_id] <= BTF_MAX_NR_TYPES) + return resolve_type_id(d, type_id); + + t = d->btf->types[type_id]; + d->map[type_id] = BTF_IN_PROGRESS_ID; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + h = btf_hash_common(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_common(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + + case BTF_KIND_ARRAY: { + struct btf_array *info = (struct btf_array *)(t + 1); + + ref_type_id = btf_dedup_ref_type(d, info->type); + if (ref_type_id < 0) + return ref_type_id; + info->type = ref_type_id; + + ref_type_id = btf_dedup_ref_type(d, info->index_type); + if (ref_type_id < 0) + return ref_type_id; + info->index_type = ref_type_id; + + h = btf_hash_array(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_array(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param; + __u16 vlen; + int i; + + ref_type_id = btf_dedup_ref_type(d, t->type); + if (ref_type_id < 0) + return ref_type_id; + t->type = ref_type_id; + + vlen = BTF_INFO_VLEN(t->info); + param = (struct btf_param *)(t + 1); + for (i = 0; i < vlen; i++) { + ref_type_id = btf_dedup_ref_type(d, param->type); + if (ref_type_id < 0) + return ref_type_id; + param->type = ref_type_id; + param++; + } + + h = btf_hash_fnproto(t); + for_each_dedup_cand(d, h, cand_node) { + cand = d->btf->types[cand_node->type_id]; + if (btf_equal_fnproto(t, cand)) { + new_id = cand_node->type_id; + break; + } + } + break; + } + + default: + return -EINVAL; + } + + d->map[type_id] = new_id; + if (type_id == new_id && btf_dedup_table_add(d, h, type_id)) + return -ENOMEM; + + return new_id; +} + +static int btf_dedup_ref_types(struct btf_dedup *d) +{ + int i, err; + + for (i = 1; i <= d->btf->nr_types; i++) { + err = btf_dedup_ref_type(d, i); + if (err < 0) + return err; + } + btf_dedup_table_free(d); + return 0; +} + +/* + * Compact types. + * + * After we established for each type its corresponding canonical representative + * type, we now can eliminate types that are not canonical and leave only + * canonical ones layed out sequentially in memory by copying them over + * duplicates. During compaction btf_dedup->hypot_map array is reused to store + * a map from original type ID to a new compacted type ID, which will be used + * during next phase to "fix up" type IDs, referenced from struct/union and + * reference types. + */ +static int btf_dedup_compact_types(struct btf_dedup *d) +{ + struct btf_type **new_types; + __u32 next_type_id = 1; + char *types_start, *p; + int i, len; + + /* we are going to reuse hypot_map to store compaction remapping */ + d->hypot_map[0] = 0; + for (i = 1; i <= d->btf->nr_types; i++) + d->hypot_map[i] = BTF_UNPROCESSED_ID; + + types_start = d->btf->nohdr_data + d->btf->hdr->type_off; + p = types_start; + + for (i = 1; i <= d->btf->nr_types; i++) { + if (d->map[i] != i) + continue; + + len = btf_type_size(d->btf->types[i]); + if (len < 0) + return len; + + memmove(p, d->btf->types[i], len); + d->hypot_map[i] = next_type_id; + d->btf->types[next_type_id] = (struct btf_type *)p; + p += len; + next_type_id++; + } + + /* shrink struct btf's internal types index and update btf_header */ + d->btf->nr_types = next_type_id - 1; + d->btf->types_size = d->btf->nr_types; + d->btf->hdr->type_len = p - types_start; + new_types = realloc(d->btf->types, + (1 + d->btf->nr_types) * sizeof(struct btf_type *)); + if (!new_types) + return -ENOMEM; + d->btf->types = new_types; + + /* make sure string section follows type information without gaps */ + d->btf->hdr->str_off = p - (char *)d->btf->nohdr_data; + memmove(p, d->btf->strings, d->btf->hdr->str_len); + d->btf->strings = p; + p += d->btf->hdr->str_len; + + d->btf->data_size = p - (char *)d->btf->data; + return 0; +} + +/* + * Figure out final (deduplicated and compacted) type ID for provided original + * `type_id` by first resolving it into corresponding canonical type ID and + * then mapping it to a deduplicated type ID, stored in btf_dedup->hypot_map, + * which is populated during compaction phase. + */ +static int btf_dedup_remap_type_id(struct btf_dedup *d, __u32 type_id) +{ + __u32 resolved_type_id, new_type_id; + + resolved_type_id = resolve_type_id(d, type_id); + new_type_id = d->hypot_map[resolved_type_id]; + if (new_type_id > BTF_MAX_NR_TYPES) + return -EINVAL; + return new_type_id; +} + +/* + * Remap referenced type IDs into deduped type IDs. + * + * After BTF types are deduplicated and compacted, their final type IDs may + * differ from original ones. The map from original to a corresponding + * deduped type ID is stored in btf_dedup->hypot_map and is populated during + * compaction phase. During remapping phase we are rewriting all type IDs + * referenced from any BTF type (e.g., struct fields, func proto args, etc) to + * their final deduped type IDs. + */ +static int btf_dedup_remap_type(struct btf_dedup *d, __u32 type_id) +{ + struct btf_type *t = d->btf->types[type_id]; + int i, r; + + switch (BTF_INFO_KIND(t->info)) { + case BTF_KIND_INT: + case BTF_KIND_ENUM: + break; + + case BTF_KIND_FWD: + case BTF_KIND_CONST: + case BTF_KIND_VOLATILE: + case BTF_KIND_RESTRICT: + case BTF_KIND_PTR: + case BTF_KIND_TYPEDEF: + case BTF_KIND_FUNC: + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + break; + + case BTF_KIND_ARRAY: { + struct btf_array *arr_info = (struct btf_array *)(t + 1); + + r = btf_dedup_remap_type_id(d, arr_info->type); + if (r < 0) + return r; + arr_info->type = r; + r = btf_dedup_remap_type_id(d, arr_info->index_type); + if (r < 0) + return r; + arr_info->index_type = r; + break; + } + + case BTF_KIND_STRUCT: + case BTF_KIND_UNION: { + struct btf_member *member = (struct btf_member *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, member->type); + if (r < 0) + return r; + member->type = r; + member++; + } + break; + } + + case BTF_KIND_FUNC_PROTO: { + struct btf_param *param = (struct btf_param *)(t + 1); + __u16 vlen = BTF_INFO_VLEN(t->info); + + r = btf_dedup_remap_type_id(d, t->type); + if (r < 0) + return r; + t->type = r; + + for (i = 0; i < vlen; i++) { + r = btf_dedup_remap_type_id(d, param->type); + if (r < 0) + return r; + param->type = r; + param++; + } + break; + } + + default: + return -EINVAL; + } + + return 0; +} + +static int btf_dedup_remap_types(struct btf_dedup *d) +{ + int i, r; + + for (i = 1; i <= d->btf->nr_types; i++) { + r = btf_dedup_remap_type(d, i); + if (r < 0) + return r; + } + return 0; +} diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index b0610dcdae6b..28a1e1e59861 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -55,33 +55,47 @@ struct btf_ext_header { __u32 line_info_len; }; -typedef int (*btf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); - LIBBPF_API void btf__free(struct btf *btf); -LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size, btf_print_fn_t err_log); +LIBBPF_API struct btf *btf__new(__u8 *data, __u32 size); +LIBBPF_API int btf__load(struct btf *btf); LIBBPF_API __s32 btf__find_by_name(const struct btf *btf, const char *type_name); +LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf); LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf, __u32 id); LIBBPF_API __s64 btf__resolve_size(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__resolve_type(const struct btf *btf, __u32 type_id); LIBBPF_API int btf__fd(const struct btf *btf); +LIBBPF_API const void *btf__get_raw_data(const struct btf *btf, __u32 *size); LIBBPF_API const char *btf__name_by_offset(const struct btf *btf, __u32 offset); LIBBPF_API int btf__get_from_id(__u32 id, struct btf **btf); +LIBBPF_API int btf__get_map_kv_tids(const struct btf *btf, const char *map_name, + __u32 expected_key_size, + __u32 expected_value_size, + __u32 *key_type_id, __u32 *value_type_id); + +LIBBPF_API struct btf_ext *btf_ext__new(__u8 *data, __u32 size); +LIBBPF_API void btf_ext__free(struct btf_ext *btf_ext); +LIBBPF_API const void *btf_ext__get_raw_data(const struct btf_ext *btf_ext, + __u32 *size); +LIBBPF_API int btf_ext__reloc_func_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **func_info, __u32 *cnt); +LIBBPF_API int btf_ext__reloc_line_info(const struct btf *btf, + const struct btf_ext *btf_ext, + const char *sec_name, __u32 insns_cnt, + void **line_info, __u32 *cnt); +LIBBPF_API __u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API __u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); + +struct btf_dedup_opts { + unsigned int dedup_table_size; + bool dont_resolve_fwds; +}; -struct btf_ext *btf_ext__new(__u8 *data, __u32 size, btf_print_fn_t err_log); -void btf_ext__free(struct btf_ext *btf_ext); -int btf_ext__reloc_func_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **func_info, __u32 *func_info_len); -int btf_ext__reloc_line_info(const struct btf *btf, - const struct btf_ext *btf_ext, - const char *sec_name, __u32 insns_cnt, - void **line_info, __u32 *cnt); -__u32 btf_ext__func_info_rec_size(const struct btf_ext *btf_ext); -__u32 btf_ext__line_info_rec_size(const struct btf_ext *btf_ext); +LIBBPF_API int btf__dedup(struct btf *btf, struct btf_ext *btf_ext, + const struct btf_dedup_opts *opts); #ifdef __cplusplus } /* extern "C" */ diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index 169e347c76f6..11c25d9ea431 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -42,6 +42,7 @@ #include "bpf.h" #include "btf.h" #include "str_error.h" +#include "libbpf_util.h" #ifndef EM_BPF #define EM_BPF 247 @@ -53,39 +54,33 @@ #define __printf(a, b) __attribute__((format(printf, a, b))) -__printf(1, 2) -static int __base_pr(const char *format, ...) +static int __base_pr(enum libbpf_print_level level, const char *format, + va_list args) { - va_list args; - int err; + if (level == LIBBPF_DEBUG) + return 0; - va_start(args, format); - err = vfprintf(stderr, format, args); - va_end(args); - return err; + return vfprintf(stderr, format, args); } -static __printf(1, 2) libbpf_print_fn_t __pr_warning = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_info = __base_pr; -static __printf(1, 2) libbpf_print_fn_t __pr_debug; - -#define __pr(func, fmt, ...) \ -do { \ - if ((func)) \ - (func)("libbpf: " fmt, ##__VA_ARGS__); \ -} while (0) +static libbpf_print_fn_t __libbpf_pr = __base_pr; -#define pr_warning(fmt, ...) __pr(__pr_warning, fmt, ##__VA_ARGS__) -#define pr_info(fmt, ...) __pr(__pr_info, fmt, ##__VA_ARGS__) -#define pr_debug(fmt, ...) __pr(__pr_debug, fmt, ##__VA_ARGS__) +void libbpf_set_print(libbpf_print_fn_t fn) +{ + __libbpf_pr = fn; +} -void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug) +__printf(2, 3) +void libbpf_print(enum libbpf_print_level level, const char *format, ...) { - __pr_warning = warn; - __pr_info = info; - __pr_debug = debug; + va_list args; + + if (!__libbpf_pr) + return; + + va_start(args, format); + __libbpf_pr(level, format, args); + va_end(args); } #define STRERR_BUFSIZE 128 @@ -117,6 +112,11 @@ void libbpf_set_print(libbpf_print_fn_t warn, # define LIBBPF_ELF_C_READ_MMAP ELF_C_READ #endif +static inline __u64 ptr_to_u64(const void *ptr) +{ + return (__u64) (unsigned long) ptr; +} + struct bpf_capabilities { /* v4.14: kernel support for program & map names. */ __u32 name:1; @@ -312,7 +312,7 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx, return -EINVAL; } - bzero(prog, sizeof(*prog)); + memset(prog, 0, sizeof(*prog)); prog->section_name = strdup(section_name); if (!prog->section_name) { @@ -627,7 +627,7 @@ bpf_object__init_maps(struct bpf_object *obj, int flags) bool strict = !(flags & MAPS_RELAX_COMPAT); int i, map_idx, map_def_sz, nr_maps = 0; Elf_Scn *scn; - Elf_Data *data; + Elf_Data *data = NULL; Elf_Data *symbols = obj->efile.symbols; if (obj->efile.maps_shndx < 0) @@ -839,12 +839,20 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) else if (strcmp(name, "maps") == 0) obj->efile.maps_shndx = idx; else if (strcmp(name, BTF_ELF_SEC) == 0) { - obj->btf = btf__new(data->d_buf, data->d_size, - __pr_debug); + obj->btf = btf__new(data->d_buf, data->d_size); if (IS_ERR(obj->btf)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_ELF_SEC, PTR_ERR(obj->btf)); obj->btf = NULL; + continue; + } + err = btf__load(obj->btf); + if (err) { + pr_warning("Error loading %s into kernel: %d. Ignored and continue.\n", + BTF_ELF_SEC, err); + btf__free(obj->btf); + obj->btf = NULL; + err = 0; } } else if (strcmp(name, BTF_EXT_ELF_SEC) == 0) { btf_ext_data = data; @@ -915,8 +923,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags) BTF_EXT_ELF_SEC, BTF_ELF_SEC); } else { obj->btf_ext = btf_ext__new(btf_ext_data->d_buf, - btf_ext_data->d_size, - __pr_debug); + btf_ext_data->d_size); if (IS_ERR(obj->btf_ext)) { pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", BTF_EXT_ELF_SEC, @@ -1057,72 +1064,18 @@ bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr, static int bpf_map_find_btf_info(struct bpf_map *map, const struct btf *btf) { - const struct btf_type *container_type; - const struct btf_member *key, *value; struct bpf_map_def *def = &map->def; - const size_t max_name = 256; - char container_name[max_name]; - __s64 key_size, value_size; - __s32 container_id; - - if (snprintf(container_name, max_name, "____btf_map_%s", map->name) == - max_name) { - pr_warning("map:%s length of '____btf_map_%s' is too long\n", - map->name, map->name); - return -EINVAL; - } - - container_id = btf__find_by_name(btf, container_name); - if (container_id < 0) { - pr_debug("map:%s container_name:%s cannot be found in BTF. Missing BPF_ANNOTATE_KV_PAIR?\n", - map->name, container_name); - return container_id; - } - - container_type = btf__type_by_id(btf, container_id); - if (!container_type) { - pr_warning("map:%s cannot find BTF type for container_id:%u\n", - map->name, container_id); - return -EINVAL; - } - - if (BTF_INFO_KIND(container_type->info) != BTF_KIND_STRUCT || - BTF_INFO_VLEN(container_type->info) < 2) { - pr_warning("map:%s container_name:%s is an invalid container struct\n", - map->name, container_name); - return -EINVAL; - } - - key = (struct btf_member *)(container_type + 1); - value = key + 1; - - key_size = btf__resolve_size(btf, key->type); - if (key_size < 0) { - pr_warning("map:%s invalid BTF key_type_size\n", - map->name); - return key_size; - } - - if (def->key_size != key_size) { - pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", - map->name, (__u32)key_size, def->key_size); - return -EINVAL; - } - - value_size = btf__resolve_size(btf, value->type); - if (value_size < 0) { - pr_warning("map:%s invalid BTF value_type_size\n", map->name); - return value_size; - } + __u32 key_type_id, value_type_id; + int ret; - if (def->value_size != value_size) { - pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", - map->name, (__u32)value_size, def->value_size); - return -EINVAL; - } + ret = btf__get_map_kv_tids(btf, map->name, def->key_size, + def->value_size, &key_type_id, + &value_type_id); + if (ret) + return ret; - map->btf_key_type_id = key->type; - map->btf_value_type_id = value->type; + map->btf_key_type_id = key_type_id; + map->btf_value_type_id = value_type_id; return 0; } @@ -1174,6 +1127,20 @@ err_free_new_name: return -errno; } +int bpf_map__resize(struct bpf_map *map, __u32 max_entries) +{ + if (!map || !max_entries) + return -EINVAL; + + /* If map already created, its attributes can't be changed. */ + if (map->fd >= 0) + return -EBUSY; + + map->def.max_entries = max_entries; + + return 0; +} + static int bpf_object__probe_name(struct bpf_object *obj) { @@ -1637,7 +1604,7 @@ bpf_program__load(struct bpf_program *prog, struct bpf_prog_prep_result result; bpf_program_prep_t preprocessor = prog->preprocessor; - bzero(&result, sizeof(result)); + memset(&result, 0, sizeof(result)); err = preprocessor(prog, i, prog->insns, prog->insns_cnt, &result); if (err) { @@ -2147,7 +2114,7 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path) if (err) return err; - bpf_map__for_each(map, obj) { + bpf_object__for_each_map(map, obj) { char buf[PATH_MAX]; int len; @@ -2194,7 +2161,7 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path) if (!obj) return -ENOENT; - bpf_map__for_each(map, obj) { + bpf_object__for_each_map(map, obj) { char buf[PATH_MAX]; int len; @@ -2378,6 +2345,11 @@ unsigned int bpf_object__kversion(struct bpf_object *obj) return obj ? obj->kern_version : 0; } +struct btf *bpf_object__btf(struct bpf_object *obj) +{ + return obj ? obj->btf : NULL; +} + int bpf_object__btf_fd(const struct bpf_object *obj) { return obj->btf ? btf__fd(obj->btf) : -1; @@ -2667,9 +2639,38 @@ static const struct { #undef BPF_EAPROG_SEC #undef BPF_APROG_COMPAT +#define MAX_TYPE_NAME_SIZE 32 + +static char *libbpf_get_type_names(bool attach_type) +{ + int i, len = ARRAY_SIZE(section_names) * MAX_TYPE_NAME_SIZE; + char *buf; + + buf = malloc(len); + if (!buf) + return NULL; + + buf[0] = '\0'; + /* Forge string buf with all available names */ + for (i = 0; i < ARRAY_SIZE(section_names); i++) { + if (attach_type && !section_names[i].is_attachable) + continue; + + if (strlen(buf) + strlen(section_names[i].sec) + 2 > len) { + free(buf); + return NULL; + } + strcat(buf, " "); + strcat(buf, section_names[i].sec); + } + + return buf; +} + int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, enum bpf_attach_type *expected_attach_type) { + char *type_names; int i; if (!name) @@ -2682,12 +2683,20 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type, *expected_attach_type = section_names[i].expected_attach_type; return 0; } + pr_warning("failed to guess program type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(false); + if (type_names != NULL) { + pr_info("supported section(type) names are:%s\n", type_names); + free(type_names); + } + return -EINVAL; } int libbpf_attach_type_by_name(const char *name, enum bpf_attach_type *attach_type) { + char *type_names; int i; if (!name) @@ -2701,6 +2710,13 @@ int libbpf_attach_type_by_name(const char *name, *attach_type = section_names[i].attach_type; return 0; } + pr_warning("failed to guess attach type based on ELF section name '%s'\n", name); + type_names = libbpf_get_type_names(true); + if (type_names != NULL) { + pr_info("attachable section(type) names are:%s\n", type_names); + free(type_names); + } + return -EINVAL; } @@ -2833,13 +2849,19 @@ bpf_object__find_map_by_name(struct bpf_object *obj, const char *name) { struct bpf_map *pos; - bpf_map__for_each(pos, obj) { + bpf_object__for_each_map(pos, obj) { if (pos->name && !strcmp(pos->name, name)) return pos; } return NULL; } +int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name) +{ + return bpf_map__fd(bpf_object__find_map_by_name(obj, name)); +} + struct bpf_map * bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset) { @@ -2907,8 +2929,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, err = bpf_program__identify_section(prog, &prog_type, &expected_attach_type); if (err < 0) { - pr_warning("failed to guess program type based on section name %s\n", - prog->section_name); bpf_object__close(obj); return -EINVAL; } @@ -2922,7 +2942,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr, first_prog = prog; } - bpf_map__for_each(map, obj) { + bpf_object__for_each_map(map, obj) { if (!bpf_map__is_offload_neutral(map)) map->map_ifindex = attr->ifindex; } @@ -2991,3 +3011,249 @@ bpf_perf_event_read_simple(void *mmap_mem, size_t mmap_size, size_t page_size, ring_buffer_write_tail(header, data_tail); return ret; } + +struct bpf_prog_info_array_desc { + int array_offset; /* e.g. offset of jited_prog_insns */ + int count_offset; /* e.g. offset of jited_prog_len */ + int size_offset; /* > 0: offset of rec size, + * < 0: fix size of -size_offset + */ +}; + +static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = { + [BPF_PROG_INFO_JITED_INSNS] = { + offsetof(struct bpf_prog_info, jited_prog_insns), + offsetof(struct bpf_prog_info, jited_prog_len), + -1, + }, + [BPF_PROG_INFO_XLATED_INSNS] = { + offsetof(struct bpf_prog_info, xlated_prog_insns), + offsetof(struct bpf_prog_info, xlated_prog_len), + -1, + }, + [BPF_PROG_INFO_MAP_IDS] = { + offsetof(struct bpf_prog_info, map_ids), + offsetof(struct bpf_prog_info, nr_map_ids), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_JITED_KSYMS] = { + offsetof(struct bpf_prog_info, jited_ksyms), + offsetof(struct bpf_prog_info, nr_jited_ksyms), + -(int)sizeof(__u64), + }, + [BPF_PROG_INFO_JITED_FUNC_LENS] = { + offsetof(struct bpf_prog_info, jited_func_lens), + offsetof(struct bpf_prog_info, nr_jited_func_lens), + -(int)sizeof(__u32), + }, + [BPF_PROG_INFO_FUNC_INFO] = { + offsetof(struct bpf_prog_info, func_info), + offsetof(struct bpf_prog_info, nr_func_info), + offsetof(struct bpf_prog_info, func_info_rec_size), + }, + [BPF_PROG_INFO_LINE_INFO] = { + offsetof(struct bpf_prog_info, line_info), + offsetof(struct bpf_prog_info, nr_line_info), + offsetof(struct bpf_prog_info, line_info_rec_size), + }, + [BPF_PROG_INFO_JITED_LINE_INFO] = { + offsetof(struct bpf_prog_info, jited_line_info), + offsetof(struct bpf_prog_info, nr_jited_line_info), + offsetof(struct bpf_prog_info, jited_line_info_rec_size), + }, + [BPF_PROG_INFO_PROG_TAGS] = { + offsetof(struct bpf_prog_info, prog_tags), + offsetof(struct bpf_prog_info, nr_prog_tags), + -(int)sizeof(__u8) * BPF_TAG_SIZE, + }, + +}; + +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u32)]; + return -(int)offset; +} + +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + return array[offset / sizeof(__u64)]; + return -(int)offset; +} + +static void bpf_prog_info_set_offset_u32(struct bpf_prog_info *info, int offset, + __u32 val) +{ + __u32 *array = (__u32 *)info; + + if (offset >= 0) + array[offset / sizeof(__u32)] = val; +} + +static void bpf_prog_info_set_offset_u64(struct bpf_prog_info *info, int offset, + __u64 val) +{ + __u64 *array = (__u64 *)info; + + if (offset >= 0) + array[offset / sizeof(__u64)] = val; +} + +struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays) +{ + struct bpf_prog_info_linear *info_linear; + struct bpf_prog_info info = {}; + __u32 info_len = sizeof(info); + __u32 data_len = 0; + int i, err; + void *ptr; + + if (arrays >> BPF_PROG_INFO_LAST_ARRAY) + return ERR_PTR(-EINVAL); + + /* step 1: get array dimensions */ + err = bpf_obj_get_info_by_fd(fd, &info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + return ERR_PTR(-EFAULT); + } + + /* step 2: calculate total size of all arrays */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + bool include_array = (arrays & (1UL << i)) > 0; + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + desc = bpf_prog_info_array_desc + i; + + /* kernel is too old to support this field */ + if (info_len < desc->array_offset + sizeof(__u32) || + info_len < desc->count_offset + sizeof(__u32) || + (desc->size_offset > 0 && info_len < desc->size_offset)) + include_array = false; + + if (!include_array) { + arrays &= ~(1UL << i); /* clear the bit */ + continue; + } + + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + + data_len += count * size; + } + + /* step 3: allocate continuous memory */ + data_len = roundup(data_len, sizeof(__u64)); + info_linear = malloc(sizeof(struct bpf_prog_info_linear) + data_len); + if (!info_linear) + return ERR_PTR(-ENOMEM); + + /* step 4: fill data to info_linear->info */ + info_linear->arrays = arrays; + memset(&info_linear->info, 0, sizeof(info)); + ptr = info_linear->data; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 count, size; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + count = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + size = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->count_offset, count); + bpf_prog_info_set_offset_u32(&info_linear->info, + desc->size_offset, size); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, + ptr_to_u64(ptr)); + ptr += count * size; + } + + /* step 5: call syscall again to get required arrays */ + err = bpf_obj_get_info_by_fd(fd, &info_linear->info, &info_len); + if (err) { + pr_debug("can't get prog info: %s", strerror(errno)); + free(info_linear); + return ERR_PTR(-EFAULT); + } + + /* step 6: verify the data */ + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u32 v1, v2; + + if ((arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + v1 = bpf_prog_info_read_offset_u32(&info, desc->count_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->count_offset); + if (v1 != v2) + pr_warning("%s: mismatch in element count\n", __func__); + + v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset); + v2 = bpf_prog_info_read_offset_u32(&info_linear->info, + desc->size_offset); + if (v1 != v2) + pr_warning("%s: mismatch in rec size\n", __func__); + } + + /* step 7: update info_len and data_len */ + info_linear->info_len = sizeof(struct bpf_prog_info); + info_linear->data_len = data_len; + + return info_linear; +} + +void bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + addr = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + offs = addr - ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, offs); + } +} + +void bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear) +{ + int i; + + for (i = BPF_PROG_INFO_FIRST_ARRAY; i < BPF_PROG_INFO_LAST_ARRAY; ++i) { + struct bpf_prog_info_array_desc *desc; + __u64 addr, offs; + + if ((info_linear->arrays & (1UL << i)) == 0) + continue; + + desc = bpf_prog_info_array_desc + i; + offs = bpf_prog_info_read_offset_u64(&info_linear->info, + desc->array_offset); + addr = offs + ptr_to_u64(info_linear->data); + bpf_prog_info_set_offset_u64(&info_linear->info, + desc->array_offset, addr); + } +} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index 5f68d7b75215..c70785cc8ef5 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -10,6 +10,7 @@ #ifndef __LIBBPF_LIBBPF_H #define __LIBBPF_LIBBPF_H +#include <stdarg.h> #include <stdio.h> #include <stdint.h> #include <stdbool.h> @@ -47,17 +48,16 @@ enum libbpf_errno { LIBBPF_API int libbpf_strerror(int err, char *buf, size_t size); -/* - * __printf is defined in include/linux/compiler-gcc.h. However, - * it would be better if libbpf.h didn't depend on Linux header files. - * So instead of __printf, here we use gcc attribute directly. - */ -typedef int (*libbpf_print_fn_t)(const char *, ...) - __attribute__((format(printf, 1, 2))); +enum libbpf_print_level { + LIBBPF_WARN, + LIBBPF_INFO, + LIBBPF_DEBUG, +}; + +typedef int (*libbpf_print_fn_t)(enum libbpf_print_level level, + const char *, va_list ap); -LIBBPF_API void libbpf_set_print(libbpf_print_fn_t warn, - libbpf_print_fn_t info, - libbpf_print_fn_t debug); +LIBBPF_API void libbpf_set_print(libbpf_print_fn_t fn); /* Hide internal to user */ struct bpf_object; @@ -90,6 +90,9 @@ LIBBPF_API int bpf_object__load(struct bpf_object *obj); LIBBPF_API int bpf_object__unload(struct bpf_object *obj); LIBBPF_API const char *bpf_object__name(struct bpf_object *obj); LIBBPF_API unsigned int bpf_object__kversion(struct bpf_object *obj); + +struct btf; +LIBBPF_API struct btf *bpf_object__btf(struct bpf_object *obj); LIBBPF_API int bpf_object__btf_fd(const struct bpf_object *obj); LIBBPF_API struct bpf_program * @@ -264,6 +267,9 @@ struct bpf_map; LIBBPF_API struct bpf_map * bpf_object__find_map_by_name(struct bpf_object *obj, const char *name); +LIBBPF_API int +bpf_object__find_map_fd_by_name(struct bpf_object *obj, const char *name); + /* * Get bpf_map through the offset of corresponding struct bpf_map_def * in the BPF object file. @@ -273,10 +279,11 @@ bpf_object__find_map_by_offset(struct bpf_object *obj, size_t offset); LIBBPF_API struct bpf_map * bpf_map__next(struct bpf_map *map, struct bpf_object *obj); -#define bpf_map__for_each(pos, obj) \ +#define bpf_object__for_each_map(pos, obj) \ for ((pos) = bpf_map__next(NULL, (obj)); \ (pos) != NULL; \ (pos) = bpf_map__next((pos), (obj))) +#define bpf_map__for_each bpf_object__for_each_map LIBBPF_API struct bpf_map * bpf_map__prev(struct bpf_map *map, struct bpf_object *obj); @@ -292,6 +299,7 @@ LIBBPF_API int bpf_map__set_priv(struct bpf_map *map, void *priv, bpf_map_clear_priv_t clear_priv); LIBBPF_API void *bpf_map__priv(struct bpf_map *map); LIBBPF_API int bpf_map__reuse_fd(struct bpf_map *map, int fd); +LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries); LIBBPF_API bool bpf_map__is_offload_neutral(struct bpf_map *map); LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path); @@ -314,6 +322,7 @@ LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type, struct bpf_object **pobj, int *prog_fd); LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); enum bpf_perf_event_ret { LIBBPF_PERF_EVENT_DONE = 0, @@ -355,6 +364,83 @@ LIBBPF_API const struct bpf_line_info * bpf_prog_linfo__lfind(const struct bpf_prog_linfo *prog_linfo, __u32 insn_off, __u32 nr_skip); +/* + * Probe for supported system features + * + * Note that running many of these probes in a short amount of time can cause + * the kernel to reach the maximal size of lockable memory allowed for the + * user, causing subsequent probes to fail. In this case, the caller may want + * to adjust that limit with setrlimit(). + */ +LIBBPF_API bool bpf_probe_prog_type(enum bpf_prog_type prog_type, + __u32 ifindex); +LIBBPF_API bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex); +LIBBPF_API bool bpf_probe_helper(enum bpf_func_id id, + enum bpf_prog_type prog_type, __u32 ifindex); + +/* + * Get bpf_prog_info in continuous memory + * + * struct bpf_prog_info has multiple arrays. The user has option to choose + * arrays to fetch from kernel. The following APIs provide an uniform way to + * fetch these data. All arrays in bpf_prog_info are stored in a single + * continuous memory region. This makes it easy to store the info in a + * file. + * + * Before writing bpf_prog_info_linear to files, it is necessary to + * translate pointers in bpf_prog_info to offsets. Helper functions + * bpf_program__bpil_addr_to_offs() and bpf_program__bpil_offs_to_addr() + * are introduced to switch between pointers and offsets. + * + * Examples: + * # To fetch map_ids and prog_tags: + * __u64 arrays = (1UL << BPF_PROG_INFO_MAP_IDS) | + * (1UL << BPF_PROG_INFO_PROG_TAGS); + * struct bpf_prog_info_linear *info_linear = + * bpf_program__get_prog_info_linear(fd, arrays); + * + * # To save data in file + * bpf_program__bpil_addr_to_offs(info_linear); + * write(f, info_linear, sizeof(*info_linear) + info_linear->data_len); + * + * # To read data from file + * read(f, info_linear, <proper_size>); + * bpf_program__bpil_offs_to_addr(info_linear); + */ +enum bpf_prog_info_array { + BPF_PROG_INFO_FIRST_ARRAY = 0, + BPF_PROG_INFO_JITED_INSNS = 0, + BPF_PROG_INFO_XLATED_INSNS, + BPF_PROG_INFO_MAP_IDS, + BPF_PROG_INFO_JITED_KSYMS, + BPF_PROG_INFO_JITED_FUNC_LENS, + BPF_PROG_INFO_FUNC_INFO, + BPF_PROG_INFO_LINE_INFO, + BPF_PROG_INFO_JITED_LINE_INFO, + BPF_PROG_INFO_PROG_TAGS, + BPF_PROG_INFO_LAST_ARRAY, +}; + +struct bpf_prog_info_linear { + /* size of struct bpf_prog_info, when the tool is compiled */ + __u32 info_len; + /* total bytes allocated for data, round up to 8 bytes */ + __u32 data_len; + /* which arrays are included in data */ + __u64 arrays; + struct bpf_prog_info info; + __u8 data[]; +}; + +LIBBPF_API struct bpf_prog_info_linear * +bpf_program__get_prog_info_linear(int fd, __u64 arrays); + +LIBBPF_API void +bpf_program__bpil_addr_to_offs(struct bpf_prog_info_linear *info_linear); + +LIBBPF_API void +bpf_program__bpil_offs_to_addr(struct bpf_prog_info_linear *info_linear); + #ifdef __cplusplus } /* extern "C" */ #endif diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index cd02cd4e2cc3..f3ce50500cf2 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -124,3 +124,36 @@ LIBBPF_0.0.1 { local: *; }; + +LIBBPF_0.0.2 { + global: + bpf_probe_helper; + bpf_probe_map_type; + bpf_probe_prog_type; + bpf_map__resize; + bpf_map_lookup_elem_flags; + bpf_object__btf; + bpf_object__find_map_fd_by_name; + bpf_get_link_xdp_id; + btf__dedup; + btf__get_map_kv_tids; + btf__get_nr_types; + btf__get_raw_data; + btf__load; + btf_ext__free; + btf_ext__func_info_rec_size; + btf_ext__get_raw_data; + btf_ext__line_info_rec_size; + btf_ext__new; + btf_ext__reloc_func_info; + btf_ext__reloc_line_info; + xsk_umem__create; + xsk_socket__create; + xsk_umem__delete; + xsk_socket__delete; + xsk_umem__fd; + xsk_socket__fd; + bpf_program__get_prog_info_linear; + bpf_program__bpil_addr_to_offs; + bpf_program__bpil_offs_to_addr; +} LIBBPF_0.0.1; diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c new file mode 100644 index 000000000000..8c3a1c04dcb2 --- /dev/null +++ b/tools/lib/bpf/libbpf_probes.c @@ -0,0 +1,242 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) +/* Copyright (c) 2019 Netronome Systems, Inc. */ + +#include <errno.h> +#include <fcntl.h> +#include <string.h> +#include <stdlib.h> +#include <unistd.h> +#include <net/if.h> +#include <sys/utsname.h> + +#include <linux/filter.h> +#include <linux/kernel.h> + +#include "bpf.h" +#include "libbpf.h" + +static bool grep(const char *buffer, const char *pattern) +{ + return !!strstr(buffer, pattern); +} + +static int get_vendor_id(int ifindex) +{ + char ifname[IF_NAMESIZE], path[64], buf[8]; + ssize_t len; + int fd; + + if (!if_indextoname(ifindex, ifname)) + return -1; + + snprintf(path, sizeof(path), "/sys/class/net/%s/device/vendor", ifname); + + fd = open(path, O_RDONLY); + if (fd < 0) + return -1; + + len = read(fd, buf, sizeof(buf)); + close(fd); + if (len < 0) + return -1; + if (len >= (ssize_t)sizeof(buf)) + return -1; + buf[len] = '\0'; + + return strtol(buf, NULL, 0); +} + +static int get_kernel_version(void) +{ + int version, subversion, patchlevel; + struct utsname utsn; + + /* Return 0 on failure, and attempt to probe with empty kversion */ + if (uname(&utsn)) + return 0; + + if (sscanf(utsn.release, "%d.%d.%d", + &version, &subversion, &patchlevel) != 3) + return 0; + + return (version << 16) + (subversion << 8) + patchlevel; +} + +static void +probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns, + size_t insns_cnt, char *buf, size_t buf_len, __u32 ifindex) +{ + struct bpf_load_program_attr xattr = {}; + int fd; + + switch (prog_type) { + case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: + xattr.expected_attach_type = BPF_CGROUP_INET4_CONNECT; + break; + case BPF_PROG_TYPE_KPROBE: + xattr.kern_version = get_kernel_version(); + break; + case BPF_PROG_TYPE_UNSPEC: + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_TRACEPOINT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_PERF_EVENT: + case BPF_PROG_TYPE_CGROUP_SKB: + case BPF_PROG_TYPE_CGROUP_SOCK: + case BPF_PROG_TYPE_LWT_IN: + case BPF_PROG_TYPE_LWT_OUT: + case BPF_PROG_TYPE_LWT_XMIT: + case BPF_PROG_TYPE_SOCK_OPS: + case BPF_PROG_TYPE_SK_SKB: + case BPF_PROG_TYPE_CGROUP_DEVICE: + case BPF_PROG_TYPE_SK_MSG: + case BPF_PROG_TYPE_RAW_TRACEPOINT: + case BPF_PROG_TYPE_LWT_SEG6LOCAL: + case BPF_PROG_TYPE_LIRC_MODE2: + case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: + default: + break; + } + + xattr.prog_type = prog_type; + xattr.insns = insns; + xattr.insns_cnt = insns_cnt; + xattr.license = "GPL"; + xattr.prog_ifindex = ifindex; + + fd = bpf_load_program_xattr(&xattr, buf, buf_len); + if (fd >= 0) + close(fd); +} + +bool bpf_probe_prog_type(enum bpf_prog_type prog_type, __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_MOV64_IMM(BPF_REG_0, 0), + BPF_EXIT_INSN() + }; + + if (ifindex && prog_type == BPF_PROG_TYPE_SCHED_CLS) + /* nfp returns -EINVAL on exit(0) with TC offload */ + insns[0].imm = 2; + + errno = 0; + probe_load(prog_type, insns, ARRAY_SIZE(insns), NULL, 0, ifindex); + + return errno != EINVAL && errno != EOPNOTSUPP; +} + +bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex) +{ + int key_size, value_size, max_entries, map_flags; + struct bpf_create_map_attr attr = {}; + int fd = -1, fd_inner; + + key_size = sizeof(__u32); + value_size = sizeof(__u32); + max_entries = 1; + map_flags = 0; + + switch (map_type) { + case BPF_MAP_TYPE_STACK_TRACE: + value_size = sizeof(__u64); + break; + case BPF_MAP_TYPE_LPM_TRIE: + key_size = sizeof(__u64); + value_size = sizeof(__u64); + map_flags = BPF_F_NO_PREALLOC; + break; + case BPF_MAP_TYPE_CGROUP_STORAGE: + case BPF_MAP_TYPE_PERCPU_CGROUP_STORAGE: + key_size = sizeof(struct bpf_cgroup_storage_key); + value_size = sizeof(__u64); + max_entries = 0; + break; + case BPF_MAP_TYPE_QUEUE: + case BPF_MAP_TYPE_STACK: + key_size = 0; + break; + case BPF_MAP_TYPE_UNSPEC: + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_ARRAY: + case BPF_MAP_TYPE_PROG_ARRAY: + case BPF_MAP_TYPE_PERF_EVENT_ARRAY: + case BPF_MAP_TYPE_PERCPU_HASH: + case BPF_MAP_TYPE_PERCPU_ARRAY: + case BPF_MAP_TYPE_CGROUP_ARRAY: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_LRU_PERCPU_HASH: + case BPF_MAP_TYPE_ARRAY_OF_MAPS: + case BPF_MAP_TYPE_HASH_OF_MAPS: + case BPF_MAP_TYPE_DEVMAP: + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_CPUMAP: + case BPF_MAP_TYPE_XSKMAP: + case BPF_MAP_TYPE_SOCKHASH: + case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: + default: + break; + } + + if (map_type == BPF_MAP_TYPE_ARRAY_OF_MAPS || + map_type == BPF_MAP_TYPE_HASH_OF_MAPS) { + /* TODO: probe for device, once libbpf has a function to create + * map-in-map for offload + */ + if (ifindex) + return false; + + fd_inner = bpf_create_map(BPF_MAP_TYPE_HASH, + sizeof(__u32), sizeof(__u32), 1, 0); + if (fd_inner < 0) + return false; + fd = bpf_create_map_in_map(map_type, NULL, sizeof(__u32), + fd_inner, 1, 0); + close(fd_inner); + } else { + /* Note: No other restriction on map type probes for offload */ + attr.map_type = map_type; + attr.key_size = key_size; + attr.value_size = value_size; + attr.max_entries = max_entries; + attr.map_flags = map_flags; + attr.map_ifindex = ifindex; + + fd = bpf_create_map_xattr(&attr); + } + if (fd >= 0) + close(fd); + + return fd >= 0; +} + +bool bpf_probe_helper(enum bpf_func_id id, enum bpf_prog_type prog_type, + __u32 ifindex) +{ + struct bpf_insn insns[2] = { + BPF_EMIT_CALL(id), + BPF_EXIT_INSN() + }; + char buf[4096] = {}; + bool res; + + probe_load(prog_type, insns, ARRAY_SIZE(insns), buf, sizeof(buf), + ifindex); + res = !grep(buf, "invalid func ") && !grep(buf, "unknown func "); + + if (ifindex) { + switch (get_vendor_id(ifindex)) { + case 0x19ee: /* Netronome specific */ + res = res && !grep(buf, "not supported by FW") && + !grep(buf, "unsupported function id"); + break; + default: + break; + } + } + + return res; +} diff --git a/tools/lib/bpf/libbpf_util.h b/tools/lib/bpf/libbpf_util.h new file mode 100644 index 000000000000..81ecda0cb9c9 --- /dev/null +++ b/tools/lib/bpf/libbpf_util.h @@ -0,0 +1,30 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +/* Copyright (c) 2019 Facebook */ + +#ifndef __LIBBPF_LIBBPF_UTIL_H +#define __LIBBPF_LIBBPF_UTIL_H + +#include <stdbool.h> + +#ifdef __cplusplus +extern "C" { +#endif + +extern void libbpf_print(enum libbpf_print_level level, + const char *format, ...) + __attribute__((format(printf, 2, 3))); + +#define __pr(level, fmt, ...) \ +do { \ + libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__); \ +} while (0) + +#define pr_warning(fmt, ...) __pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_info(fmt, ...) __pr(LIBBPF_INFO, fmt, ##__VA_ARGS__) +#define pr_debug(fmt, ...) __pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index 0ce67aea8f3b..ce3ec81b71c0 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -21,6 +21,12 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t, void *cookie); +struct xdp_id_md { + int ifindex; + __u32 flags; + __u32 id; +}; + int libbpf_netlink_open(__u32 *nl_pid) { struct sockaddr_nl sa; @@ -196,6 +202,85 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh, return dump_link_nlmsg(cookie, ifi, tb); } +static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) +{ + if (mode != XDP_ATTACHED_MULTI) + return IFLA_XDP_PROG_ID; + if (flags & XDP_FLAGS_DRV_MODE) + return IFLA_XDP_DRV_PROG_ID; + if (flags & XDP_FLAGS_HW_MODE) + return IFLA_XDP_HW_PROG_ID; + if (flags & XDP_FLAGS_SKB_MODE) + return IFLA_XDP_SKB_PROG_ID; + + return IFLA_XDP_UNSPEC; +} + +static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +{ + struct nlattr *xdp_tb[IFLA_XDP_MAX + 1]; + struct xdp_id_md *xdp_id = cookie; + struct ifinfomsg *ifinfo = msg; + unsigned char mode, xdp_attr; + int ret; + + if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) + return 0; + + if (!tb[IFLA_XDP]) + return 0; + + ret = libbpf_nla_parse_nested(xdp_tb, IFLA_XDP_MAX, tb[IFLA_XDP], NULL); + if (ret) + return ret; + + if (!xdp_tb[IFLA_XDP_ATTACHED]) + return 0; + + mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); + if (mode == XDP_ATTACHED_NONE) + return 0; + + xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); + if (!xdp_attr || !xdp_tb[xdp_attr]) + return 0; + + xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); + + return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ + struct xdp_id_md xdp_id = {}; + int sock, ret; + __u32 nl_pid; + __u32 mask; + + if (flags & ~XDP_FLAGS_MASK) + return -EINVAL; + + /* Check whether the single {HW,DRV,SKB} mode is set */ + flags &= (XDP_FLAGS_SKB_MODE | XDP_FLAGS_DRV_MODE | XDP_FLAGS_HW_MODE); + mask = flags - 1; + if (flags && flags & mask) + return -EINVAL; + + sock = libbpf_netlink_open(&nl_pid); + if (sock < 0) + return sock; + + xdp_id.ifindex = ifindex; + xdp_id.flags = flags; + + ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); + if (!ret) + *prog_id = xdp_id.id; + + close(sock); + return ret; +} + int libbpf_nl_get_link(int sock, unsigned int nl_pid, libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie) { diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp index abf3fc25c9fa..fc134873bb6d 100644 --- a/tools/lib/bpf/test_libbpf.cpp +++ b/tools/lib/bpf/test_libbpf.cpp @@ -8,11 +8,11 @@ int main(int argc, char *argv[]) { /* libbpf.h */ - libbpf_set_print(NULL, NULL, NULL); + libbpf_set_print(NULL); /* bpf.h */ bpf_prog_get_fd_by_id(0); /* btf.h */ - btf__new(NULL, 0, NULL); + btf__new(NULL, 0); } diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c new file mode 100644 index 000000000000..8d0078b65486 --- /dev/null +++ b/tools/lib/bpf/xsk.c @@ -0,0 +1,730 @@ +// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <[email protected]> + */ + +#include <errno.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <arpa/inet.h> +#include <asm/barrier.h> +#include <linux/compiler.h> +#include <linux/ethtool.h> +#include <linux/filter.h> +#include <linux/if_ether.h> +#include <linux/if_packet.h> +#include <linux/if_xdp.h> +#include <linux/sockios.h> +#include <net/if.h> +#include <sys/ioctl.h> +#include <sys/mman.h> +#include <sys/socket.h> +#include <sys/types.h> + +#include "bpf.h" +#include "libbpf.h" +#include "libbpf_util.h" +#include "xsk.h" + +#ifndef SOL_XDP + #define SOL_XDP 283 +#endif + +#ifndef AF_XDP + #define AF_XDP 44 +#endif + +#ifndef PF_XDP + #define PF_XDP AF_XDP +#endif + +struct xsk_umem { + struct xsk_ring_prod *fill; + struct xsk_ring_cons *comp; + char *umem_area; + struct xsk_umem_config config; + int fd; + int refcount; +}; + +struct xsk_socket { + struct xsk_ring_cons *rx; + struct xsk_ring_prod *tx; + __u64 outstanding_tx; + struct xsk_umem *umem; + struct xsk_socket_config config; + int fd; + int xsks_map; + int ifindex; + int prog_fd; + int qidconf_map_fd; + int xsks_map_fd; + __u32 queue_id; + char ifname[IFNAMSIZ]; +}; + +struct xsk_nl_info { + bool xdp_prog_attached; + int ifindex; + int fd; +}; + +/* For 32-bit systems, we need to use mmap2 as the offsets are 64-bit. + * Unfortunately, it is not part of glibc. + */ +static inline void *xsk_mmap(void *addr, size_t length, int prot, int flags, + int fd, __u64 offset) +{ +#ifdef __NR_mmap2 + unsigned int page_shift = __builtin_ffs(getpagesize()) - 1; + long ret = syscall(__NR_mmap2, addr, length, prot, flags, fd, + (off_t)(offset >> page_shift)); + + return (void *)ret; +#else + return mmap(addr, length, prot, flags, fd, offset); +#endif +} + +int xsk_umem__fd(const struct xsk_umem *umem) +{ + return umem ? umem->fd : -EINVAL; +} + +int xsk_socket__fd(const struct xsk_socket *xsk) +{ + return xsk ? xsk->fd : -EINVAL; +} + +static bool xsk_page_aligned(void *buffer) +{ + unsigned long addr = (unsigned long)buffer; + + return !(addr & (getpagesize() - 1)); +} + +static void xsk_set_umem_config(struct xsk_umem_config *cfg, + const struct xsk_umem_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->fill_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->comp_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->frame_size = XSK_UMEM__DEFAULT_FRAME_SIZE; + cfg->frame_headroom = XSK_UMEM__DEFAULT_FRAME_HEADROOM; + return; + } + + cfg->fill_size = usr_cfg->fill_size; + cfg->comp_size = usr_cfg->comp_size; + cfg->frame_size = usr_cfg->frame_size; + cfg->frame_headroom = usr_cfg->frame_headroom; +} + +static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg, + const struct xsk_socket_config *usr_cfg) +{ + if (!usr_cfg) { + cfg->rx_size = XSK_RING_CONS__DEFAULT_NUM_DESCS; + cfg->tx_size = XSK_RING_PROD__DEFAULT_NUM_DESCS; + cfg->libbpf_flags = 0; + cfg->xdp_flags = 0; + cfg->bind_flags = 0; + return 0; + } + + if (usr_cfg->libbpf_flags & ~XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD) + return -EINVAL; + + cfg->rx_size = usr_cfg->rx_size; + cfg->tx_size = usr_cfg->tx_size; + cfg->libbpf_flags = usr_cfg->libbpf_flags; + cfg->xdp_flags = usr_cfg->xdp_flags; + cfg->bind_flags = usr_cfg->bind_flags; + + return 0; +} + +int xsk_umem__create(struct xsk_umem **umem_ptr, void *umem_area, __u64 size, + struct xsk_ring_prod *fill, struct xsk_ring_cons *comp, + const struct xsk_umem_config *usr_config) +{ + struct xdp_mmap_offsets off; + struct xdp_umem_reg mr; + struct xsk_umem *umem; + socklen_t optlen; + void *map; + int err; + + if (!umem_area || !umem_ptr || !fill || !comp) + return -EFAULT; + if (!size && !xsk_page_aligned(umem_area)) + return -EINVAL; + + umem = calloc(1, sizeof(*umem)); + if (!umem) + return -ENOMEM; + + umem->fd = socket(AF_XDP, SOCK_RAW, 0); + if (umem->fd < 0) { + err = -errno; + goto out_umem_alloc; + } + + umem->umem_area = umem_area; + xsk_set_umem_config(&umem->config, usr_config); + + mr.addr = (uintptr_t)umem_area; + mr.len = size; + mr.chunk_size = umem->config.frame_size; + mr.headroom = umem->config.frame_headroom; + + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_REG, &mr, sizeof(mr)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_FILL_RING, + &umem->config.fill_size, + sizeof(umem->config.fill_size)); + if (err) { + err = -errno; + goto out_socket; + } + err = setsockopt(umem->fd, SOL_XDP, XDP_UMEM_COMPLETION_RING, + &umem->config.comp_size, + sizeof(umem->config.comp_size)); + if (err) { + err = -errno; + goto out_socket; + } + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + map = xsk_mmap(NULL, off.fr.desc + + umem->config.fill_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + umem->fd, XDP_UMEM_PGOFF_FILL_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + umem->fill = fill; + fill->mask = umem->config.fill_size - 1; + fill->size = umem->config.fill_size; + fill->producer = map + off.fr.producer; + fill->consumer = map + off.fr.consumer; + fill->ring = map + off.fr.desc; + fill->cached_cons = umem->config.fill_size; + + map = xsk_mmap(NULL, + off.cr.desc + umem->config.comp_size * sizeof(__u64), + PROT_READ | PROT_WRITE, MAP_SHARED | MAP_POPULATE, + umem->fd, XDP_UMEM_PGOFF_COMPLETION_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap; + } + + umem->comp = comp; + comp->mask = umem->config.comp_size - 1; + comp->size = umem->config.comp_size; + comp->producer = map + off.cr.producer; + comp->consumer = map + off.cr.consumer; + comp->ring = map + off.cr.desc; + + *umem_ptr = umem; + return 0; + +out_mmap: + munmap(umem->fill, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); +out_socket: + close(umem->fd); +out_umem_alloc: + free(umem); + return err; +} + +static int xsk_load_xdp_prog(struct xsk_socket *xsk) +{ + char bpf_log_buf[BPF_LOG_BUF_SIZE]; + int err, prog_fd; + + /* This is the C-program: + * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx) + * { + * int *qidconf, index = ctx->rx_queue_index; + * + * // A set entry here means that the correspnding queue_id + * // has an active AF_XDP socket bound to it. + * qidconf = bpf_map_lookup_elem(&qidconf_map, &index); + * if (!qidconf) + * return XDP_ABORTED; + * + * if (*qidconf) + * return bpf_redirect_map(&xsks_map, index, 0); + * + * return XDP_PASS; + * } + */ + struct bpf_insn prog[] = { + /* r1 = *(u32 *)(r1 + 16) */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), + /* *(u32 *)(r10 - 4) = r1 */ + BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), + BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), + BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), + BPF_LD_MAP_FD(BPF_REG_1, xsk->qidconf_map_fd), + BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), + BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), + BPF_MOV32_IMM(BPF_REG_0, 0), + /* if r1 == 0 goto +8 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 8), + BPF_MOV32_IMM(BPF_REG_0, 2), + /* r1 = *(u32 *)(r1 + 0) */ + BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 0), + /* if r1 == 0 goto +5 */ + BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5), + /* r2 = *(u32 *)(r10 - 4) */ + BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), + BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), + BPF_MOV32_IMM(BPF_REG_3, 0), + BPF_EMIT_CALL(BPF_FUNC_redirect_map), + /* The jumps are to this instruction */ + BPF_EXIT_INSN(), + }; + size_t insns_cnt = sizeof(prog) / sizeof(struct bpf_insn); + + prog_fd = bpf_load_program(BPF_PROG_TYPE_XDP, prog, insns_cnt, + "LGPL-2.1 or BSD-2-Clause", 0, bpf_log_buf, + BPF_LOG_BUF_SIZE); + if (prog_fd < 0) { + pr_warning("BPF log buffer:\n%s", bpf_log_buf); + return prog_fd; + } + + err = bpf_set_link_xdp_fd(xsk->ifindex, prog_fd, xsk->config.xdp_flags); + if (err) { + close(prog_fd); + return err; + } + + xsk->prog_fd = prog_fd; + return 0; +} + +static int xsk_get_max_queues(struct xsk_socket *xsk) +{ + struct ethtool_channels channels; + struct ifreq ifr; + int fd, err, ret; + + fd = socket(AF_INET, SOCK_DGRAM, 0); + if (fd < 0) + return -errno; + + channels.cmd = ETHTOOL_GCHANNELS; + ifr.ifr_data = (void *)&channels; + strncpy(ifr.ifr_name, xsk->ifname, IFNAMSIZ); + err = ioctl(fd, SIOCETHTOOL, &ifr); + if (err && errno != EOPNOTSUPP) { + ret = -errno; + goto out; + } + + if (channels.max_combined == 0 || errno == EOPNOTSUPP) + /* If the device says it has no channels, then all traffic + * is sent to a single stream, so max queues = 1. + */ + ret = 1; + else + ret = channels.max_combined; + +out: + close(fd); + return ret; +} + +static int xsk_create_bpf_maps(struct xsk_socket *xsk) +{ + int max_queues; + int fd; + + max_queues = xsk_get_max_queues(xsk); + if (max_queues < 0) + return max_queues; + + fd = bpf_create_map_name(BPF_MAP_TYPE_ARRAY, "qidconf_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) + return fd; + xsk->qidconf_map_fd = fd; + + fd = bpf_create_map_name(BPF_MAP_TYPE_XSKMAP, "xsks_map", + sizeof(int), sizeof(int), max_queues, 0); + if (fd < 0) { + close(xsk->qidconf_map_fd); + return fd; + } + xsk->xsks_map_fd = fd; + + return 0; +} + +static void xsk_delete_bpf_maps(struct xsk_socket *xsk) +{ + close(xsk->qidconf_map_fd); + close(xsk->xsks_map_fd); +} + +static int xsk_update_bpf_maps(struct xsk_socket *xsk, int qidconf_value, + int xsks_value) +{ + bool qidconf_map_updated = false, xsks_map_updated = false; + struct bpf_prog_info prog_info = {}; + __u32 prog_len = sizeof(prog_info); + struct bpf_map_info map_info; + __u32 map_len = sizeof(map_info); + __u32 *map_ids; + int reset_value = 0; + __u32 num_maps; + unsigned int i; + int err; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + return err; + + num_maps = prog_info.nr_map_ids; + + map_ids = calloc(prog_info.nr_map_ids, sizeof(*map_ids)); + if (!map_ids) + return -ENOMEM; + + memset(&prog_info, 0, prog_len); + prog_info.nr_map_ids = num_maps; + prog_info.map_ids = (__u64)(unsigned long)map_ids; + + err = bpf_obj_get_info_by_fd(xsk->prog_fd, &prog_info, &prog_len); + if (err) + goto out_map_ids; + + for (i = 0; i < prog_info.nr_map_ids; i++) { + int fd; + + fd = bpf_map_get_fd_by_id(map_ids[i]); + if (fd < 0) { + err = -errno; + goto out_maps; + } + + err = bpf_obj_get_info_by_fd(fd, &map_info, &map_len); + if (err) + goto out_maps; + + if (!strcmp(map_info.name, "qidconf_map")) { + err = bpf_map_update_elem(fd, &xsk->queue_id, + &qidconf_value, 0); + if (err) + goto out_maps; + qidconf_map_updated = true; + xsk->qidconf_map_fd = fd; + } else if (!strcmp(map_info.name, "xsks_map")) { + err = bpf_map_update_elem(fd, &xsk->queue_id, + &xsks_value, 0); + if (err) + goto out_maps; + xsks_map_updated = true; + xsk->xsks_map_fd = fd; + } + + if (qidconf_map_updated && xsks_map_updated) + break; + } + + if (!(qidconf_map_updated && xsks_map_updated)) { + err = -ENOENT; + goto out_maps; + } + + err = 0; + goto out_success; + +out_maps: + if (qidconf_map_updated) + (void)bpf_map_update_elem(xsk->qidconf_map_fd, &xsk->queue_id, + &reset_value, 0); + if (xsks_map_updated) + (void)bpf_map_update_elem(xsk->xsks_map_fd, &xsk->queue_id, + &reset_value, 0); +out_success: + if (qidconf_map_updated) + close(xsk->qidconf_map_fd); + if (xsks_map_updated) + close(xsk->xsks_map_fd); +out_map_ids: + free(map_ids); + return err; +} + +static int xsk_setup_xdp_prog(struct xsk_socket *xsk) +{ + bool prog_attached = false; + __u32 prog_id = 0; + int err; + + err = bpf_get_link_xdp_id(xsk->ifindex, &prog_id, + xsk->config.xdp_flags); + if (err) + return err; + + if (!prog_id) { + prog_attached = true; + err = xsk_create_bpf_maps(xsk); + if (err) + return err; + + err = xsk_load_xdp_prog(xsk); + if (err) + goto out_maps; + } else { + xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); + } + + err = xsk_update_bpf_maps(xsk, true, xsk->fd); + if (err) + goto out_load; + + return 0; + +out_load: + if (prog_attached) + close(xsk->prog_fd); +out_maps: + if (prog_attached) + xsk_delete_bpf_maps(xsk); + return err; +} + +int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname, + __u32 queue_id, struct xsk_umem *umem, + struct xsk_ring_cons *rx, struct xsk_ring_prod *tx, + const struct xsk_socket_config *usr_config) +{ + struct sockaddr_xdp sxdp = {}; + struct xdp_mmap_offsets off; + struct xsk_socket *xsk; + socklen_t optlen; + void *map; + int err; + + if (!umem || !xsk_ptr || !rx || !tx) + return -EFAULT; + + if (umem->refcount) { + pr_warning("Error: shared umems not supported by libbpf.\n"); + return -EBUSY; + } + + xsk = calloc(1, sizeof(*xsk)); + if (!xsk) + return -ENOMEM; + + if (umem->refcount++ > 0) { + xsk->fd = socket(AF_XDP, SOCK_RAW, 0); + if (xsk->fd < 0) { + err = -errno; + goto out_xsk_alloc; + } + } else { + xsk->fd = umem->fd; + } + + xsk->outstanding_tx = 0; + xsk->queue_id = queue_id; + xsk->umem = umem; + xsk->ifindex = if_nametoindex(ifname); + if (!xsk->ifindex) { + err = -errno; + goto out_socket; + } + strncpy(xsk->ifname, ifname, IFNAMSIZ); + + err = xsk_set_xdp_socket_config(&xsk->config, usr_config); + if (err) + goto out_socket; + + if (rx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING, + &xsk->config.rx_size, + sizeof(xsk->config.rx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + if (tx) { + err = setsockopt(xsk->fd, SOL_XDP, XDP_TX_RING, + &xsk->config.tx_size, + sizeof(xsk->config.tx_size)); + if (err) { + err = -errno; + goto out_socket; + } + } + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (err) { + err = -errno; + goto out_socket; + } + + if (rx) { + map = xsk_mmap(NULL, off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_RX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_socket; + } + + rx->mask = xsk->config.rx_size - 1; + rx->size = xsk->config.rx_size; + rx->producer = map + off.rx.producer; + rx->consumer = map + off.rx.consumer; + rx->ring = map + off.rx.desc; + } + xsk->rx = rx; + + if (tx) { + map = xsk_mmap(NULL, off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc), + PROT_READ | PROT_WRITE, + MAP_SHARED | MAP_POPULATE, + xsk->fd, XDP_PGOFF_TX_RING); + if (map == MAP_FAILED) { + err = -errno; + goto out_mmap_rx; + } + + tx->mask = xsk->config.tx_size - 1; + tx->size = xsk->config.tx_size; + tx->producer = map + off.tx.producer; + tx->consumer = map + off.tx.consumer; + tx->ring = map + off.tx.desc; + tx->cached_cons = xsk->config.tx_size; + } + xsk->tx = tx; + + sxdp.sxdp_family = PF_XDP; + sxdp.sxdp_ifindex = xsk->ifindex; + sxdp.sxdp_queue_id = xsk->queue_id; + sxdp.sxdp_flags = xsk->config.bind_flags; + + err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp)); + if (err) { + err = -errno; + goto out_mmap_tx; + } + + if (!(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { + err = xsk_setup_xdp_prog(xsk); + if (err) + goto out_mmap_tx; + } + + *xsk_ptr = xsk; + return 0; + +out_mmap_tx: + if (tx) + munmap(xsk->tx, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); +out_mmap_rx: + if (rx) + munmap(xsk->rx, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); +out_socket: + if (--umem->refcount) + close(xsk->fd); +out_xsk_alloc: + free(xsk); + return err; +} + +int xsk_umem__delete(struct xsk_umem *umem) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!umem) + return 0; + + if (umem->refcount) + return -EBUSY; + + optlen = sizeof(off); + err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + munmap(umem->fill->ring, + off.fr.desc + umem->config.fill_size * sizeof(__u64)); + munmap(umem->comp->ring, + off.cr.desc + umem->config.comp_size * sizeof(__u64)); + } + + close(umem->fd); + free(umem); + + return 0; +} + +void xsk_socket__delete(struct xsk_socket *xsk) +{ + struct xdp_mmap_offsets off; + socklen_t optlen; + int err; + + if (!xsk) + return; + + (void)xsk_update_bpf_maps(xsk, 0, 0); + + optlen = sizeof(off); + err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); + if (!err) { + if (xsk->rx) + munmap(xsk->rx->ring, + off.rx.desc + + xsk->config.rx_size * sizeof(struct xdp_desc)); + if (xsk->tx) + munmap(xsk->tx->ring, + off.tx.desc + + xsk->config.tx_size * sizeof(struct xdp_desc)); + } + + xsk->umem->refcount--; + /* Do not close an fd that also has an associated umem connected + * to it. + */ + if (xsk->fd != xsk->umem->fd) + close(xsk->fd); + free(xsk); +} diff --git a/tools/lib/bpf/xsk.h b/tools/lib/bpf/xsk.h new file mode 100644 index 000000000000..a497f00e2962 --- /dev/null +++ b/tools/lib/bpf/xsk.h @@ -0,0 +1,203 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ + +/* + * AF_XDP user-space access library. + * + * Copyright(c) 2018 - 2019 Intel Corporation. + * + * Author(s): Magnus Karlsson <[email protected]> + */ + +#ifndef __LIBBPF_XSK_H +#define __LIBBPF_XSK_H + +#include <stdio.h> +#include <stdint.h> +#include <linux/if_xdp.h> + +#include "libbpf.h" + +#ifdef __cplusplus +extern "C" { +#endif + +/* Do not access these members directly. Use the functions below. */ +#define DEFINE_XSK_RING(name) \ +struct name { \ + __u32 cached_prod; \ + __u32 cached_cons; \ + __u32 mask; \ + __u32 size; \ + __u32 *producer; \ + __u32 *consumer; \ + void *ring; \ +} + +DEFINE_XSK_RING(xsk_ring_prod); +DEFINE_XSK_RING(xsk_ring_cons); + +struct xsk_umem; +struct xsk_socket; + +static inline __u64 *xsk_ring_prod__fill_addr(struct xsk_ring_prod *fill, + __u32 idx) +{ + __u64 *addrs = (__u64 *)fill->ring; + + return &addrs[idx & fill->mask]; +} + +static inline const __u64 * +xsk_ring_cons__comp_addr(const struct xsk_ring_cons *comp, __u32 idx) +{ + const __u64 *addrs = (const __u64 *)comp->ring; + + return &addrs[idx & comp->mask]; +} + +static inline struct xdp_desc *xsk_ring_prod__tx_desc(struct xsk_ring_prod *tx, + __u32 idx) +{ + struct xdp_desc *descs = (struct xdp_desc *)tx->ring; + + return &descs[idx & tx->mask]; +} + +static inline const struct xdp_desc * +xsk_ring_cons__rx_desc(const struct xsk_ring_cons *rx, __u32 idx) +{ + const struct xdp_desc *descs = (const struct xdp_desc *)rx->ring; + + return &descs[idx & rx->mask]; +} + +static inline __u32 xsk_prod_nb_free(struct xsk_ring_prod *r, __u32 nb) +{ + __u32 free_entries = r->cached_cons - r->cached_prod; + + if (free_entries >= nb) + return free_entries; + + /* Refresh the local tail pointer. + * cached_cons is r->size bigger than the real consumer pointer so + * that this addition can be avoided in the more frequently + * executed code that computs free_entries in the beginning of + * this function. Without this optimization it whould have been + * free_entries = r->cached_prod - r->cached_cons + r->size. + */ + r->cached_cons = *r->consumer + r->size; + + return r->cached_cons - r->cached_prod; +} + +static inline __u32 xsk_cons_nb_avail(struct xsk_ring_cons *r, __u32 nb) +{ + __u32 entries = r->cached_prod - r->cached_cons; + + if (entries == 0) { + r->cached_prod = *r->producer; + entries = r->cached_prod - r->cached_cons; + } + + return (entries > nb) ? nb : entries; +} + +static inline size_t xsk_ring_prod__reserve(struct xsk_ring_prod *prod, + size_t nb, __u32 *idx) +{ + if (unlikely(xsk_prod_nb_free(prod, nb) < nb)) + return 0; + + *idx = prod->cached_prod; + prod->cached_prod += nb; + + return nb; +} + +static inline void xsk_ring_prod__submit(struct xsk_ring_prod *prod, size_t nb) +{ + /* Make sure everything has been written to the ring before signalling + * this to the kernel. + */ + smp_wmb(); + + *prod->producer += nb; +} + +static inline size_t xsk_ring_cons__peek(struct xsk_ring_cons *cons, + size_t nb, __u32 *idx) +{ + size_t entries = xsk_cons_nb_avail(cons, nb); + + if (likely(entries > 0)) { + /* Make sure we do not speculatively read the data before + * we have received the packet buffers from the ring. + */ + smp_rmb(); + + *idx = cons->cached_cons; + cons->cached_cons += entries; + } + + return entries; +} + +static inline void xsk_ring_cons__release(struct xsk_ring_cons *cons, size_t nb) +{ + *cons->consumer += nb; +} + +static inline void *xsk_umem__get_data(void *umem_area, __u64 addr) +{ + return &((char *)umem_area)[addr]; +} + +LIBBPF_API int xsk_umem__fd(const struct xsk_umem *umem); +LIBBPF_API int xsk_socket__fd(const struct xsk_socket *xsk); + +#define XSK_RING_CONS__DEFAULT_NUM_DESCS 2048 +#define XSK_RING_PROD__DEFAULT_NUM_DESCS 2048 +#define XSK_UMEM__DEFAULT_FRAME_SHIFT 11 /* 2048 bytes */ +#define XSK_UMEM__DEFAULT_FRAME_SIZE (1 << XSK_UMEM__DEFAULT_FRAME_SHIFT) +#define XSK_UMEM__DEFAULT_FRAME_HEADROOM 0 + +struct xsk_umem_config { + __u32 fill_size; + __u32 comp_size; + __u32 frame_size; + __u32 frame_headroom; +}; + +/* Flags for the libbpf_flags field. */ +#define XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD (1 << 0) + +struct xsk_socket_config { + __u32 rx_size; + __u32 tx_size; + __u32 libbpf_flags; + __u32 xdp_flags; + __u16 bind_flags; +}; + +/* Set config to NULL to get the default configuration. */ +LIBBPF_API int xsk_umem__create(struct xsk_umem **umem, + void *umem_area, __u64 size, + struct xsk_ring_prod *fill, + struct xsk_ring_cons *comp, + const struct xsk_umem_config *config); +LIBBPF_API int xsk_socket__create(struct xsk_socket **xsk, + const char *ifname, __u32 queue_id, + struct xsk_umem *umem, + struct xsk_ring_cons *rx, + struct xsk_ring_prod *tx, + const struct xsk_socket_config *config); + +/* Returns 0 for success and -EBUSY if the umem is still in use. */ +LIBBPF_API int xsk_umem__delete(struct xsk_umem *umem); +LIBBPF_API void xsk_socket__delete(struct xsk_socket *xsk); + +#ifdef __cplusplus +} /* extern "C" */ +#endif + +#endif /* __LIBBPF_XSK_H */ diff --git a/tools/lib/lockdep/include/liblockdep/common.h b/tools/lib/lockdep/include/liblockdep/common.h index d640a9761f09..a81d91d4fc78 100644 --- a/tools/lib/lockdep/include/liblockdep/common.h +++ b/tools/lib/lockdep/include/liblockdep/common.h @@ -45,6 +45,8 @@ void lock_acquire(struct lockdep_map *lock, unsigned int subclass, void lock_release(struct lockdep_map *lock, int nested, unsigned long ip); void lockdep_reset_lock(struct lockdep_map *lock); +void lockdep_register_key(struct lock_class_key *key); +void lockdep_unregister_key(struct lock_class_key *key); extern void debug_check_no_locks_freed(const void *from, unsigned long len); #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ diff --git a/tools/lib/lockdep/include/liblockdep/mutex.h b/tools/lib/lockdep/include/liblockdep/mutex.h index 2073d4e1f2f0..783dd0df06f9 100644 --- a/tools/lib/lockdep/include/liblockdep/mutex.h +++ b/tools/lib/lockdep/include/liblockdep/mutex.h @@ -7,6 +7,7 @@ struct liblockdep_pthread_mutex { pthread_mutex_t mutex; + struct lock_class_key key; struct lockdep_map dep_map; }; @@ -27,11 +28,10 @@ static inline int __mutex_init(liblockdep_pthread_mutex_t *lock, return pthread_mutex_init(&lock->mutex, __mutexattr); } -#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ -({ \ - static struct lock_class_key __key; \ - \ - __mutex_init((mutex), #mutex, &__key, (mutexattr)); \ +#define liblockdep_pthread_mutex_init(mutex, mutexattr) \ +({ \ + lockdep_register_key(&(mutex)->key); \ + __mutex_init((mutex), #mutex, &(mutex)->key, (mutexattr)); \ }) static inline int liblockdep_pthread_mutex_lock(liblockdep_pthread_mutex_t *lock) @@ -55,6 +55,7 @@ static inline int liblockdep_pthread_mutex_trylock(liblockdep_pthread_mutex_t *l static inline int liblockdep_pthread_mutex_destroy(liblockdep_pthread_mutex_t *lock) { lockdep_reset_lock(&lock->dep_map); + lockdep_unregister_key(&lock->key); return pthread_mutex_destroy(&lock->mutex); } diff --git a/tools/lib/lockdep/run_tests.sh b/tools/lib/lockdep/run_tests.sh index c8fbd0306960..11f425662b43 100755 --- a/tools/lib/lockdep/run_tests.sh +++ b/tools/lib/lockdep/run_tests.sh @@ -11,7 +11,7 @@ find tests -name '*.c' | sort | while read -r i; do testname=$(basename "$i" .c) echo -ne "$testname... " if gcc -o "tests/$testname" -pthread "$i" liblockdep.a -Iinclude -D__USE_LIBLOCKDEP && - timeout 1 "tests/$testname" 2>&1 | "tests/${testname}.sh"; then + timeout 1 "tests/$testname" 2>&1 | /bin/bash "tests/${testname}.sh"; then echo "PASSED!" else echo "FAILED!" @@ -24,7 +24,7 @@ find tests -name '*.c' | sort | while read -r i; do echo -ne "(PRELOAD) $testname... " if gcc -o "tests/$testname" -pthread -Iinclude "$i" && timeout 1 ./lockdep "tests/$testname" 2>&1 | - "tests/${testname}.sh"; then + /bin/bash "tests/${testname}.sh"; then echo "PASSED!" else echo "FAILED!" @@ -37,7 +37,7 @@ find tests -name '*.c' | sort | while read -r i; do echo -ne "(PRELOAD + Valgrind) $testname... " if gcc -o "tests/$testname" -pthread -Iinclude "$i" && { timeout 10 valgrind --read-var-info=yes ./lockdep "./tests/$testname" >& "tests/${testname}.vg.out"; true; } && - "tests/${testname}.sh" < "tests/${testname}.vg.out" && + /bin/bash "tests/${testname}.sh" < "tests/${testname}.vg.out" && ! grep -Eq '(^==[0-9]*== (Invalid |Uninitialised ))|Mismatched free|Source and destination overlap| UME ' "tests/${testname}.vg.out"; then echo "PASSED!" else diff --git a/tools/lib/lockdep/tests/ABBA.c b/tools/lib/lockdep/tests/ABBA.c index 623313f54720..543789bc3e37 100644 --- a/tools/lib/lockdep/tests/ABBA.c +++ b/tools/lib/lockdep/tests/ABBA.c @@ -14,4 +14,13 @@ void main(void) pthread_mutex_destroy(&b); pthread_mutex_destroy(&a); + + pthread_mutex_init(&a, NULL); + pthread_mutex_init(&b, NULL); + + LOCK_UNLOCK_2(a, b); + LOCK_UNLOCK_2(b, a); + + pthread_mutex_destroy(&b); + pthread_mutex_destroy(&a); } diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c index 17c2b596f043..904adb70a4f0 100644 --- a/tools/lib/rbtree.c +++ b/tools/lib/rbtree.c @@ -22,6 +22,7 @@ */ #include <linux/rbtree_augmented.h> +#include <linux/export.h> /* * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree @@ -43,6 +44,30 @@ * parentheses and have some accompanying text comment. */ +/* + * Notes on lockless lookups: + * + * All stores to the tree structure (rb_left and rb_right) must be done using + * WRITE_ONCE(). And we must not inadvertently cause (temporary) loops in the + * tree structure as seen in program order. + * + * These two requirements will allow lockless iteration of the tree -- not + * correct iteration mind you, tree rotations are not atomic so a lookup might + * miss entire subtrees. + * + * But they do guarantee that any such traversal will only see valid elements + * and that it will indeed complete -- does not get stuck in a loop. + * + * It also guarantees that if the lookup returns an element it is the 'correct' + * one. But not returning an element does _NOT_ mean it's not present. + * + * NOTE: + * + * Stores to __rb_parent_color are not important for simple lookups so those + * are left undone as of now. Nor did I check for loops involving parent + * pointers. + */ + static inline void rb_set_black(struct rb_node *rb) { rb->__rb_parent_color |= RB_BLACK; @@ -70,22 +95,35 @@ __rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, static __always_inline void __rb_insert(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + if (newleft) + *leftmost = node; + while (true) { /* - * Loop invariant: node is red - * - * If there is a black parent, we are done. - * Otherwise, take some corrective action as we don't - * want a red root or two consecutive red nodes. + * Loop invariant: node is red. */ - if (!parent) { + if (unlikely(!parent)) { + /* + * The inserted node is root. Either this is the + * first node, or we recursed at Case 1 below and + * are no longer violating 4). + */ rb_set_parent_color(node, NULL, RB_BLACK); break; - } else if (rb_is_black(parent)) + } + + /* + * If there is a black parent, we are done. + * Otherwise, take some corrective action as, + * per 4), we don't want a red root or two + * consecutive red nodes. + */ + if(rb_is_black(parent)) break; gparent = rb_red_parent(parent); @@ -94,7 +132,7 @@ __rb_insert(struct rb_node *node, struct rb_root *root, if (parent != tmp) { /* parent == gparent->rb_left */ if (tmp && rb_is_red(tmp)) { /* - * Case 1 - color flips + * Case 1 - node's uncle is red (color flips). * * G g * / \ / \ @@ -117,7 +155,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_right; if (node == tmp) { /* - * Case 2 - left rotate at parent + * Case 2 - node's uncle is black and node is + * the parent's right child (left rotate at parent). * * G G * / \ / \ @@ -128,8 +167,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * This still leaves us in violation of 4), the * continuation into Case 3 will fix that. */ - parent->rb_right = tmp = node->rb_left; - node->rb_left = parent; + tmp = node->rb_left; + WRITE_ONCE(parent->rb_right, tmp); + WRITE_ONCE(node->rb_left, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -140,7 +180,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* - * Case 3 - right rotate at gparent + * Case 3 - node's uncle is black and node is + * the parent's left child (right rotate at gparent). * * G P * / \ / \ @@ -148,8 +189,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, * / \ * n U */ - gparent->rb_left = tmp; /* == parent->rb_right */ - parent->rb_right = gparent; + WRITE_ONCE(gparent->rb_left, tmp); /* == parent->rb_right */ + WRITE_ONCE(parent->rb_right, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -170,8 +211,9 @@ __rb_insert(struct rb_node *node, struct rb_root *root, tmp = parent->rb_left; if (node == tmp) { /* Case 2 - right rotate at parent */ - parent->rb_left = tmp = node->rb_right; - node->rb_right = parent; + tmp = node->rb_right; + WRITE_ONCE(parent->rb_left, tmp); + WRITE_ONCE(node->rb_right, parent); if (tmp) rb_set_parent_color(tmp, parent, RB_BLACK); @@ -182,8 +224,8 @@ __rb_insert(struct rb_node *node, struct rb_root *root, } /* Case 3 - left rotate at gparent */ - gparent->rb_right = tmp; /* == parent->rb_left */ - parent->rb_left = gparent; + WRITE_ONCE(gparent->rb_right, tmp); /* == parent->rb_left */ + WRITE_ONCE(parent->rb_left, gparent); if (tmp) rb_set_parent_color(tmp, gparent, RB_BLACK); __rb_rotate_set_parents(gparent, parent, root, RB_RED); @@ -223,8 +265,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * Sl Sr N Sl */ - parent->rb_right = tmp1 = sibling->rb_left; - sibling->rb_left = parent; + tmp1 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp1); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -268,15 +311,31 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * * (p) (p) * / \ / \ - * N S --> N Sl + * N S --> N sl * / \ \ - * sl Sr s + * sl Sr S * \ * Sr + * + * Note: p might be red, and then both + * p and sl are red after rotation(which + * breaks property 4). This is fixed in + * Case 4 (in __rb_rotate_set_parents() + * which set sl the color of p + * and set p RB_BLACK) + * + * (p) (sl) + * / \ / \ + * N sl --> P S + * \ / \ + * S N Sr + * \ + * Sr */ - sibling->rb_left = tmp1 = tmp2->rb_right; - tmp2->rb_right = sibling; - parent->rb_right = tmp2; + tmp1 = tmp2->rb_right; + WRITE_ONCE(sibling->rb_left, tmp1); + WRITE_ONCE(tmp2->rb_right, sibling); + WRITE_ONCE(parent->rb_right, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -296,8 +355,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, * / \ / \ * (sl) sr N (sl) */ - parent->rb_right = tmp2 = sibling->rb_left; - sibling->rb_left = parent; + tmp2 = sibling->rb_left; + WRITE_ONCE(parent->rb_right, tmp2); + WRITE_ONCE(sibling->rb_left, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -309,8 +369,9 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, sibling = parent->rb_left; if (rb_is_red(sibling)) { /* Case 1 - right rotate at parent */ - parent->rb_left = tmp1 = sibling->rb_right; - sibling->rb_right = parent; + tmp1 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp1); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, parent, RB_BLACK); __rb_rotate_set_parents(parent, sibling, root, RB_RED); @@ -334,10 +395,11 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, } break; } - /* Case 3 - right rotate at sibling */ - sibling->rb_right = tmp1 = tmp2->rb_left; - tmp2->rb_left = sibling; - parent->rb_left = tmp2; + /* Case 3 - left rotate at sibling */ + tmp1 = tmp2->rb_left; + WRITE_ONCE(sibling->rb_right, tmp1); + WRITE_ONCE(tmp2->rb_left, sibling); + WRITE_ONCE(parent->rb_left, tmp2); if (tmp1) rb_set_parent_color(tmp1, sibling, RB_BLACK); @@ -345,9 +407,10 @@ ____rb_erase_color(struct rb_node *parent, struct rb_root *root, tmp1 = sibling; sibling = tmp2; } - /* Case 4 - left rotate at parent + color flips */ - parent->rb_left = tmp2 = sibling->rb_right; - sibling->rb_right = parent; + /* Case 4 - right rotate at parent + color flips */ + tmp2 = sibling->rb_right; + WRITE_ONCE(parent->rb_left, tmp2); + WRITE_ONCE(sibling->rb_right, parent); rb_set_parent_color(tmp1, sibling, RB_BLACK); if (tmp2) rb_set_parent(tmp2, parent); @@ -378,22 +441,41 @@ static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} static const struct rb_augment_callbacks dummy_callbacks = { - dummy_propagate, dummy_copy, dummy_rotate + .propagate = dummy_propagate, + .copy = dummy_copy, + .rotate = dummy_rotate }; void rb_insert_color(struct rb_node *node, struct rb_root *root) { - __rb_insert(node, root, dummy_rotate); + __rb_insert(node, root, false, NULL, dummy_rotate); } void rb_erase(struct rb_node *node, struct rb_root *root) { struct rb_node *rebalance; - rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + rebalance = __rb_erase_augmented(node, root, + NULL, &dummy_callbacks); if (rebalance) ____rb_erase_color(rebalance, root, dummy_rotate); } +void rb_insert_color_cached(struct rb_node *node, + struct rb_root_cached *root, bool leftmost) +{ + __rb_insert(node, &root->rb_root, leftmost, + &root->rb_leftmost, dummy_rotate); +} + +void rb_erase_cached(struct rb_node *node, struct rb_root_cached *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, &root->rb_root, + &root->rb_leftmost, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, &root->rb_root, dummy_rotate); +} + /* * Augmented rbtree manipulation functions. * @@ -402,9 +484,10 @@ void rb_erase(struct rb_node *node, struct rb_root *root) */ void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + bool newleft, struct rb_node **leftmost, void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) { - __rb_insert(node, root, augment_rotate); + __rb_insert(node, root, newleft, leftmost, augment_rotate); } /* @@ -498,15 +581,24 @@ void rb_replace_node(struct rb_node *victim, struct rb_node *new, { struct rb_node *parent = rb_parent(victim); + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; + /* Set the surrounding nodes to point to the replacement */ - __rb_change_child(victim, new, parent, root); if (victim->rb_left) rb_set_parent(victim->rb_left, new); if (victim->rb_right) rb_set_parent(victim->rb_right, new); + __rb_change_child(victim, new, parent, root); +} - /* Copy the pointers/colour from the victim to the replacement */ - *new = *victim; +void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root) +{ + rb_replace_node(victim, new, &root->rb_root); + + if (root->rb_leftmost == victim) + root->rb_leftmost = new; } static struct rb_node *rb_left_deepest_node(const struct rb_node *node) diff --git a/tools/lib/traceevent/event-parse-api.c b/tools/lib/traceevent/event-parse-api.c index 8b31c0e00ba3..988587840c80 100644 --- a/tools/lib/traceevent/event-parse-api.c +++ b/tools/lib/traceevent/event-parse-api.c @@ -9,6 +9,22 @@ #include "event-utils.h" /** + * tep_get_event - returns the event with the given index + * @tep: a handle to the tep_handle + * @index: index of the requested event, in the range 0 .. nr_events + * + * This returns pointer to the element of the events array with the given index + * If @tep is NULL, or @index is not in the range 0 .. nr_events, NULL is returned. + */ +struct tep_event *tep_get_event(struct tep_handle *tep, int index) +{ + if (tep && tep->events && index < tep->nr_events) + return tep->events[index]; + + return NULL; +} + +/** * tep_get_first_event - returns the first event in the events array * @tep: a handle to the tep_handle * @@ -17,10 +33,7 @@ */ struct tep_event *tep_get_first_event(struct tep_handle *tep) { - if (tep && tep->events) - return tep->events[0]; - - return NULL; + return tep_get_event(tep, 0); } /** @@ -32,7 +45,7 @@ struct tep_event *tep_get_first_event(struct tep_handle *tep) */ int tep_get_events_count(struct tep_handle *tep) { - if(tep) + if (tep) return tep->nr_events; return 0; } @@ -43,19 +56,47 @@ int tep_get_events_count(struct tep_handle *tep) * @flag: flag, or combination of flags to be set * can be any combination from enum tep_flag * - * This sets a flag or mbination of flags from enum tep_flag - */ + * This sets a flag or combination of flags from enum tep_flag + */ void tep_set_flag(struct tep_handle *tep, int flag) { - if(tep) + if (tep) tep->flags |= flag; } -unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data) +/** + * tep_clear_flag - clear event parser flag + * @tep: a handle to the tep_handle + * @flag: flag to be cleared + * + * This clears a tep flag + */ +void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag) +{ + if (tep) + tep->flags &= ~flag; +} + +/** + * tep_test_flag - check the state of event parser flag + * @tep: a handle to the tep_handle + * @flag: flag to be checked + * + * This returns the state of the requested tep flag. + * Returns: true if the flag is set, false otherwise. + */ +bool tep_test_flag(struct tep_handle *tep, enum tep_flag flag) +{ + if (tep) + return tep->flags & flag; + return false; +} + +unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data) { unsigned short swap; - if (!pevent || pevent->host_bigendian == pevent->file_bigendian) + if (!tep || tep->host_bigendian == tep->file_bigendian) return data; swap = ((data & 0xffULL) << 8) | @@ -64,11 +105,11 @@ unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data) return swap; } -unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data) +unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data) { unsigned int swap; - if (!pevent || pevent->host_bigendian == pevent->file_bigendian) + if (!tep || tep->host_bigendian == tep->file_bigendian) return data; swap = ((data & 0xffULL) << 24) | @@ -80,11 +121,11 @@ unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data) } unsigned long long -tep_data2host8(struct tep_handle *pevent, unsigned long long data) +tep_data2host8(struct tep_handle *tep, unsigned long long data) { unsigned long long swap; - if (!pevent || pevent->host_bigendian == pevent->file_bigendian) + if (!tep || tep->host_bigendian == tep->file_bigendian) return data; swap = ((data & 0xffULL) << 56) | @@ -101,175 +142,232 @@ tep_data2host8(struct tep_handle *pevent, unsigned long long data) /** * tep_get_header_page_size - get size of the header page - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns size of the header page - * If @pevent is NULL, 0 is returned. + * If @tep is NULL, 0 is returned. + */ +int tep_get_header_page_size(struct tep_handle *tep) +{ + if (tep) + return tep->header_page_size_size; + return 0; +} + +/** + * tep_get_header_timestamp_size - get size of the timestamp in the header page + * @tep: a handle to the tep_handle + * + * This returns size of the timestamp in the header page + * If @tep is NULL, 0 is returned. */ -int tep_get_header_page_size(struct tep_handle *pevent) +int tep_get_header_timestamp_size(struct tep_handle *tep) { - if(pevent) - return pevent->header_page_size_size; + if (tep) + return tep->header_page_ts_size; return 0; } /** * tep_get_cpus - get the number of CPUs - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This returns the number of CPUs - * If @pevent is NULL, 0 is returned. + * If @tep is NULL, 0 is returned. */ -int tep_get_cpus(struct tep_handle *pevent) +int tep_get_cpus(struct tep_handle *tep) { - if(pevent) - return pevent->cpus; + if (tep) + return tep->cpus; return 0; } /** * tep_set_cpus - set the number of CPUs - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * * This sets the number of CPUs */ -void tep_set_cpus(struct tep_handle *pevent, int cpus) +void tep_set_cpus(struct tep_handle *tep, int cpus) { - if(pevent) - pevent->cpus = cpus; + if (tep) + tep->cpus = cpus; } /** - * tep_get_long_size - get the size of a long integer on the current machine - * @pevent: a handle to the tep_handle + * tep_get_long_size - get the size of a long integer on the traced machine + * @tep: a handle to the tep_handle * - * This returns the size of a long integer on the current machine - * If @pevent is NULL, 0 is returned. + * This returns the size of a long integer on the traced machine + * If @tep is NULL, 0 is returned. */ -int tep_get_long_size(struct tep_handle *pevent) +int tep_get_long_size(struct tep_handle *tep) { - if(pevent) - return pevent->long_size; + if (tep) + return tep->long_size; return 0; } /** - * tep_set_long_size - set the size of a long integer on the current machine - * @pevent: a handle to the tep_handle + * tep_set_long_size - set the size of a long integer on the traced machine + * @tep: a handle to the tep_handle * @size: size, in bytes, of a long integer * - * This sets the size of a long integer on the current machine + * This sets the size of a long integer on the traced machine */ -void tep_set_long_size(struct tep_handle *pevent, int long_size) +void tep_set_long_size(struct tep_handle *tep, int long_size) { - if(pevent) - pevent->long_size = long_size; + if (tep) + tep->long_size = long_size; } /** - * tep_get_page_size - get the size of a memory page on the current machine - * @pevent: a handle to the tep_handle + * tep_get_page_size - get the size of a memory page on the traced machine + * @tep: a handle to the tep_handle * - * This returns the size of a memory page on the current machine - * If @pevent is NULL, 0 is returned. + * This returns the size of a memory page on the traced machine + * If @tep is NULL, 0 is returned. */ -int tep_get_page_size(struct tep_handle *pevent) +int tep_get_page_size(struct tep_handle *tep) { - if(pevent) - return pevent->page_size; + if (tep) + return tep->page_size; return 0; } /** - * tep_set_page_size - set the size of a memory page on the current machine - * @pevent: a handle to the tep_handle + * tep_set_page_size - set the size of a memory page on the traced machine + * @tep: a handle to the tep_handle * @_page_size: size of a memory page, in bytes * - * This sets the size of a memory page on the current machine + * This sets the size of a memory page on the traced machine */ -void tep_set_page_size(struct tep_handle *pevent, int _page_size) +void tep_set_page_size(struct tep_handle *tep, int _page_size) { - if(pevent) - pevent->page_size = _page_size; + if (tep) + tep->page_size = _page_size; } /** - * tep_is_file_bigendian - get if the file is in big endian order - * @pevent: a handle to the tep_handle + * tep_is_file_bigendian - return the endian of the file + * @tep: a handle to the tep_handle * - * This returns if the file is in big endian order - * If @pevent is NULL, 0 is returned. + * This returns true if the file is in big endian order + * If @tep is NULL, false is returned. */ -int tep_is_file_bigendian(struct tep_handle *pevent) +bool tep_is_file_bigendian(struct tep_handle *tep) { - if(pevent) - return pevent->file_bigendian; - return 0; + if (tep) + return (tep->file_bigendian == TEP_BIG_ENDIAN); + return false; } /** * tep_set_file_bigendian - set if the file is in big endian order - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @endian: non zero, if the file is in big endian order * * This sets if the file is in big endian order */ -void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian) +void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian) { - if(pevent) - pevent->file_bigendian = endian; + if (tep) + tep->file_bigendian = endian; } /** - * tep_is_host_bigendian - get if the order of the current host is big endian - * @pevent: a handle to the tep_handle + * tep_is_local_bigendian - return the endian of the saved local machine + * @tep: a handle to the tep_handle * - * This gets if the order of the current host is big endian - * If @pevent is NULL, 0 is returned. + * This returns true if the saved local machine in @tep is big endian. + * If @tep is NULL, false is returned. */ -int tep_is_host_bigendian(struct tep_handle *pevent) +bool tep_is_local_bigendian(struct tep_handle *tep) { - if(pevent) - return pevent->host_bigendian; + if (tep) + return (tep->host_bigendian == TEP_BIG_ENDIAN); return 0; } /** - * tep_set_host_bigendian - set the order of the local host - * @pevent: a handle to the tep_handle + * tep_set_local_bigendian - set the stored local machine endian order + * @tep: a handle to the tep_handle * @endian: non zero, if the local host has big endian order * - * This sets the order of the local host + * This sets the endian order for the local machine. */ -void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian) +void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian) { - if(pevent) - pevent->host_bigendian = endian; + if (tep) + tep->host_bigendian = endian; } /** * tep_is_latency_format - get if the latency output format is configured - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * - * This gets if the latency output format is configured - * If @pevent is NULL, 0 is returned. + * This returns true if the latency output format is configured + * If @tep is NULL, false is returned. */ -int tep_is_latency_format(struct tep_handle *pevent) +bool tep_is_latency_format(struct tep_handle *tep) { - if(pevent) - return pevent->latency_format; - return 0; + if (tep) + return (tep->latency_format); + return false; } /** * tep_set_latency_format - set the latency output format - * @pevent: a handle to the tep_handle + * @tep: a handle to the tep_handle * @lat: non zero for latency output format * * This sets the latency output format */ -void tep_set_latency_format(struct tep_handle *pevent, int lat) +void tep_set_latency_format(struct tep_handle *tep, int lat) +{ + if (tep) + tep->latency_format = lat; +} + +/** + * tep_is_old_format - get if an old kernel is used + * @tep: a handle to the tep_handle + * + * This returns true, if an old kernel is used to generate the tracing events or + * false if a new kernel is used. Old kernels did not have header page info. + * If @tep is NULL, false is returned. + */ +bool tep_is_old_format(struct tep_handle *tep) +{ + if (tep) + return tep->old_format; + return false; +} + +/** + * tep_set_print_raw - set a flag to force print in raw format + * @tep: a handle to the tep_handle + * @print_raw: the new value of the print_raw flag + * + * This sets a flag to force print in raw format + */ +void tep_set_print_raw(struct tep_handle *tep, int print_raw) +{ + if (tep) + tep->print_raw = print_raw; +} + +/** + * tep_set_test_filters - set a flag to test a filter string + * @tep: a handle to the tep_handle + * @test_filters: the new value of the test_filters flag + * + * This sets a flag to test a filter string. If this flag is set, when + * tep_filter_add_filter_str() API as called,it will print the filter string + * instead of adding it. + */ +void tep_set_test_filters(struct tep_handle *tep, int test_filters) { - if(pevent) - pevent->latency_format = lat; + if (tep) + tep->test_filters = test_filters; } diff --git a/tools/lib/traceevent/event-parse-local.h b/tools/lib/traceevent/event-parse-local.h index 9a092dd4a86d..09aa142f7fdd 100644 --- a/tools/lib/traceevent/event-parse-local.h +++ b/tools/lib/traceevent/event-parse-local.h @@ -7,7 +7,7 @@ #ifndef _PARSE_EVENTS_INT_H #define _PARSE_EVENTS_INT_H -struct cmdline; +struct tep_cmdline; struct cmdline_list; struct func_map; struct func_list; @@ -36,7 +36,7 @@ struct tep_handle { int long_size; int page_size; - struct cmdline *cmdlines; + struct tep_cmdline *cmdlines; struct cmdline_list *cmdlist; int cmdline_count; @@ -92,8 +92,8 @@ struct tep_handle { void tep_free_event(struct tep_event *event); void tep_free_format_field(struct tep_format_field *field); -unsigned short tep_data2host2(struct tep_handle *pevent, unsigned short data); -unsigned int tep_data2host4(struct tep_handle *pevent, unsigned int data); -unsigned long long tep_data2host8(struct tep_handle *pevent, unsigned long long data); +unsigned short tep_data2host2(struct tep_handle *tep, unsigned short data); +unsigned int tep_data2host4(struct tep_handle *tep, unsigned int data); +unsigned long long tep_data2host8(struct tep_handle *tep, unsigned long long data); #endif /* _PARSE_EVENTS_INT_H */ diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index 69a96e39f0ab..b36b536a9fcb 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -124,15 +124,15 @@ struct tep_print_arg *alloc_arg(void) return calloc(1, sizeof(struct tep_print_arg)); } -struct cmdline { +struct tep_cmdline { char *comm; int pid; }; static int cmdline_cmp(const void *a, const void *b) { - const struct cmdline *ca = a; - const struct cmdline *cb = b; + const struct tep_cmdline *ca = a; + const struct tep_cmdline *cb = b; if (ca->pid < cb->pid) return -1; @@ -148,14 +148,14 @@ struct cmdline_list { int pid; }; -static int cmdline_init(struct tep_handle *pevent) +static int cmdline_init(struct tep_handle *tep) { - struct cmdline_list *cmdlist = pevent->cmdlist; + struct cmdline_list *cmdlist = tep->cmdlist; struct cmdline_list *item; - struct cmdline *cmdlines; + struct tep_cmdline *cmdlines; int i; - cmdlines = malloc(sizeof(*cmdlines) * pevent->cmdline_count); + cmdlines = malloc(sizeof(*cmdlines) * tep->cmdline_count); if (!cmdlines) return -1; @@ -169,29 +169,29 @@ static int cmdline_init(struct tep_handle *pevent) free(item); } - qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - pevent->cmdlines = cmdlines; - pevent->cmdlist = NULL; + tep->cmdlines = cmdlines; + tep->cmdlist = NULL; return 0; } -static const char *find_cmdline(struct tep_handle *pevent, int pid) +static const char *find_cmdline(struct tep_handle *tep, int pid) { - const struct cmdline *comm; - struct cmdline key; + const struct tep_cmdline *comm; + struct tep_cmdline key; if (!pid) return "<idle>"; - if (!pevent->cmdlines && cmdline_init(pevent)) + if (!tep->cmdlines && cmdline_init(tep)) return "<not enough memory for cmdlines!>"; key.pid = pid; - comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, - sizeof(*pevent->cmdlines), cmdline_cmp); + comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, + sizeof(*tep->cmdlines), cmdline_cmp); if (comm) return comm->comm; @@ -199,32 +199,32 @@ static const char *find_cmdline(struct tep_handle *pevent, int pid) } /** - * tep_pid_is_registered - return if a pid has a cmdline registered - * @pevent: handle for the pevent + * tep_is_pid_registered - return if a pid has a cmdline registered + * @tep: a handle to the trace event parser context * @pid: The pid to check if it has a cmdline registered with. * - * Returns 1 if the pid has a cmdline mapped to it - * 0 otherwise. + * Returns true if the pid has a cmdline mapped to it + * false otherwise. */ -int tep_pid_is_registered(struct tep_handle *pevent, int pid) +bool tep_is_pid_registered(struct tep_handle *tep, int pid) { - const struct cmdline *comm; - struct cmdline key; + const struct tep_cmdline *comm; + struct tep_cmdline key; if (!pid) - return 1; + return true; - if (!pevent->cmdlines && cmdline_init(pevent)) - return 0; + if (!tep->cmdlines && cmdline_init(tep)) + return false; key.pid = pid; - comm = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, - sizeof(*pevent->cmdlines), cmdline_cmp); + comm = bsearch(&key, tep->cmdlines, tep->cmdline_count, + sizeof(*tep->cmdlines), cmdline_cmp); if (comm) - return 1; - return 0; + return true; + return false; } /* @@ -232,11 +232,13 @@ int tep_pid_is_registered(struct tep_handle *pevent, int pid) * we must add this pid. This is much slower than when cmdlines * are added before the array is initialized. */ -static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid) +static int add_new_comm(struct tep_handle *tep, + const char *comm, int pid, bool override) { - struct cmdline *cmdlines = pevent->cmdlines; - const struct cmdline *cmdline; - struct cmdline key; + struct tep_cmdline *cmdlines = tep->cmdlines; + struct tep_cmdline *cmdline; + struct tep_cmdline key; + char *new_comm; if (!pid) return 0; @@ -244,52 +246,55 @@ static int add_new_comm(struct tep_handle *pevent, const char *comm, int pid) /* avoid duplicates */ key.pid = pid; - cmdline = bsearch(&key, pevent->cmdlines, pevent->cmdline_count, - sizeof(*pevent->cmdlines), cmdline_cmp); + cmdline = bsearch(&key, tep->cmdlines, tep->cmdline_count, + sizeof(*tep->cmdlines), cmdline_cmp); if (cmdline) { - errno = EEXIST; - return -1; + if (!override) { + errno = EEXIST; + return -1; + } + new_comm = strdup(comm); + if (!new_comm) { + errno = ENOMEM; + return -1; + } + free(cmdline->comm); + cmdline->comm = new_comm; + + return 0; } - cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (pevent->cmdline_count + 1)); + cmdlines = realloc(cmdlines, sizeof(*cmdlines) * (tep->cmdline_count + 1)); if (!cmdlines) { errno = ENOMEM; return -1; } - cmdlines[pevent->cmdline_count].comm = strdup(comm); - if (!cmdlines[pevent->cmdline_count].comm) { + cmdlines[tep->cmdline_count].comm = strdup(comm); + if (!cmdlines[tep->cmdline_count].comm) { free(cmdlines); errno = ENOMEM; return -1; } - cmdlines[pevent->cmdline_count].pid = pid; + cmdlines[tep->cmdline_count].pid = pid; - if (cmdlines[pevent->cmdline_count].comm) - pevent->cmdline_count++; + if (cmdlines[tep->cmdline_count].comm) + tep->cmdline_count++; - qsort(cmdlines, pevent->cmdline_count, sizeof(*cmdlines), cmdline_cmp); - pevent->cmdlines = cmdlines; + qsort(cmdlines, tep->cmdline_count, sizeof(*cmdlines), cmdline_cmp); + tep->cmdlines = cmdlines; return 0; } -/** - * tep_register_comm - register a pid / comm mapping - * @pevent: handle for the pevent - * @comm: the command line to register - * @pid: the pid to map the command line to - * - * This adds a mapping to search for command line names with - * a given pid. The comm is duplicated. - */ -int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) +static int _tep_register_comm(struct tep_handle *tep, + const char *comm, int pid, bool override) { struct cmdline_list *item; - if (pevent->cmdlines) - return add_new_comm(pevent, comm, pid); + if (tep->cmdlines) + return add_new_comm(tep, comm, pid, override); item = malloc(sizeof(*item)); if (!item) @@ -304,18 +309,52 @@ int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid) return -1; } item->pid = pid; - item->next = pevent->cmdlist; + item->next = tep->cmdlist; - pevent->cmdlist = item; - pevent->cmdline_count++; + tep->cmdlist = item; + tep->cmdline_count++; return 0; } -int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock) +/** + * tep_register_comm - register a pid / comm mapping + * @tep: a handle to the trace event parser context + * @comm: the command line to register + * @pid: the pid to map the command line to + * + * This adds a mapping to search for command line names with + * a given pid. The comm is duplicated. If a command with the same pid + * already exist, -1 is returned and errno is set to EEXIST + */ +int tep_register_comm(struct tep_handle *tep, const char *comm, int pid) +{ + return _tep_register_comm(tep, comm, pid, false); +} + +/** + * tep_override_comm - register a pid / comm mapping + * @tep: a handle to the trace event parser context + * @comm: the command line to register + * @pid: the pid to map the command line to + * + * This adds a mapping to search for command line names with + * a given pid. The comm is duplicated. If a command with the same pid + * already exist, the command string is udapted with the new one + */ +int tep_override_comm(struct tep_handle *tep, const char *comm, int pid) { - pevent->trace_clock = strdup(trace_clock); - if (!pevent->trace_clock) { + if (!tep->cmdlines && cmdline_init(tep)) { + errno = ENOMEM; + return -1; + } + return _tep_register_comm(tep, comm, pid, true); +} + +int tep_register_trace_clock(struct tep_handle *tep, const char *trace_clock) +{ + tep->trace_clock = strdup(trace_clock); + if (!tep->trace_clock) { errno = ENOMEM; return -1; } @@ -369,18 +408,18 @@ static int func_bcmp(const void *a, const void *b) return 1; } -static int func_map_init(struct tep_handle *pevent) +static int func_map_init(struct tep_handle *tep) { struct func_list *funclist; struct func_list *item; struct func_map *func_map; int i; - func_map = malloc(sizeof(*func_map) * (pevent->func_count + 1)); + func_map = malloc(sizeof(*func_map) * (tep->func_count + 1)); if (!func_map) return -1; - funclist = pevent->funclist; + funclist = tep->funclist; i = 0; while (funclist) { @@ -393,34 +432,34 @@ static int func_map_init(struct tep_handle *pevent) free(item); } - qsort(func_map, pevent->func_count, sizeof(*func_map), func_cmp); + qsort(func_map, tep->func_count, sizeof(*func_map), func_cmp); /* * Add a special record at the end. */ - func_map[pevent->func_count].func = NULL; - func_map[pevent->func_count].addr = 0; - func_map[pevent->func_count].mod = NULL; + func_map[tep->func_count].func = NULL; + func_map[tep->func_count].addr = 0; + func_map[tep->func_count].mod = NULL; - pevent->func_map = func_map; - pevent->funclist = NULL; + tep->func_map = func_map; + tep->funclist = NULL; return 0; } static struct func_map * -__find_func(struct tep_handle *pevent, unsigned long long addr) +__find_func(struct tep_handle *tep, unsigned long long addr) { struct func_map *func; struct func_map key; - if (!pevent->func_map) - func_map_init(pevent); + if (!tep->func_map) + func_map_init(tep); key.addr = addr; - func = bsearch(&key, pevent->func_map, pevent->func_count, - sizeof(*pevent->func_map), func_bcmp); + func = bsearch(&key, tep->func_map, tep->func_count, + sizeof(*tep->func_map), func_bcmp); return func; } @@ -433,15 +472,14 @@ struct func_resolver { /** * tep_set_function_resolver - set an alternative function resolver - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @resolver: function to be used * @priv: resolver function private state. * * Some tools may have already a way to resolve kernel functions, allow them to - * keep using it instead of duplicating all the entries inside - * pevent->funclist. + * keep using it instead of duplicating all the entries inside tep->funclist. */ -int tep_set_function_resolver(struct tep_handle *pevent, +int tep_set_function_resolver(struct tep_handle *tep, tep_func_resolver_t *func, void *priv) { struct func_resolver *resolver = malloc(sizeof(*resolver)); @@ -452,38 +490,38 @@ int tep_set_function_resolver(struct tep_handle *pevent, resolver->func = func; resolver->priv = priv; - free(pevent->func_resolver); - pevent->func_resolver = resolver; + free(tep->func_resolver); + tep->func_resolver = resolver; return 0; } /** * tep_reset_function_resolver - reset alternative function resolver - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * * Stop using whatever alternative resolver was set, use the default * one instead. */ -void tep_reset_function_resolver(struct tep_handle *pevent) +void tep_reset_function_resolver(struct tep_handle *tep) { - free(pevent->func_resolver); - pevent->func_resolver = NULL; + free(tep->func_resolver); + tep->func_resolver = NULL; } static struct func_map * -find_func(struct tep_handle *pevent, unsigned long long addr) +find_func(struct tep_handle *tep, unsigned long long addr) { struct func_map *map; - if (!pevent->func_resolver) - return __find_func(pevent, addr); + if (!tep->func_resolver) + return __find_func(tep, addr); - map = &pevent->func_resolver->map; + map = &tep->func_resolver->map; map->mod = NULL; map->addr = addr; - map->func = pevent->func_resolver->func(pevent->func_resolver->priv, - &map->addr, &map->mod); + map->func = tep->func_resolver->func(tep->func_resolver->priv, + &map->addr, &map->mod); if (map->func == NULL) return NULL; @@ -492,18 +530,18 @@ find_func(struct tep_handle *pevent, unsigned long long addr) /** * tep_find_function - find a function by a given address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @addr: the address to find the function with * * Returns a pointer to the function stored that has the given * address. Note, the address does not have to be exact, it * will select the function that would contain the address. */ -const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr) +const char *tep_find_function(struct tep_handle *tep, unsigned long long addr) { struct func_map *map; - map = find_func(pevent, addr); + map = find_func(tep, addr); if (!map) return NULL; @@ -512,7 +550,7 @@ const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr /** * tep_find_function_address - find a function address by a given address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @addr: the address to find the function with * * Returns the address the function starts at. This can be used in @@ -520,11 +558,11 @@ const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr * name and the function offset. */ unsigned long long -tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) +tep_find_function_address(struct tep_handle *tep, unsigned long long addr) { struct func_map *map; - map = find_func(pevent, addr); + map = find_func(tep, addr); if (!map) return 0; @@ -533,7 +571,7 @@ tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) /** * tep_register_function - register a function with a given address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @function: the function name to register * @addr: the address the function starts at * @mod: the kernel module the function may be in (NULL for none) @@ -541,7 +579,7 @@ tep_find_function_address(struct tep_handle *pevent, unsigned long long addr) * This registers a function name with an address and module. * The @func passed in is duplicated. */ -int tep_register_function(struct tep_handle *pevent, char *func, +int tep_register_function(struct tep_handle *tep, char *func, unsigned long long addr, char *mod) { struct func_list *item = malloc(sizeof(*item)); @@ -549,7 +587,7 @@ int tep_register_function(struct tep_handle *pevent, char *func, if (!item) return -1; - item->next = pevent->funclist; + item->next = tep->funclist; item->func = strdup(func); if (!item->func) goto out_free; @@ -562,8 +600,8 @@ int tep_register_function(struct tep_handle *pevent, char *func, item->mod = NULL; item->addr = addr; - pevent->funclist = item; - pevent->func_count++; + tep->funclist = item; + tep->func_count++; return 0; @@ -578,23 +616,23 @@ out_free: /** * tep_print_funcs - print out the stored functions - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * * This prints out the stored functions. */ -void tep_print_funcs(struct tep_handle *pevent) +void tep_print_funcs(struct tep_handle *tep) { int i; - if (!pevent->func_map) - func_map_init(pevent); + if (!tep->func_map) + func_map_init(tep); - for (i = 0; i < (int)pevent->func_count; i++) { + for (i = 0; i < (int)tep->func_count; i++) { printf("%016llx %s", - pevent->func_map[i].addr, - pevent->func_map[i].func); - if (pevent->func_map[i].mod) - printf(" [%s]\n", pevent->func_map[i].mod); + tep->func_map[i].addr, + tep->func_map[i].func); + if (tep->func_map[i].mod) + printf(" [%s]\n", tep->func_map[i].mod); else printf("\n"); } @@ -624,18 +662,18 @@ static int printk_cmp(const void *a, const void *b) return 0; } -static int printk_map_init(struct tep_handle *pevent) +static int printk_map_init(struct tep_handle *tep) { struct printk_list *printklist; struct printk_list *item; struct printk_map *printk_map; int i; - printk_map = malloc(sizeof(*printk_map) * (pevent->printk_count + 1)); + printk_map = malloc(sizeof(*printk_map) * (tep->printk_count + 1)); if (!printk_map) return -1; - printklist = pevent->printklist; + printklist = tep->printklist; i = 0; while (printklist) { @@ -647,41 +685,41 @@ static int printk_map_init(struct tep_handle *pevent) free(item); } - qsort(printk_map, pevent->printk_count, sizeof(*printk_map), printk_cmp); + qsort(printk_map, tep->printk_count, sizeof(*printk_map), printk_cmp); - pevent->printk_map = printk_map; - pevent->printklist = NULL; + tep->printk_map = printk_map; + tep->printklist = NULL; return 0; } static struct printk_map * -find_printk(struct tep_handle *pevent, unsigned long long addr) +find_printk(struct tep_handle *tep, unsigned long long addr) { struct printk_map *printk; struct printk_map key; - if (!pevent->printk_map && printk_map_init(pevent)) + if (!tep->printk_map && printk_map_init(tep)) return NULL; key.addr = addr; - printk = bsearch(&key, pevent->printk_map, pevent->printk_count, - sizeof(*pevent->printk_map), printk_cmp); + printk = bsearch(&key, tep->printk_map, tep->printk_count, + sizeof(*tep->printk_map), printk_cmp); return printk; } /** * tep_register_print_string - register a string by its address - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @fmt: the string format to register * @addr: the address the string was located at * * This registers a string by the address it was stored in the kernel. * The @fmt passed in is duplicated. */ -int tep_register_print_string(struct tep_handle *pevent, const char *fmt, +int tep_register_print_string(struct tep_handle *tep, const char *fmt, unsigned long long addr) { struct printk_list *item = malloc(sizeof(*item)); @@ -690,7 +728,7 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt, if (!item) return -1; - item->next = pevent->printklist; + item->next = tep->printklist; item->addr = addr; /* Strip off quotes and '\n' from the end */ @@ -708,8 +746,8 @@ int tep_register_print_string(struct tep_handle *pevent, const char *fmt, if (strcmp(p, "\\n") == 0) *p = 0; - pevent->printklist = item; - pevent->printk_count++; + tep->printklist = item; + tep->printk_count++; return 0; @@ -721,21 +759,21 @@ out_free: /** * tep_print_printk - print out the stored strings - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * * This prints the string formats that were stored. */ -void tep_print_printk(struct tep_handle *pevent) +void tep_print_printk(struct tep_handle *tep) { int i; - if (!pevent->printk_map) - printk_map_init(pevent); + if (!tep->printk_map) + printk_map_init(tep); - for (i = 0; i < (int)pevent->printk_count; i++) { + for (i = 0; i < (int)tep->printk_count; i++) { printf("%016llx %s\n", - pevent->printk_map[i].addr, - pevent->printk_map[i].printk); + tep->printk_map[i].addr, + tep->printk_map[i].printk); } } @@ -744,29 +782,29 @@ static struct tep_event *alloc_event(void) return calloc(1, sizeof(struct tep_event)); } -static int add_event(struct tep_handle *pevent, struct tep_event *event) +static int add_event(struct tep_handle *tep, struct tep_event *event) { int i; - struct tep_event **events = realloc(pevent->events, sizeof(event) * - (pevent->nr_events + 1)); + struct tep_event **events = realloc(tep->events, sizeof(event) * + (tep->nr_events + 1)); if (!events) return -1; - pevent->events = events; + tep->events = events; - for (i = 0; i < pevent->nr_events; i++) { - if (pevent->events[i]->id > event->id) + for (i = 0; i < tep->nr_events; i++) { + if (tep->events[i]->id > event->id) break; } - if (i < pevent->nr_events) - memmove(&pevent->events[i + 1], - &pevent->events[i], - sizeof(event) * (pevent->nr_events - i)); + if (i < tep->nr_events) + memmove(&tep->events[i + 1], + &tep->events[i], + sizeof(event) * (tep->nr_events - i)); - pevent->events[i] = event; - pevent->nr_events++; + tep->events[i] = event; + tep->nr_events++; - event->pevent = pevent; + event->tep = tep; return 0; } @@ -1145,7 +1183,7 @@ static enum tep_event_type read_token(char **tok) } /** - * tep_read_token - access to utilities to use the pevent parser + * tep_read_token - access to utilities to use the tep parser * @tok: The token to return * * This will parse tokens from the string given by @@ -1618,8 +1656,8 @@ static int event_read_fields(struct tep_event *event, struct tep_format_field ** else if (field->flags & TEP_FIELD_IS_STRING) field->elementsize = 1; else if (field->flags & TEP_FIELD_IS_LONG) - field->elementsize = event->pevent ? - event->pevent->long_size : + field->elementsize = event->tep ? + event->tep->long_size : sizeof(long); } else field->elementsize = field->size; @@ -2194,7 +2232,7 @@ eval_type_str(unsigned long long val, const char *type, int pointer) return val & 0xffffffff; if (strcmp(type, "u64") == 0 || - strcmp(type, "s64")) + strcmp(type, "s64") == 0) return val; if (strcmp(type, "s8") == 0) @@ -2418,7 +2456,7 @@ static int arg_num_eval(struct tep_print_arg *arg, long long *val) static char *arg_eval (struct tep_print_arg *arg) { long long val; - static char buf[20]; + static char buf[24]; switch (arg->type) { case TEP_PRINT_ATOM: @@ -2903,14 +2941,14 @@ process_bitmask(struct tep_event *event __maybe_unused, struct tep_print_arg *ar } static struct tep_function_handler * -find_func_handler(struct tep_handle *pevent, char *func_name) +find_func_handler(struct tep_handle *tep, char *func_name) { struct tep_function_handler *func; - if (!pevent) + if (!tep) return NULL; - for (func = pevent->func_handlers; func; func = func->next) { + for (func = tep->func_handlers; func; func = func->next) { if (strcmp(func->name, func_name) == 0) break; } @@ -2918,12 +2956,12 @@ find_func_handler(struct tep_handle *pevent, char *func_name) return func; } -static void remove_func_handler(struct tep_handle *pevent, char *func_name) +static void remove_func_handler(struct tep_handle *tep, char *func_name) { struct tep_function_handler *func; struct tep_function_handler **next; - next = &pevent->func_handlers; + next = &tep->func_handlers; while ((func = *next)) { if (strcmp(func->name, func_name) == 0) { *next = func->next; @@ -3037,7 +3075,7 @@ process_function(struct tep_event *event, struct tep_print_arg *arg, return process_dynamic_array_len(event, arg, tok); } - func = find_func_handler(event->pevent, token); + func = find_func_handler(event->tep, token); if (func) { free_token(token); return process_func_handler(event, func, arg, tok); @@ -3318,14 +3356,14 @@ tep_find_any_field(struct tep_event *event, const char *name) /** * tep_read_number - read a number from data - * @pevent: handle for the pevent + * @tep: a handle to the trace event parser context * @ptr: the raw data * @size: the size of the data that holds the number * * Returns the number (converted to host) from the * raw data. */ -unsigned long long tep_read_number(struct tep_handle *pevent, +unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size) { unsigned long long val; @@ -3334,12 +3372,12 @@ unsigned long long tep_read_number(struct tep_handle *pevent, case 1: return *(unsigned char *)ptr; case 2: - return tep_data2host2(pevent, *(unsigned short *)ptr); + return tep_data2host2(tep, *(unsigned short *)ptr); case 4: - return tep_data2host4(pevent, *(unsigned int *)ptr); + return tep_data2host4(tep, *(unsigned int *)ptr); case 8: memcpy(&val, (ptr), sizeof(unsigned long long)); - return tep_data2host8(pevent, val); + return tep_data2host8(tep, val); default: /* BUG! */ return 0; @@ -3367,7 +3405,7 @@ int tep_read_number_field(struct tep_format_field *field, const void *data, case 2: case 4: case 8: - *value = tep_read_number(field->event->pevent, + *value = tep_read_number(field->event->tep, data + field->offset, field->size); return 0; default: @@ -3375,7 +3413,7 @@ int tep_read_number_field(struct tep_format_field *field, const void *data, } } -static int get_common_info(struct tep_handle *pevent, +static int get_common_info(struct tep_handle *tep, const char *type, int *offset, int *size) { struct tep_event *event; @@ -3385,12 +3423,12 @@ static int get_common_info(struct tep_handle *pevent, * All events should have the same common elements. * Pick any event to find where the type is; */ - if (!pevent->events) { + if (!tep->events) { do_warning("no event_list!"); return -1; } - event = pevent->events[0]; + event = tep->events[0]; field = tep_find_common_field(event, type); if (!field) return -1; @@ -3401,58 +3439,58 @@ static int get_common_info(struct tep_handle *pevent, return 0; } -static int __parse_common(struct tep_handle *pevent, void *data, +static int __parse_common(struct tep_handle *tep, void *data, int *size, int *offset, const char *name) { int ret; if (!*size) { - ret = get_common_info(pevent, name, offset, size); + ret = get_common_info(tep, name, offset, size); if (ret < 0) return ret; } - return tep_read_number(pevent, data + *offset, *size); + return tep_read_number(tep, data + *offset, *size); } -static int trace_parse_common_type(struct tep_handle *pevent, void *data) +static int trace_parse_common_type(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->type_size, &pevent->type_offset, + return __parse_common(tep, data, + &tep->type_size, &tep->type_offset, "common_type"); } -static int parse_common_pid(struct tep_handle *pevent, void *data) +static int parse_common_pid(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->pid_size, &pevent->pid_offset, + return __parse_common(tep, data, + &tep->pid_size, &tep->pid_offset, "common_pid"); } -static int parse_common_pc(struct tep_handle *pevent, void *data) +static int parse_common_pc(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->pc_size, &pevent->pc_offset, + return __parse_common(tep, data, + &tep->pc_size, &tep->pc_offset, "common_preempt_count"); } -static int parse_common_flags(struct tep_handle *pevent, void *data) +static int parse_common_flags(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->flags_size, &pevent->flags_offset, + return __parse_common(tep, data, + &tep->flags_size, &tep->flags_offset, "common_flags"); } -static int parse_common_lock_depth(struct tep_handle *pevent, void *data) +static int parse_common_lock_depth(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->ld_size, &pevent->ld_offset, + return __parse_common(tep, data, + &tep->ld_size, &tep->ld_offset, "common_lock_depth"); } -static int parse_common_migrate_disable(struct tep_handle *pevent, void *data) +static int parse_common_migrate_disable(struct tep_handle *tep, void *data) { - return __parse_common(pevent, data, - &pevent->ld_size, &pevent->ld_offset, + return __parse_common(tep, data, + &tep->ld_size, &tep->ld_offset, "common_migrate_disable"); } @@ -3460,28 +3498,28 @@ static int events_id_cmp(const void *a, const void *b); /** * tep_find_event - find an event by given id - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @id: the id of the event * * Returns an event that has a given @id. */ -struct tep_event *tep_find_event(struct tep_handle *pevent, int id) +struct tep_event *tep_find_event(struct tep_handle *tep, int id) { struct tep_event **eventptr; struct tep_event key; struct tep_event *pkey = &key; /* Check cache first */ - if (pevent->last_event && pevent->last_event->id == id) - return pevent->last_event; + if (tep->last_event && tep->last_event->id == id) + return tep->last_event; key.id = id; - eventptr = bsearch(&pkey, pevent->events, pevent->nr_events, - sizeof(*pevent->events), events_id_cmp); + eventptr = bsearch(&pkey, tep->events, tep->nr_events, + sizeof(*tep->events), events_id_cmp); if (eventptr) { - pevent->last_event = *eventptr; + tep->last_event = *eventptr; return *eventptr; } @@ -3490,7 +3528,7 @@ struct tep_event *tep_find_event(struct tep_handle *pevent, int id) /** * tep_find_event_by_name - find an event by given name - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @sys: the system name to search for * @name: the name of the event to search for * @@ -3498,19 +3536,19 @@ struct tep_event *tep_find_event(struct tep_handle *pevent, int id) * @sys. If @sys is NULL the first event with @name is returned. */ struct tep_event * -tep_find_event_by_name(struct tep_handle *pevent, +tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name) { struct tep_event *event = NULL; int i; - if (pevent->last_event && - strcmp(pevent->last_event->name, name) == 0 && - (!sys || strcmp(pevent->last_event->system, sys) == 0)) - return pevent->last_event; + if (tep->last_event && + strcmp(tep->last_event->name, name) == 0 && + (!sys || strcmp(tep->last_event->system, sys) == 0)) + return tep->last_event; - for (i = 0; i < pevent->nr_events; i++) { - event = pevent->events[i]; + for (i = 0; i < tep->nr_events; i++) { + event = tep->events[i]; if (strcmp(event->name, name) == 0) { if (!sys) break; @@ -3518,17 +3556,17 @@ tep_find_event_by_name(struct tep_handle *pevent, break; } } - if (i == pevent->nr_events) + if (i == tep->nr_events) event = NULL; - pevent->last_event = event; + tep->last_event = event; return event; } static unsigned long long eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg *arg) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; unsigned long long val = 0; unsigned long long left, right; struct tep_print_arg *typearg = NULL; @@ -3550,7 +3588,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg } /* must be a number */ - val = tep_read_number(pevent, data + arg->field.field->offset, + val = tep_read_number(tep, data + arg->field.field->offset, arg->field.field->size); break; case TEP_PRINT_FLAGS: @@ -3590,11 +3628,11 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg } /* Default to long size */ - field_size = pevent->long_size; + field_size = tep->long_size; switch (larg->type) { case TEP_PRINT_DYNAMIC_ARRAY: - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + larg->dynarray.field->offset, larg->dynarray.field->size); if (larg->dynarray.field->elementsize) @@ -3623,7 +3661,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg default: goto default_op; /* oops, all bets off */ } - val = tep_read_number(pevent, + val = tep_read_number(tep, data + offset, field_size); if (typearg) val = eval_type(val, typearg, 1); @@ -3724,7 +3762,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg } break; case TEP_PRINT_DYNAMIC_ARRAY_LEN: - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + arg->dynarray.field->offset, arg->dynarray.field->size); /* @@ -3736,7 +3774,7 @@ eval_num_arg(void *data, int size, struct tep_event *event, struct tep_print_arg break; case TEP_PRINT_DYNAMIC_ARRAY: /* Without [], we pass the address to the dynamic data */ - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + arg->dynarray.field->offset, arg->dynarray.field->size); /* @@ -3811,7 +3849,7 @@ static void print_str_to_seq(struct trace_seq *s, const char *format, trace_seq_printf(s, format, str); } -static void print_bitmask_to_seq(struct tep_handle *pevent, +static void print_bitmask_to_seq(struct tep_handle *tep, struct trace_seq *s, const char *format, int len_arg, const void *data, int size) { @@ -3843,7 +3881,7 @@ static void print_bitmask_to_seq(struct tep_handle *pevent, * In the kernel, this is an array of long words, thus * endianness is very important. */ - if (pevent->file_bigendian) + if (tep->file_bigendian) index = size - (len + 1); else index = len; @@ -3869,7 +3907,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct tep_event *event, const char *format, int len_arg, struct tep_print_arg *arg) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; struct tep_print_flag_sym *flag; struct tep_format_field *field; struct printk_map *printk; @@ -3906,7 +3944,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, * is a pointer. */ if (!(field->flags & TEP_FIELD_IS_ARRAY) && - field->size == pevent->long_size) { + field->size == tep->long_size) { /* Handle heterogeneous recording and processing * architectures @@ -3921,12 +3959,12 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, * on 32-bit devices: * In this case, 64 bits must be read. */ - addr = (pevent->long_size == 8) ? + addr = (tep->long_size == 8) ? *(unsigned long long *)(data + field->offset) : (unsigned long long)*(unsigned int *)(data + field->offset); /* Check if it matches a print format */ - printk = find_printk(pevent, addr); + printk = find_printk(tep, addr); if (printk) trace_seq_puts(s, printk->printk); else @@ -3983,7 +4021,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, case TEP_PRINT_HEX_STR: if (arg->hex.field->type == TEP_PRINT_DYNAMIC_ARRAY) { unsigned long offset; - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + arg->hex.field->dynarray.field->offset, arg->hex.field->dynarray.field->size); hex = data + (offset & 0xffff); @@ -4014,7 +4052,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, unsigned long offset; struct tep_format_field *field = arg->int_array.field->dynarray.field; - offset = tep_read_number(pevent, + offset = tep_read_number(tep, data + field->offset, field->size); num = data + (offset & 0xffff); @@ -4065,7 +4103,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->string.string); arg->string.offset = f->offset; } - str_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->string.offset)); + str_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->string.offset)); str_offset &= 0xffff; print_str_to_seq(s, format, len_arg, ((char *)data) + str_offset); break; @@ -4083,10 +4121,10 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, f = tep_find_any_field(event, arg->bitmask.bitmask); arg->bitmask.offset = f->offset; } - bitmask_offset = tep_data2host4(pevent, *(unsigned int *)(data + arg->bitmask.offset)); + bitmask_offset = tep_data2host4(tep, *(unsigned int *)(data + arg->bitmask.offset)); bitmask_size = bitmask_offset >> 16; bitmask_offset &= 0xffff; - print_bitmask_to_seq(pevent, s, format, len_arg, + print_bitmask_to_seq(tep, s, format, len_arg, data + bitmask_offset, bitmask_size); break; } @@ -4218,7 +4256,7 @@ static void free_args(struct tep_print_arg *args) static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, struct tep_event *event) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; struct tep_format_field *field, *ip_field; struct tep_print_arg *args, *arg, **next; unsigned long long ip, val; @@ -4226,8 +4264,8 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s void *bptr; int vsize = 0; - field = pevent->bprint_buf_field; - ip_field = pevent->bprint_ip_field; + field = tep->bprint_buf_field; + ip_field = tep->bprint_ip_field; if (!field) { field = tep_find_field(event, "buf"); @@ -4240,11 +4278,11 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s do_warning_event(event, "can't find ip field for binary printk"); return NULL; } - pevent->bprint_buf_field = field; - pevent->bprint_ip_field = ip_field; + tep->bprint_buf_field = field; + tep->bprint_ip_field = ip_field; } - ip = tep_read_number(pevent, data + ip_field->offset, ip_field->size); + ip = tep_read_number(tep, data + ip_field->offset, ip_field->size); /* * The first arg is the IP pointer. @@ -4299,6 +4337,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s case 'S': case 'f': case 'F': + case 'x': break; default: /* @@ -4321,7 +4360,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s vsize = 4; break; case 1: - vsize = pevent->long_size; + vsize = tep->long_size; break; case 2: vsize = 8; @@ -4338,7 +4377,7 @@ static struct tep_print_arg *make_bprint_args(char *fmt, void *data, int size, s /* the pointers are always 4 bytes aligned */ bptr = (void *)(((unsigned long)bptr + 3) & ~3); - val = tep_read_number(pevent, bptr, vsize); + val = tep_read_number(tep, bptr, vsize); bptr += vsize; arg = alloc_arg(); if (!arg) { @@ -4395,13 +4434,13 @@ static char * get_bprint_format(void *data, int size __maybe_unused, struct tep_event *event) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; unsigned long long addr; struct tep_format_field *field; struct printk_map *printk; char *format; - field = pevent->bprint_fmt_field; + field = tep->bprint_fmt_field; if (!field) { field = tep_find_field(event, "fmt"); @@ -4409,12 +4448,12 @@ get_bprint_format(void *data, int size __maybe_unused, do_warning_event(event, "can't find format field for binary printk"); return NULL; } - pevent->bprint_fmt_field = field; + tep->bprint_fmt_field = field; } - addr = tep_read_number(pevent, data + field->offset, field->size); + addr = tep_read_number(tep, data + field->offset, field->size); - printk = find_printk(pevent, addr); + printk = find_printk(tep, addr); if (!printk) { if (asprintf(&format, "%%pf: (NO FORMAT FOUND at %llx)\n", addr) < 0) return NULL; @@ -4796,13 +4835,13 @@ void tep_print_field(struct trace_seq *s, void *data, { unsigned long long val; unsigned int offset, len, i; - struct tep_handle *pevent = field->event->pevent; + struct tep_handle *tep = field->event->tep; if (field->flags & TEP_FIELD_IS_ARRAY) { offset = field->offset; len = field->size; if (field->flags & TEP_FIELD_IS_DYNAMIC) { - val = tep_read_number(pevent, data + offset, len); + val = tep_read_number(tep, data + offset, len); offset = val; len = offset >> 16; offset &= 0xffff; @@ -4822,7 +4861,7 @@ void tep_print_field(struct trace_seq *s, void *data, field->flags &= ~TEP_FIELD_IS_STRING; } } else { - val = tep_read_number(pevent, data + field->offset, + val = tep_read_number(tep, data + field->offset, field->size); if (field->flags & TEP_FIELD_IS_POINTER) { trace_seq_printf(s, "0x%llx", val); @@ -4871,7 +4910,7 @@ void tep_print_fields(struct trace_seq *s, void *data, static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_event *event) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; struct tep_print_fmt *print_fmt = &event->print_fmt; struct tep_print_arg *arg = print_fmt->args; struct tep_print_arg *args = NULL; @@ -4963,7 +5002,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e case '-': goto cont_process; case 'p': - if (pevent->long_size == 4) + if (tep->long_size == 4) ls = 1; else ls = 2; @@ -5024,7 +5063,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e arg = arg->next; if (show_func) { - func = find_func(pevent, val); + func = find_func(tep, val); if (func) { trace_seq_puts(s, func->func); if (show_func == 'F') @@ -5034,7 +5073,7 @@ static void pretty_print(struct trace_seq *s, void *data, int size, struct tep_e break; } } - if (pevent->long_size == 8 && ls == 1 && + if (tep->long_size == 8 && ls == 1 && sizeof(long) != 8) { char *p; @@ -5132,8 +5171,8 @@ out_failed: } /** - * tep_data_lat_fmt - parse the data for the latency format - * @pevent: a handle to the pevent + * tep_data_latency_format - parse the data for the latency format + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @record: the record to read from * @@ -5141,8 +5180,8 @@ out_failed: * need rescheduling, in hard/soft interrupt, preempt count * and lock depth) and places it into the trace_seq. */ -void tep_data_lat_fmt(struct tep_handle *pevent, - struct trace_seq *s, struct tep_record *record) +void tep_data_latency_format(struct tep_handle *tep, + struct trace_seq *s, struct tep_record *record) { static int check_lock_depth = 1; static int check_migrate_disable = 1; @@ -5156,13 +5195,13 @@ void tep_data_lat_fmt(struct tep_handle *pevent, int softirq; void *data = record->data; - lat_flags = parse_common_flags(pevent, data); - pc = parse_common_pc(pevent, data); + lat_flags = parse_common_flags(tep, data); + pc = parse_common_pc(tep, data); /* lock_depth may not always exist */ if (lock_depth_exists) - lock_depth = parse_common_lock_depth(pevent, data); + lock_depth = parse_common_lock_depth(tep, data); else if (check_lock_depth) { - lock_depth = parse_common_lock_depth(pevent, data); + lock_depth = parse_common_lock_depth(tep, data); if (lock_depth < 0) check_lock_depth = 0; else @@ -5171,9 +5210,9 @@ void tep_data_lat_fmt(struct tep_handle *pevent, /* migrate_disable may not always exist */ if (migrate_disable_exists) - migrate_disable = parse_common_migrate_disable(pevent, data); + migrate_disable = parse_common_migrate_disable(tep, data); else if (check_migrate_disable) { - migrate_disable = parse_common_migrate_disable(pevent, data); + migrate_disable = parse_common_migrate_disable(tep, data); if (migrate_disable < 0) check_migrate_disable = 0; else @@ -5216,101 +5255,89 @@ void tep_data_lat_fmt(struct tep_handle *pevent, /** * tep_data_type - parse out the given event type - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to read from * * This returns the event id from the @rec. */ -int tep_data_type(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_type(struct tep_handle *tep, struct tep_record *rec) { - return trace_parse_common_type(pevent, rec->data); -} - -/** - * tep_data_event_from_type - find the event by a given type - * @pevent: a handle to the pevent - * @type: the type of the event. - * - * This returns the event form a given @type; - */ -struct tep_event *tep_data_event_from_type(struct tep_handle *pevent, int type) -{ - return tep_find_event(pevent, type); + return trace_parse_common_type(tep, rec->data); } /** * tep_data_pid - parse the PID from record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to parse * * This returns the PID from a record. */ -int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_pid(struct tep_handle *tep, struct tep_record *rec) { - return parse_common_pid(pevent, rec->data); + return parse_common_pid(tep, rec->data); } /** * tep_data_preempt_count - parse the preempt count from the record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to parse * * This returns the preempt count from a record. */ -int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec) { - return parse_common_pc(pevent, rec->data); + return parse_common_pc(tep, rec->data); } /** * tep_data_flags - parse the latency flags from the record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @rec: the record to parse * * This returns the latency flags from a record. * * Use trace_flag_type enum for the flags (see event-parse.h). */ -int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec) +int tep_data_flags(struct tep_handle *tep, struct tep_record *rec) { - return parse_common_flags(pevent, rec->data); + return parse_common_flags(tep, rec->data); } /** * tep_data_comm_from_pid - return the command line from PID - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @pid: the PID of the task to search for * * This returns a pointer to the command line that has the given * @pid. */ -const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid) +const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid) { const char *comm; - comm = find_cmdline(pevent, pid); + comm = find_cmdline(tep, pid); return comm; } -static struct cmdline * -pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *next) +static struct tep_cmdline * +pid_from_cmdlist(struct tep_handle *tep, const char *comm, struct tep_cmdline *next) { struct cmdline_list *cmdlist = (struct cmdline_list *)next; if (cmdlist) cmdlist = cmdlist->next; else - cmdlist = pevent->cmdlist; + cmdlist = tep->cmdlist; while (cmdlist && strcmp(cmdlist->comm, comm) != 0) cmdlist = cmdlist->next; - return (struct cmdline *)cmdlist; + return (struct tep_cmdline *)cmdlist; } /** * tep_data_pid_from_comm - return the pid from a given comm - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @comm: the cmdline to find the pid from * @next: the cmdline structure to find the next comm * @@ -5321,34 +5348,34 @@ pid_from_cmdlist(struct tep_handle *pevent, const char *comm, struct cmdline *ne * next pid. * Also, it does a linear search, so it may be slow. */ -struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, - struct cmdline *next) +struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, + struct tep_cmdline *next) { - struct cmdline *cmdline; + struct tep_cmdline *cmdline; /* * If the cmdlines have not been converted yet, then use * the list. */ - if (!pevent->cmdlines) - return pid_from_cmdlist(pevent, comm, next); + if (!tep->cmdlines) + return pid_from_cmdlist(tep, comm, next); if (next) { /* * The next pointer could have been still from * a previous call before cmdlines were created */ - if (next < pevent->cmdlines || - next >= pevent->cmdlines + pevent->cmdline_count) + if (next < tep->cmdlines || + next >= tep->cmdlines + tep->cmdline_count) next = NULL; else cmdline = next++; } if (!next) - cmdline = pevent->cmdlines; + cmdline = tep->cmdlines; - while (cmdline < pevent->cmdlines + pevent->cmdline_count) { + while (cmdline < tep->cmdlines + tep->cmdline_count) { if (strcmp(cmdline->comm, comm) == 0) return cmdline; cmdline++; @@ -5358,12 +5385,13 @@ struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *co /** * tep_cmdline_pid - return the pid associated to a given cmdline + * @tep: a handle to the trace event parser context * @cmdline: The cmdline structure to get the pid from * * Returns the pid for a give cmdline. If @cmdline is NULL, then * -1 is returned. */ -int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline) +int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline) { struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; @@ -5374,9 +5402,9 @@ int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline) * If cmdlines have not been created yet, or cmdline is * not part of the array, then treat it as a cmdlist instead. */ - if (!pevent->cmdlines || - cmdline < pevent->cmdlines || - cmdline >= pevent->cmdlines + pevent->cmdline_count) + if (!tep->cmdlines || + cmdline < tep->cmdlines || + cmdline >= tep->cmdlines + tep->cmdline_count) return cmdlist->pid; return cmdline->pid; @@ -5396,7 +5424,7 @@ void tep_event_info(struct trace_seq *s, struct tep_event *event, { int print_pretty = 1; - if (event->pevent->print_raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) + if (event->tep->print_raw || (event->flags & TEP_EVENT_FL_PRINTRAW)) tep_print_fields(s, record->data, record->size, event); else { @@ -5417,7 +5445,8 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) return true; if (!strcmp(trace_clock, "local") || !strcmp(trace_clock, "global") - || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf")) + || !strcmp(trace_clock, "uptime") || !strcmp(trace_clock, "perf") + || !strncmp(trace_clock, "mono", 4)) return true; /* trace_clock is setting in tsc or counter mode */ @@ -5426,14 +5455,14 @@ static bool is_timestamp_in_us(char *trace_clock, bool use_trace_clock) /** * tep_find_event_by_record - return the event from a given record - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @record: The record to get the event from * * Returns the associated event for a given record, or NULL if non is * is found. */ struct tep_event * -tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) +tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record) { int type; @@ -5442,21 +5471,21 @@ tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record) return NULL; } - type = trace_parse_common_type(pevent, record->data); + type = trace_parse_common_type(tep, record->data); - return tep_find_event(pevent, type); + return tep_find_event(tep, type); } /** * tep_print_event_task - Write the event task comm, pid and CPU - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @event: the handle to the record's event * @record: The record to get the event from * * Writes the tasks comm, pid and CPU to @s. */ -void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_task(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record) { @@ -5464,27 +5493,26 @@ void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, const char *comm; int pid; - pid = parse_common_pid(pevent, data); - comm = find_cmdline(pevent, pid); + pid = parse_common_pid(tep, data); + comm = find_cmdline(tep, pid); - if (pevent->latency_format) { - trace_seq_printf(s, "%8.8s-%-5d %3d", - comm, pid, record->cpu); - } else + if (tep->latency_format) + trace_seq_printf(s, "%8.8s-%-5d %3d", comm, pid, record->cpu); + else trace_seq_printf(s, "%16s-%-5d [%03d]", comm, pid, record->cpu); } /** * tep_print_event_time - Write the event timestamp - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @event: the handle to the record's event * @record: The record to get the event from - * @use_trace_clock: Set to parse according to the @pevent->trace_clock + * @use_trace_clock: Set to parse according to the @tep->trace_clock * * Writes the timestamp of the record into @s. */ -void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_time(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record, bool use_trace_clock) @@ -5495,19 +5523,18 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, int p; bool use_usec_format; - use_usec_format = is_timestamp_in_us(pevent->trace_clock, - use_trace_clock); + use_usec_format = is_timestamp_in_us(tep->trace_clock, use_trace_clock); if (use_usec_format) { secs = record->ts / NSEC_PER_SEC; nsecs = record->ts - secs * NSEC_PER_SEC; } - if (pevent->latency_format) { - tep_data_lat_fmt(pevent, s, record); + if (tep->latency_format) { + tep_data_latency_format(tep, s, record); } if (use_usec_format) { - if (pevent->flags & TEP_NSEC_OUTPUT) { + if (tep->flags & TEP_NSEC_OUTPUT) { usecs = nsecs; p = 9; } else { @@ -5527,14 +5554,14 @@ void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, /** * tep_print_event_data - Write the event data section - * @pevent: a handle to the pevent + * @tep: a handle to the trace event parser context * @s: the trace_seq to write to * @event: the handle to the record's event * @record: The record to get the event from * * Writes the parsing of the record's data to @s. */ -void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_data(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record) { @@ -5551,15 +5578,15 @@ void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, tep_event_info(s, event, record); } -void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event(struct tep_handle *tep, struct trace_seq *s, struct tep_record *record, bool use_trace_clock) { struct tep_event *event; - event = tep_find_event_by_record(pevent, record); + event = tep_find_event_by_record(tep, record); if (!event) { int i; - int type = trace_parse_common_type(pevent, record->data); + int type = trace_parse_common_type(tep, record->data); do_warning("ug! no event found for type %d", type); trace_seq_printf(s, "[UNKNOWN TYPE %d]", type); @@ -5569,9 +5596,9 @@ void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, return; } - tep_print_event_task(pevent, s, event, record); - tep_print_event_time(pevent, s, event, record, use_trace_clock); - tep_print_event_data(pevent, s, event, record); + tep_print_event_task(tep, s, event, record); + tep_print_event_time(tep, s, event, record, use_trace_clock); + tep_print_event_data(tep, s, event, record); } static int events_id_cmp(const void *a, const void *b) @@ -5622,32 +5649,26 @@ static int events_system_cmp(const void *a, const void *b) return events_id_cmp(a, b); } -struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type sort_type) +static struct tep_event **list_events_copy(struct tep_handle *tep) { struct tep_event **events; - int (*sort)(const void *a, const void *b); - - events = pevent->sort_events; - - if (events && pevent->last_type == sort_type) - return events; - if (!events) { - events = malloc(sizeof(*events) * (pevent->nr_events + 1)); - if (!events) - return NULL; + if (!tep) + return NULL; - memcpy(events, pevent->events, sizeof(*events) * pevent->nr_events); - events[pevent->nr_events] = NULL; + events = malloc(sizeof(*events) * (tep->nr_events + 1)); + if (!events) + return NULL; - pevent->sort_events = events; + memcpy(events, tep->events, sizeof(*events) * tep->nr_events); + events[tep->nr_events] = NULL; + return events; +} - /* the internal events are sorted by id */ - if (sort_type == TEP_EVENT_SORT_ID) { - pevent->last_type = sort_type; - return events; - } - } +static void list_events_sort(struct tep_event **events, int nr_events, + enum tep_event_sort_type sort_type) +{ + int (*sort)(const void *a, const void *b); switch (sort_type) { case TEP_EVENT_SORT_ID: @@ -5660,11 +5681,82 @@ struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sor sort = events_system_cmp; break; default: + sort = NULL; + } + + if (sort) + qsort(events, nr_events, sizeof(*events), sort); +} + +/** + * tep_list_events - Get events, sorted by given criteria. + * @tep: a handle to the tep context + * @sort_type: desired sort order of the events in the array + * + * Returns an array of pointers to all events, sorted by the given + * @sort_type criteria. The last element of the array is NULL. The returned + * memory must not be freed, it is managed by the library. + * The function is not thread safe. + */ +struct tep_event **tep_list_events(struct tep_handle *tep, + enum tep_event_sort_type sort_type) +{ + struct tep_event **events; + + if (!tep) + return NULL; + + events = tep->sort_events; + if (events && tep->last_type == sort_type) return events; + + if (!events) { + events = list_events_copy(tep); + if (!events) + return NULL; + + tep->sort_events = events; + + /* the internal events are sorted by id */ + if (sort_type == TEP_EVENT_SORT_ID) { + tep->last_type = sort_type; + return events; + } } - qsort(events, pevent->nr_events, sizeof(*events), sort); - pevent->last_type = sort_type; + list_events_sort(events, tep->nr_events, sort_type); + tep->last_type = sort_type; + + return events; +} + + +/** + * tep_list_events_copy - Thread safe version of tep_list_events() + * @tep: a handle to the tep context + * @sort_type: desired sort order of the events in the array + * + * Returns an array of pointers to all events, sorted by the given + * @sort_type criteria. The last element of the array is NULL. The returned + * array is newly allocated inside the function and must be freed by the caller + */ +struct tep_event **tep_list_events_copy(struct tep_handle *tep, + enum tep_event_sort_type sort_type) +{ + struct tep_event **events; + + if (!tep) + return NULL; + + events = list_events_copy(tep); + if (!events) + return NULL; + + /* the internal events are sorted by id */ + if (sort_type == TEP_EVENT_SORT_ID) + return events; + + list_events_sort(events, tep->nr_events, sort_type); return events; } @@ -5923,7 +6015,7 @@ static void parse_header_field(const char *field, /** * tep_parse_header_page - parse the data stored in the header page - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @buf: the buffer storing the header page format string * @size: the size of @buf * @long_size: the long size to use if there is no header @@ -5933,7 +6025,7 @@ static void parse_header_field(const char *field, * * /sys/kernel/debug/tracing/events/header_page */ -int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size, +int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, int long_size) { int ignore; @@ -5943,22 +6035,22 @@ int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long si * Old kernels did not have header page info. * Sorry but we just use what we find here in user space. */ - pevent->header_page_ts_size = sizeof(long long); - pevent->header_page_size_size = long_size; - pevent->header_page_data_offset = sizeof(long long) + long_size; - pevent->old_format = 1; + tep->header_page_ts_size = sizeof(long long); + tep->header_page_size_size = long_size; + tep->header_page_data_offset = sizeof(long long) + long_size; + tep->old_format = 1; return -1; } init_input_buf(buf, size); - parse_header_field("timestamp", &pevent->header_page_ts_offset, - &pevent->header_page_ts_size, 1); - parse_header_field("commit", &pevent->header_page_size_offset, - &pevent->header_page_size_size, 1); - parse_header_field("overwrite", &pevent->header_page_overwrite, + parse_header_field("timestamp", &tep->header_page_ts_offset, + &tep->header_page_ts_size, 1); + parse_header_field("commit", &tep->header_page_size_offset, + &tep->header_page_size_size, 1); + parse_header_field("overwrite", &tep->header_page_overwrite, &ignore, 0); - parse_header_field("data", &pevent->header_page_data_offset, - &pevent->header_page_data_size, 1); + parse_header_field("data", &tep->header_page_data_offset, + &tep->header_page_data_size, 1); return 0; } @@ -5986,11 +6078,11 @@ static void free_handler(struct event_handler *handle) free(handle); } -static int find_event_handle(struct tep_handle *pevent, struct tep_event *event) +static int find_event_handle(struct tep_handle *tep, struct tep_event *event) { struct event_handler *handle, **next; - for (next = &pevent->handlers; *next; + for (next = &tep->handlers; *next; next = &(*next)->next) { handle = *next; if (event_matches(event, handle->id, @@ -6028,7 +6120,7 @@ static int find_event_handle(struct tep_handle *pevent, struct tep_event *event) * /sys/kernel/debug/tracing/events/.../.../format */ enum tep_errno __tep_parse_format(struct tep_event **eventp, - struct tep_handle *pevent, const char *buf, + struct tep_handle *tep, const char *buf, unsigned long size, const char *sys) { struct tep_event *event; @@ -6070,8 +6162,8 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, goto event_alloc_failed; } - /* Add pevent to event so that it can be referenced */ - event->pevent = pevent; + /* Add tep to event so that it can be referenced */ + event->tep = tep; ret = event_read_format(event); if (ret < 0) { @@ -6083,7 +6175,7 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, * If the event has an override, don't print warnings if the event * print format fails to parse. */ - if (pevent && find_event_handle(pevent, event)) + if (tep && find_event_handle(tep, event)) show_warning = 0; ret = event_read_print(event); @@ -6135,18 +6227,18 @@ enum tep_errno __tep_parse_format(struct tep_event **eventp, } static enum tep_errno -__parse_event(struct tep_handle *pevent, +__parse_event(struct tep_handle *tep, struct tep_event **eventp, const char *buf, unsigned long size, const char *sys) { - int ret = __tep_parse_format(eventp, pevent, buf, size, sys); + int ret = __tep_parse_format(eventp, tep, buf, size, sys); struct tep_event *event = *eventp; if (event == NULL) return ret; - if (pevent && add_event(pevent, event)) { + if (tep && add_event(tep, event)) { ret = TEP_ERRNO__MEM_ALLOC_FAILED; goto event_add_failed; } @@ -6164,7 +6256,7 @@ event_add_failed: /** * tep_parse_format - parse the event format - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @eventp: returned format * @buf: the buffer storing the event format string * @size: the size of @buf @@ -6177,17 +6269,17 @@ event_add_failed: * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno tep_parse_format(struct tep_handle *pevent, +enum tep_errno tep_parse_format(struct tep_handle *tep, struct tep_event **eventp, const char *buf, unsigned long size, const char *sys) { - return __parse_event(pevent, eventp, buf, size, sys); + return __parse_event(tep, eventp, buf, size, sys); } /** * tep_parse_event - parse the event format - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @buf: the buffer storing the event format string * @size: the size of @buf * @sys: the system the event belongs to @@ -6199,11 +6291,11 @@ enum tep_errno tep_parse_format(struct tep_handle *pevent, * * /sys/kernel/debug/tracing/events/.../.../format */ -enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, +enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, unsigned long size, const char *sys) { struct tep_event *event = NULL; - return __parse_event(pevent, &event, buf, size, sys); + return __parse_event(tep, &event, buf, size, sys); } int get_field_val(struct trace_seq *s, struct tep_format_field *field, @@ -6265,8 +6357,8 @@ void *tep_get_field_raw(struct trace_seq *s, struct tep_event *event, offset = field->offset; if (field->flags & TEP_FIELD_IS_DYNAMIC) { - offset = tep_read_number(event->pevent, - data + offset, field->size); + offset = tep_read_number(event->tep, + data + offset, field->size); *len = offset >> 16; offset &= 0xffff; } else @@ -6359,7 +6451,8 @@ int tep_get_any_field_val(struct trace_seq *s, struct tep_event *event, * @record: The record with the field name. * @err: print default error if failed. * - * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + * Returns positive value on success, negative in case of an error, + * or 0 if buffer is full. */ int tep_print_num_field(struct trace_seq *s, const char *fmt, struct tep_event *event, const char *name, @@ -6391,14 +6484,15 @@ int tep_print_num_field(struct trace_seq *s, const char *fmt, * @record: The record with the field name. * @err: print default error if failed. * - * Returns: 0 on success, -1 field not found, or 1 if buffer is full. + * Returns positive value on success, negative in case of an error, + * or 0 if buffer is full. */ int tep_print_func_field(struct trace_seq *s, const char *fmt, struct tep_event *event, const char *name, struct tep_record *record, int err) { struct tep_format_field *field = tep_find_field(event, name); - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; unsigned long long val; struct func_map *func; char tmp[128]; @@ -6409,7 +6503,7 @@ int tep_print_func_field(struct trace_seq *s, const char *fmt, if (tep_read_number_field(field, record->data, &val)) goto failed; - func = find_func(pevent, val); + func = find_func(tep, val); if (func) snprintf(tmp, 128, "%s/0x%llx", func->func, func->addr - val); @@ -6441,7 +6535,7 @@ static void free_func_handle(struct tep_function_handler *func) /** * tep_register_print_function - register a helper function - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @func: the function to process the helper function * @ret_type: the return type of the helper function * @name: the name of the helper function @@ -6454,7 +6548,7 @@ static void free_func_handle(struct tep_function_handler *func) * The @parameters is a variable list of tep_func_arg_type enums that * must end with TEP_FUNC_ARG_VOID. */ -int tep_register_print_function(struct tep_handle *pevent, +int tep_register_print_function(struct tep_handle *tep, tep_func_handler func, enum tep_func_arg_type ret_type, char *name, ...) @@ -6466,7 +6560,7 @@ int tep_register_print_function(struct tep_handle *pevent, va_list ap; int ret; - func_handle = find_func_handler(pevent, name); + func_handle = find_func_handler(tep, name); if (func_handle) { /* * This is most like caused by the users own @@ -6474,7 +6568,7 @@ int tep_register_print_function(struct tep_handle *pevent, * system defaults. */ pr_stat("override of function helper '%s'", name); - remove_func_handler(pevent, name); + remove_func_handler(tep, name); } func_handle = calloc(1, sizeof(*func_handle)); @@ -6521,8 +6615,8 @@ int tep_register_print_function(struct tep_handle *pevent, } va_end(ap); - func_handle->next = pevent->func_handlers; - pevent->func_handlers = func_handle; + func_handle->next = tep->func_handlers; + tep->func_handlers = func_handle; return 0; out_free: @@ -6533,7 +6627,7 @@ int tep_register_print_function(struct tep_handle *pevent, /** * tep_unregister_print_function - unregister a helper function - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @func: the function to process the helper function * @name: the name of the helper function * @@ -6541,20 +6635,20 @@ int tep_register_print_function(struct tep_handle *pevent, * * Returns 0 if the handler was removed successully, -1 otherwise. */ -int tep_unregister_print_function(struct tep_handle *pevent, +int tep_unregister_print_function(struct tep_handle *tep, tep_func_handler func, char *name) { struct tep_function_handler *func_handle; - func_handle = find_func_handler(pevent, name); + func_handle = find_func_handler(tep, name); if (func_handle && func_handle->func == func) { - remove_func_handler(pevent, name); + remove_func_handler(tep, name); return 0; } return -1; } -static struct tep_event *search_event(struct tep_handle *pevent, int id, +static struct tep_event *search_event(struct tep_handle *tep, int id, const char *sys_name, const char *event_name) { @@ -6562,7 +6656,7 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, if (id >= 0) { /* search by id */ - event = tep_find_event(pevent, id); + event = tep_find_event(tep, id); if (!event) return NULL; if (event_name && (strcmp(event_name, event->name) != 0)) @@ -6570,7 +6664,7 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, if (sys_name && (strcmp(sys_name, event->system) != 0)) return NULL; } else { - event = tep_find_event_by_name(pevent, sys_name, event_name); + event = tep_find_event_by_name(tep, sys_name, event_name); if (!event) return NULL; } @@ -6579,7 +6673,7 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, /** * tep_register_event_handler - register a way to parse an event - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @id: the id of the event to register * @sys_name: the system name the event belongs to * @event_name: the name of the event @@ -6593,15 +6687,21 @@ static struct tep_event *search_event(struct tep_handle *pevent, int id, * * If @id is >= 0, then it is used to find the event. * else @sys_name and @event_name are used. + * + * Returns: + * TEP_REGISTER_SUCCESS_OVERWRITE if an existing handler is overwritten + * TEP_REGISTER_SUCCESS if a new handler is registered successfully + * negative TEP_ERRNO_... in case of an error + * */ -int tep_register_event_handler(struct tep_handle *pevent, int id, +int tep_register_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context) { struct tep_event *event; struct event_handler *handle; - event = search_event(pevent, id, sys_name, event_name); + event = search_event(tep, id, sys_name, event_name); if (event == NULL) goto not_found; @@ -6610,7 +6710,7 @@ int tep_register_event_handler(struct tep_handle *pevent, int id, event->handler = func; event->context = context; - return 0; + return TEP_REGISTER_SUCCESS_OVERWRITE; not_found: /* Save for later use. */ @@ -6636,11 +6736,11 @@ int tep_register_event_handler(struct tep_handle *pevent, int id, } handle->func = func; - handle->next = pevent->handlers; - pevent->handlers = handle; + handle->next = tep->handlers; + tep->handlers = handle; handle->context = context; - return -1; + return TEP_REGISTER_SUCCESS; } static int handle_matches(struct event_handler *handler, int id, @@ -6664,7 +6764,7 @@ static int handle_matches(struct event_handler *handler, int id, /** * tep_unregister_event_handler - unregister an existing event handler - * @pevent: the handle to the pevent + * @tep: a handle to the trace event parser context * @id: the id of the event to unregister * @sys_name: the system name the handler belongs to * @event_name: the name of the event handler @@ -6678,7 +6778,7 @@ static int handle_matches(struct event_handler *handler, int id, * * Returns 0 if handler was removed successfully, -1 if event was not found. */ -int tep_unregister_event_handler(struct tep_handle *pevent, int id, +int tep_unregister_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context) { @@ -6686,7 +6786,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id, struct event_handler *handle; struct event_handler **next; - event = search_event(pevent, id, sys_name, event_name); + event = search_event(tep, id, sys_name, event_name); if (event == NULL) goto not_found; @@ -6700,7 +6800,7 @@ int tep_unregister_event_handler(struct tep_handle *pevent, int id, } not_found: - for (next = &pevent->handlers; *next; next = &(*next)->next) { + for (next = &tep->handlers; *next; next = &(*next)->next) { handle = *next; if (handle_matches(handle, id, sys_name, event_name, func, context)) @@ -6717,21 +6817,23 @@ not_found: } /** - * tep_alloc - create a pevent handle + * tep_alloc - create a tep handle */ struct tep_handle *tep_alloc(void) { - struct tep_handle *pevent = calloc(1, sizeof(*pevent)); + struct tep_handle *tep = calloc(1, sizeof(*tep)); - if (pevent) - pevent->ref_count = 1; + if (tep) { + tep->ref_count = 1; + tep->host_bigendian = tep_is_bigendian(); + } - return pevent; + return tep; } -void tep_ref(struct tep_handle *pevent) +void tep_ref(struct tep_handle *tep) { - pevent->ref_count++; + tep->ref_count++; } int tep_get_ref(struct tep_handle *tep) @@ -6781,10 +6883,10 @@ void tep_free_event(struct tep_event *event) } /** - * tep_free - free a pevent handle - * @pevent: the pevent handle to free + * tep_free - free a tep handle + * @tep: the tep handle to free */ -void tep_free(struct tep_handle *pevent) +void tep_free(struct tep_handle *tep) { struct cmdline_list *cmdlist, *cmdnext; struct func_list *funclist, *funcnext; @@ -6793,21 +6895,21 @@ void tep_free(struct tep_handle *pevent) struct event_handler *handle; int i; - if (!pevent) + if (!tep) return; - cmdlist = pevent->cmdlist; - funclist = pevent->funclist; - printklist = pevent->printklist; + cmdlist = tep->cmdlist; + funclist = tep->funclist; + printklist = tep->printklist; - pevent->ref_count--; - if (pevent->ref_count) + tep->ref_count--; + if (tep->ref_count) return; - if (pevent->cmdlines) { - for (i = 0; i < pevent->cmdline_count; i++) - free(pevent->cmdlines[i].comm); - free(pevent->cmdlines); + if (tep->cmdlines) { + for (i = 0; i < tep->cmdline_count; i++) + free(tep->cmdlines[i].comm); + free(tep->cmdlines); } while (cmdlist) { @@ -6817,12 +6919,12 @@ void tep_free(struct tep_handle *pevent) cmdlist = cmdnext; } - if (pevent->func_map) { - for (i = 0; i < (int)pevent->func_count; i++) { - free(pevent->func_map[i].func); - free(pevent->func_map[i].mod); + if (tep->func_map) { + for (i = 0; i < (int)tep->func_count; i++) { + free(tep->func_map[i].func); + free(tep->func_map[i].mod); } - free(pevent->func_map); + free(tep->func_map); } while (funclist) { @@ -6833,16 +6935,16 @@ void tep_free(struct tep_handle *pevent) funclist = funcnext; } - while (pevent->func_handlers) { - func_handler = pevent->func_handlers; - pevent->func_handlers = func_handler->next; + while (tep->func_handlers) { + func_handler = tep->func_handlers; + tep->func_handlers = func_handler->next; free_func_handle(func_handler); } - if (pevent->printk_map) { - for (i = 0; i < (int)pevent->printk_count; i++) - free(pevent->printk_map[i].printk); - free(pevent->printk_map); + if (tep->printk_map) { + for (i = 0; i < (int)tep->printk_count; i++) + free(tep->printk_map[i].printk); + free(tep->printk_map); } while (printklist) { @@ -6852,24 +6954,24 @@ void tep_free(struct tep_handle *pevent) printklist = printknext; } - for (i = 0; i < pevent->nr_events; i++) - tep_free_event(pevent->events[i]); + for (i = 0; i < tep->nr_events; i++) + tep_free_event(tep->events[i]); - while (pevent->handlers) { - handle = pevent->handlers; - pevent->handlers = handle->next; + while (tep->handlers) { + handle = tep->handlers; + tep->handlers = handle->next; free_handler(handle); } - free(pevent->trace_clock); - free(pevent->events); - free(pevent->sort_events); - free(pevent->func_resolver); + free(tep->trace_clock); + free(tep->events); + free(tep->sort_events); + free(tep->func_resolver); - free(pevent); + free(tep); } -void tep_unref(struct tep_handle *pevent) +void tep_unref(struct tep_handle *tep) { - tep_free(pevent); + tep_free(tep); } diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 35d37087d3c5..642f68ab5fb2 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -64,8 +64,8 @@ typedef int (*tep_event_handler_func)(struct trace_seq *s, struct tep_event *event, void *context); -typedef int (*tep_plugin_load_func)(struct tep_handle *pevent); -typedef int (*tep_plugin_unload_func)(struct tep_handle *pevent); +typedef int (*tep_plugin_load_func)(struct tep_handle *tep); +typedef int (*tep_plugin_unload_func)(struct tep_handle *tep); struct tep_plugin_option { struct tep_plugin_option *next; @@ -85,12 +85,12 @@ struct tep_plugin_option { * TEP_PLUGIN_LOADER: (required) * The function name to initialized the plugin. * - * int TEP_PLUGIN_LOADER(struct tep_handle *pevent) + * int TEP_PLUGIN_LOADER(struct tep_handle *tep) * * TEP_PLUGIN_UNLOADER: (optional) * The function called just before unloading * - * int TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) + * int TEP_PLUGIN_UNLOADER(struct tep_handle *tep) * * TEP_PLUGIN_OPTIONS: (optional) * Plugin options that can be set before loading @@ -278,7 +278,7 @@ struct tep_print_fmt { }; struct tep_event { - struct tep_handle *pevent; + struct tep_handle *tep; char *name; int id; int flags; @@ -393,9 +393,9 @@ struct tep_plugin_list; #define INVALID_PLUGIN_LIST_OPTION ((char **)((unsigned long)-1)) -struct tep_plugin_list *tep_load_plugins(struct tep_handle *pevent); +struct tep_plugin_list *tep_load_plugins(struct tep_handle *tep); void tep_unload_plugins(struct tep_plugin_list *plugin_list, - struct tep_handle *pevent); + struct tep_handle *tep); char **tep_plugin_list_options(void); void tep_plugin_free_options_list(char **list); int tep_plugin_add_options(const char *name, @@ -409,8 +409,10 @@ void tep_print_plugins(struct trace_seq *s, typedef char *(tep_func_resolver_t)(void *priv, unsigned long long *addrp, char **modp); void tep_set_flag(struct tep_handle *tep, int flag); +void tep_clear_flag(struct tep_handle *tep, enum tep_flag flag); +bool tep_test_flag(struct tep_handle *tep, enum tep_flag flags); -static inline int tep_host_bigendian(void) +static inline int tep_is_bigendian(void) { unsigned char str[] = { 0x1, 0x2, 0x3, 0x4 }; unsigned int val; @@ -428,36 +430,37 @@ enum trace_flag_type { TRACE_FLAG_SOFTIRQ = 0x10, }; -int tep_set_function_resolver(struct tep_handle *pevent, +int tep_set_function_resolver(struct tep_handle *tep, tep_func_resolver_t *func, void *priv); -void tep_reset_function_resolver(struct tep_handle *pevent); -int tep_register_comm(struct tep_handle *pevent, const char *comm, int pid); -int tep_register_trace_clock(struct tep_handle *pevent, const char *trace_clock); -int tep_register_function(struct tep_handle *pevent, char *name, +void tep_reset_function_resolver(struct tep_handle *tep); +int tep_register_comm(struct tep_handle *tep, const char *comm, int pid); +int tep_override_comm(struct tep_handle *tep, const char *comm, int pid); +int tep_register_trace_clock(struct tep_handle *tep, const char *trace_clock); +int tep_register_function(struct tep_handle *tep, char *name, unsigned long long addr, char *mod); -int tep_register_print_string(struct tep_handle *pevent, const char *fmt, +int tep_register_print_string(struct tep_handle *tep, const char *fmt, unsigned long long addr); -int tep_pid_is_registered(struct tep_handle *pevent, int pid); +bool tep_is_pid_registered(struct tep_handle *tep, int pid); -void tep_print_event_task(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_task(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record); -void tep_print_event_time(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_time(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record, bool use_trace_clock); -void tep_print_event_data(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event_data(struct tep_handle *tep, struct trace_seq *s, struct tep_event *event, struct tep_record *record); -void tep_print_event(struct tep_handle *pevent, struct trace_seq *s, +void tep_print_event(struct tep_handle *tep, struct trace_seq *s, struct tep_record *record, bool use_trace_clock); -int tep_parse_header_page(struct tep_handle *pevent, char *buf, unsigned long size, +int tep_parse_header_page(struct tep_handle *tep, char *buf, unsigned long size, int long_size); -enum tep_errno tep_parse_event(struct tep_handle *pevent, const char *buf, +enum tep_errno tep_parse_event(struct tep_handle *tep, const char *buf, unsigned long size, const char *sys); -enum tep_errno tep_parse_format(struct tep_handle *pevent, +enum tep_errno tep_parse_format(struct tep_handle *tep, struct tep_event **eventp, const char *buf, unsigned long size, const char *sys); @@ -484,51 +487,55 @@ int tep_print_func_field(struct trace_seq *s, const char *fmt, struct tep_event *event, const char *name, struct tep_record *record, int err); -int tep_register_event_handler(struct tep_handle *pevent, int id, +enum tep_reg_handler { + TEP_REGISTER_SUCCESS = 0, + TEP_REGISTER_SUCCESS_OVERWRITE, +}; + +int tep_register_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context); -int tep_unregister_event_handler(struct tep_handle *pevent, int id, +int tep_unregister_event_handler(struct tep_handle *tep, int id, const char *sys_name, const char *event_name, tep_event_handler_func func, void *context); -int tep_register_print_function(struct tep_handle *pevent, +int tep_register_print_function(struct tep_handle *tep, tep_func_handler func, enum tep_func_arg_type ret_type, char *name, ...); -int tep_unregister_print_function(struct tep_handle *pevent, +int tep_unregister_print_function(struct tep_handle *tep, tep_func_handler func, char *name); struct tep_format_field *tep_find_common_field(struct tep_event *event, const char *name); struct tep_format_field *tep_find_field(struct tep_event *event, const char *name); struct tep_format_field *tep_find_any_field(struct tep_event *event, const char *name); -const char *tep_find_function(struct tep_handle *pevent, unsigned long long addr); +const char *tep_find_function(struct tep_handle *tep, unsigned long long addr); unsigned long long -tep_find_function_address(struct tep_handle *pevent, unsigned long long addr); -unsigned long long tep_read_number(struct tep_handle *pevent, const void *ptr, int size); +tep_find_function_address(struct tep_handle *tep, unsigned long long addr); +unsigned long long tep_read_number(struct tep_handle *tep, const void *ptr, int size); int tep_read_number_field(struct tep_format_field *field, const void *data, unsigned long long *value); struct tep_event *tep_get_first_event(struct tep_handle *tep); int tep_get_events_count(struct tep_handle *tep); -struct tep_event *tep_find_event(struct tep_handle *pevent, int id); +struct tep_event *tep_find_event(struct tep_handle *tep, int id); struct tep_event * -tep_find_event_by_name(struct tep_handle *pevent, const char *sys, const char *name); +tep_find_event_by_name(struct tep_handle *tep, const char *sys, const char *name); struct tep_event * -tep_find_event_by_record(struct tep_handle *pevent, struct tep_record *record); - -void tep_data_lat_fmt(struct tep_handle *pevent, - struct trace_seq *s, struct tep_record *record); -int tep_data_type(struct tep_handle *pevent, struct tep_record *rec); -struct tep_event *tep_data_event_from_type(struct tep_handle *pevent, int type); -int tep_data_pid(struct tep_handle *pevent, struct tep_record *rec); -int tep_data_preempt_count(struct tep_handle *pevent, struct tep_record *rec); -int tep_data_flags(struct tep_handle *pevent, struct tep_record *rec); -const char *tep_data_comm_from_pid(struct tep_handle *pevent, int pid); -struct cmdline; -struct cmdline *tep_data_pid_from_comm(struct tep_handle *pevent, const char *comm, - struct cmdline *next); -int tep_cmdline_pid(struct tep_handle *pevent, struct cmdline *cmdline); +tep_find_event_by_record(struct tep_handle *tep, struct tep_record *record); + +void tep_data_latency_format(struct tep_handle *tep, + struct trace_seq *s, struct tep_record *record); +int tep_data_type(struct tep_handle *tep, struct tep_record *rec); +int tep_data_pid(struct tep_handle *tep, struct tep_record *rec); +int tep_data_preempt_count(struct tep_handle *tep, struct tep_record *rec); +int tep_data_flags(struct tep_handle *tep, struct tep_record *rec); +const char *tep_data_comm_from_pid(struct tep_handle *tep, int pid); +struct tep_cmdline; +struct tep_cmdline *tep_data_pid_from_comm(struct tep_handle *tep, const char *comm, + struct tep_cmdline *next); +int tep_cmdline_pid(struct tep_handle *tep, struct tep_cmdline *cmdline); void tep_print_field(struct trace_seq *s, void *data, struct tep_format_field *field); @@ -536,10 +543,12 @@ void tep_print_fields(struct trace_seq *s, void *data, int size __maybe_unused, struct tep_event *event); void tep_event_info(struct trace_seq *s, struct tep_event *event, struct tep_record *record); -int tep_strerror(struct tep_handle *pevent, enum tep_errno errnum, +int tep_strerror(struct tep_handle *tep, enum tep_errno errnum, char *buf, size_t buflen); -struct tep_event **tep_list_events(struct tep_handle *pevent, enum tep_event_sort_type); +struct tep_event **tep_list_events(struct tep_handle *tep, enum tep_event_sort_type); +struct tep_event **tep_list_events_copy(struct tep_handle *tep, + enum tep_event_sort_type); struct tep_format_field **tep_event_common_fields(struct tep_event *event); struct tep_format_field **tep_event_fields(struct tep_event *event); @@ -547,24 +556,28 @@ enum tep_endian { TEP_LITTLE_ENDIAN = 0, TEP_BIG_ENDIAN }; -int tep_get_cpus(struct tep_handle *pevent); -void tep_set_cpus(struct tep_handle *pevent, int cpus); -int tep_get_long_size(struct tep_handle *pevent); -void tep_set_long_size(struct tep_handle *pevent, int long_size); -int tep_get_page_size(struct tep_handle *pevent); -void tep_set_page_size(struct tep_handle *pevent, int _page_size); -int tep_is_file_bigendian(struct tep_handle *pevent); -void tep_set_file_bigendian(struct tep_handle *pevent, enum tep_endian endian); -int tep_is_host_bigendian(struct tep_handle *pevent); -void tep_set_host_bigendian(struct tep_handle *pevent, enum tep_endian endian); -int tep_is_latency_format(struct tep_handle *pevent); -void tep_set_latency_format(struct tep_handle *pevent, int lat); -int tep_get_header_page_size(struct tep_handle *pevent); +int tep_get_cpus(struct tep_handle *tep); +void tep_set_cpus(struct tep_handle *tep, int cpus); +int tep_get_long_size(struct tep_handle *tep); +void tep_set_long_size(struct tep_handle *tep, int long_size); +int tep_get_page_size(struct tep_handle *tep); +void tep_set_page_size(struct tep_handle *tep, int _page_size); +bool tep_is_file_bigendian(struct tep_handle *tep); +void tep_set_file_bigendian(struct tep_handle *tep, enum tep_endian endian); +bool tep_is_local_bigendian(struct tep_handle *tep); +void tep_set_local_bigendian(struct tep_handle *tep, enum tep_endian endian); +bool tep_is_latency_format(struct tep_handle *tep); +void tep_set_latency_format(struct tep_handle *tep, int lat); +int tep_get_header_page_size(struct tep_handle *tep); +int tep_get_header_timestamp_size(struct tep_handle *tep); +bool tep_is_old_format(struct tep_handle *tep); +void tep_set_print_raw(struct tep_handle *tep, int print_raw); +void tep_set_test_filters(struct tep_handle *tep, int test_filters); struct tep_handle *tep_alloc(void); -void tep_free(struct tep_handle *pevent); -void tep_ref(struct tep_handle *pevent); -void tep_unref(struct tep_handle *pevent); +void tep_free(struct tep_handle *tep); +void tep_ref(struct tep_handle *tep); +void tep_unref(struct tep_handle *tep); int tep_get_ref(struct tep_handle *tep); /* access to the internal parser */ @@ -576,8 +589,8 @@ const char *tep_get_input_buf(void); unsigned long long tep_get_input_buf_ptr(void); /* for debugging */ -void tep_print_funcs(struct tep_handle *pevent); -void tep_print_printk(struct tep_handle *pevent); +void tep_print_funcs(struct tep_handle *tep); +void tep_print_printk(struct tep_handle *tep); /* ----------------------- filtering ----------------------- */ @@ -704,13 +717,13 @@ struct tep_filter_type { #define TEP_FILTER_ERROR_BUFSZ 1024 struct tep_event_filter { - struct tep_handle *pevent; + struct tep_handle *tep; int filters; struct tep_filter_type *event_filters; char error_buffer[TEP_FILTER_ERROR_BUFSZ]; }; -struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent); +struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep); /* for backward compatibility */ #define FILTER_NONE TEP_ERRNO__NO_FILTER @@ -718,12 +731,6 @@ struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent); #define FILTER_MISS TEP_ERRNO__FILTER_MISS #define FILTER_MATCH TEP_ERRNO__FILTER_MATCH -enum tep_filter_trivial_type { - TEP_FILTER_TRIVIAL_FALSE, - TEP_FILTER_TRIVIAL_TRUE, - TEP_FILTER_TRIVIAL_BOTH, -}; - enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, const char *filter_str); @@ -738,9 +745,6 @@ int tep_event_filtered(struct tep_event_filter *filter, void tep_filter_reset(struct tep_event_filter *filter); -int tep_filter_clear_trivial(struct tep_event_filter *filter, - enum tep_filter_trivial_type type); - void tep_filter_free(struct tep_event_filter *filter); char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); @@ -748,15 +752,8 @@ char *tep_filter_make_string(struct tep_event_filter *filter, int event_id); int tep_filter_remove_event(struct tep_event_filter *filter, int event_id); -int tep_filter_event_has_trivial(struct tep_event_filter *filter, - int event_id, - enum tep_filter_trivial_type type); - int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *source); -int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source, - enum tep_filter_trivial_type type); - int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter *filter2); #endif /* _PARSE_EVENTS_H */ diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index e74f16c88398..8ca28de9337a 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -269,7 +269,7 @@ void tep_print_plugins(struct trace_seq *s, } static void -load_plugin(struct tep_handle *pevent, const char *path, +load_plugin(struct tep_handle *tep, const char *path, const char *file, void *data) { struct tep_plugin_list **plugin_list = data; @@ -316,7 +316,7 @@ load_plugin(struct tep_handle *pevent, const char *path, *plugin_list = list; pr_stat("registering plugin: %s", plugin); - func(pevent); + func(tep); return; out_free: @@ -324,9 +324,9 @@ load_plugin(struct tep_handle *pevent, const char *path, } static void -load_plugins_dir(struct tep_handle *pevent, const char *suffix, +load_plugins_dir(struct tep_handle *tep, const char *suffix, const char *path, - void (*load_plugin)(struct tep_handle *pevent, + void (*load_plugin)(struct tep_handle *tep, const char *path, const char *name, void *data), @@ -359,15 +359,15 @@ load_plugins_dir(struct tep_handle *pevent, const char *suffix, if (strcmp(name + (strlen(name) - strlen(suffix)), suffix) != 0) continue; - load_plugin(pevent, path, name, data); + load_plugin(tep, path, name, data); } closedir(dir); } static void -load_plugins(struct tep_handle *pevent, const char *suffix, - void (*load_plugin)(struct tep_handle *pevent, +load_plugins(struct tep_handle *tep, const char *suffix, + void (*load_plugin)(struct tep_handle *tep, const char *path, const char *name, void *data), @@ -378,7 +378,7 @@ load_plugins(struct tep_handle *pevent, const char *suffix, char *envdir; int ret; - if (pevent->flags & TEP_DISABLE_PLUGINS) + if (tep->flags & TEP_DISABLE_PLUGINS) return; /* @@ -386,8 +386,8 @@ load_plugins(struct tep_handle *pevent, const char *suffix, * check that first. */ #ifdef PLUGIN_DIR - if (!(pevent->flags & TEP_DISABLE_SYS_PLUGINS)) - load_plugins_dir(pevent, suffix, PLUGIN_DIR, + if (!(tep->flags & TEP_DISABLE_SYS_PLUGINS)) + load_plugins_dir(tep, suffix, PLUGIN_DIR, load_plugin, data); #endif @@ -397,7 +397,7 @@ load_plugins(struct tep_handle *pevent, const char *suffix, */ envdir = getenv("TRACEEVENT_PLUGIN_DIR"); if (envdir) - load_plugins_dir(pevent, suffix, envdir, load_plugin, data); + load_plugins_dir(tep, suffix, envdir, load_plugin, data); /* * Now let the home directory override the environment @@ -413,22 +413,22 @@ load_plugins(struct tep_handle *pevent, const char *suffix, return; } - load_plugins_dir(pevent, suffix, path, load_plugin, data); + load_plugins_dir(tep, suffix, path, load_plugin, data); free(path); } struct tep_plugin_list* -tep_load_plugins(struct tep_handle *pevent) +tep_load_plugins(struct tep_handle *tep) { struct tep_plugin_list *list = NULL; - load_plugins(pevent, ".so", load_plugin, &list); + load_plugins(tep, ".so", load_plugin, &list); return list; } void -tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *pevent) +tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *tep) { tep_plugin_unload_func func; struct tep_plugin_list *list; @@ -438,7 +438,7 @@ tep_unload_plugins(struct tep_plugin_list *plugin_list, struct tep_handle *peven plugin_list = list->next; func = dlsym(list->handle, TEP_PLUGIN_UNLOADER_NAME); if (func) - func(pevent); + func(tep); dlclose(list->handle); free(list->name); free(list); diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index af2a1f3b7424..b887e7437d67 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -727,3 +727,52 @@ int kbuffer_start_of_data(struct kbuffer *kbuf) { return kbuf->start; } + +/** + * kbuffer_raw_get - get raw buffer info + * @kbuf: The kbuffer + * @subbuf: Start of mapped subbuffer + * @info: Info descriptor to fill in + * + * For debugging. This can return internals of the ring buffer. + * Expects to have info->next set to what it will read. + * The type, length and timestamp delta will be filled in, and + * @info->next will be updated to the next element. + * The @subbuf is used to know if the info is passed the end of + * data and NULL will be returned if it is. + */ +struct kbuffer_raw_info * +kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, struct kbuffer_raw_info *info) +{ + unsigned long long flags; + unsigned long long delta; + unsigned int type_len; + unsigned int size; + int start; + int length; + void *ptr = info->next; + + if (!kbuf || !subbuf) + return NULL; + + if (kbuf->flags & KBUFFER_FL_LONG_8) + start = 16; + else + start = 12; + + flags = read_long(kbuf, subbuf + 8); + size = (unsigned int)flags & COMMIT_MASK; + + if (ptr < subbuf || ptr >= subbuf + start + size) + return NULL; + + type_len = translate_data(kbuf, ptr, &ptr, &delta, &length); + + info->next = ptr + length; + + info->type = type_len; + info->delta = delta; + info->length = length; + + return info; +} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index 03dce757553f..ed4d697fc137 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -65,4 +65,17 @@ int kbuffer_subbuffer_size(struct kbuffer *kbuf); void kbuffer_set_old_format(struct kbuffer *kbuf); int kbuffer_start_of_data(struct kbuffer *kbuf); +/* Debugging */ + +struct kbuffer_raw_info { + int type; + int length; + unsigned long long delta; + void *next; +}; + +/* Read raw data */ +struct kbuffer_raw_info *kbuffer_raw_get(struct kbuffer *kbuf, void *subbuf, + struct kbuffer_raw_info *info); + #endif /* _K_BUFFER_H */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index cb5ce66dab6e..552592d153fb 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -154,7 +154,7 @@ add_filter_type(struct tep_event_filter *filter, int id) filter_type = &filter->event_filters[i]; filter_type->event_id = id; - filter_type->event = tep_find_event(filter->pevent, id); + filter_type->event = tep_find_event(filter->tep, id); filter_type->filter = NULL; filter->filters++; @@ -164,9 +164,9 @@ add_filter_type(struct tep_event_filter *filter, int id) /** * tep_filter_alloc - create a new event filter - * @pevent: The pevent that this filter is associated with + * @tep: The tep that this filter is associated with */ -struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent) +struct tep_event_filter *tep_filter_alloc(struct tep_handle *tep) { struct tep_event_filter *filter; @@ -175,8 +175,8 @@ struct tep_event_filter *tep_filter_alloc(struct tep_handle *pevent) return NULL; memset(filter, 0, sizeof(*filter)); - filter->pevent = pevent; - tep_ref(pevent); + filter->tep = tep; + tep_ref(tep); return filter; } @@ -256,7 +256,7 @@ static int event_match(struct tep_event *event, } static enum tep_errno -find_event(struct tep_handle *pevent, struct event_list **events, +find_event(struct tep_handle *tep, struct event_list **events, char *sys_name, char *event_name) { struct tep_event *event; @@ -299,8 +299,8 @@ find_event(struct tep_handle *pevent, struct event_list **events, } } - for (i = 0; i < pevent->nr_events; i++) { - event = pevent->events[i]; + for (i = 0; i < tep->nr_events; i++) { + event = tep->events[i]; if (event_match(event, sys_name ? &sreg : NULL, &ereg)) { match = 1; if (add_event(events, event) < 0) { @@ -1257,7 +1257,7 @@ static void filter_init_error_buf(struct tep_event_filter *filter) enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, const char *filter_str) { - struct tep_handle *pevent = filter->pevent; + struct tep_handle *tep = filter->tep; struct event_list *event; struct event_list *events = NULL; const char *filter_start; @@ -1313,7 +1313,7 @@ enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, } /* Find this event */ - ret = find_event(pevent, &events, strim(sys_name), strim(event_name)); + ret = find_event(tep, &events, strim(sys_name), strim(event_name)); if (ret < 0) { free_events(events); free(this_event); @@ -1334,7 +1334,7 @@ enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, if (ret < 0) rtn = ret; - if (ret >= 0 && pevent->test_filters) { + if (ret >= 0 && tep->test_filters) { char *test; test = tep_filter_make_string(filter, event->event->id); if (test) { @@ -1346,9 +1346,6 @@ enum tep_errno tep_filter_add_filter_str(struct tep_event_filter *filter, free_events(events); - if (rtn >= 0 && pevent->test_filters) - exit(0); - return rtn; } @@ -1380,7 +1377,7 @@ int tep_filter_strerror(struct tep_event_filter *filter, enum tep_errno err, return 0; } - return tep_strerror(filter->pevent, err, buf, buflen); + return tep_strerror(filter->tep, err, buf, buflen); } /** @@ -1443,7 +1440,7 @@ void tep_filter_reset(struct tep_event_filter *filter) void tep_filter_free(struct tep_event_filter *filter) { - tep_unref(filter->pevent); + tep_unref(filter->tep); tep_filter_reset(filter); @@ -1462,10 +1459,10 @@ static int copy_filter_type(struct tep_event_filter *filter, const char *name; char *str; - /* Can't assume that the pevent's are the same */ + /* Can't assume that the tep's are the same */ sys = filter_type->event->system; name = filter_type->event->name; - event = tep_find_event_by_name(filter->pevent, sys, name); + event = tep_find_event_by_name(filter->tep, sys, name); if (!event) return -1; @@ -1522,167 +1519,6 @@ int tep_filter_copy(struct tep_event_filter *dest, struct tep_event_filter *sour return ret; } - -/** - * tep_update_trivial - update the trivial filters with the given filter - * @dest - the filter to update - * @source - the filter as the source of the update - * @type - the type of trivial filter to update. - * - * Scan dest for trivial events matching @type to replace with the source. - * - * Returns 0 on success and -1 if there was a problem updating, but - * events may have still been updated on error. - */ -int tep_update_trivial(struct tep_event_filter *dest, struct tep_event_filter *source, - enum tep_filter_trivial_type type) -{ - struct tep_handle *src_pevent; - struct tep_handle *dest_pevent; - struct tep_event *event; - struct tep_filter_type *filter_type; - struct tep_filter_arg *arg; - char *str; - int i; - - src_pevent = source->pevent; - dest_pevent = dest->pevent; - - /* Do nothing if either of the filters has nothing to filter */ - if (!dest->filters || !source->filters) - return 0; - - for (i = 0; i < dest->filters; i++) { - filter_type = &dest->event_filters[i]; - arg = filter_type->filter; - if (arg->type != TEP_FILTER_ARG_BOOLEAN) - continue; - if ((arg->boolean.value && type == TEP_FILTER_TRIVIAL_FALSE) || - (!arg->boolean.value && type == TEP_FILTER_TRIVIAL_TRUE)) - continue; - - event = filter_type->event; - - if (src_pevent != dest_pevent) { - /* do a look up */ - event = tep_find_event_by_name(src_pevent, - event->system, - event->name); - if (!event) - return -1; - } - - str = tep_filter_make_string(source, event->id); - if (!str) - continue; - - /* Don't bother if the filter is trivial too */ - if (strcmp(str, "TRUE") != 0 && strcmp(str, "FALSE") != 0) - filter_event(dest, event, str, NULL); - free(str); - } - return 0; -} - -/** - * tep_filter_clear_trivial - clear TRUE and FALSE filters - * @filter: the filter to remove trivial filters from - * @type: remove only true, false, or both - * - * Removes filters that only contain a TRUE or FALES boolean arg. - * - * Returns 0 on success and -1 if there was a problem. - */ -int tep_filter_clear_trivial(struct tep_event_filter *filter, - enum tep_filter_trivial_type type) -{ - struct tep_filter_type *filter_type; - int count = 0; - int *ids = NULL; - int i; - - if (!filter->filters) - return 0; - - /* - * Two steps, first get all ids with trivial filters. - * then remove those ids. - */ - for (i = 0; i < filter->filters; i++) { - int *new_ids; - - filter_type = &filter->event_filters[i]; - if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN) - continue; - switch (type) { - case TEP_FILTER_TRIVIAL_FALSE: - if (filter_type->filter->boolean.value) - continue; - break; - case TEP_FILTER_TRIVIAL_TRUE: - if (!filter_type->filter->boolean.value) - continue; - default: - break; - } - - new_ids = realloc(ids, sizeof(*ids) * (count + 1)); - if (!new_ids) { - free(ids); - return -1; - } - - ids = new_ids; - ids[count++] = filter_type->event_id; - } - - if (!count) - return 0; - - for (i = 0; i < count; i++) - tep_filter_remove_event(filter, ids[i]); - - free(ids); - return 0; -} - -/** - * tep_filter_event_has_trivial - return true event contains trivial filter - * @filter: the filter with the information - * @event_id: the id of the event to test - * @type: trivial type to test for (TRUE, FALSE, EITHER) - * - * Returns 1 if the event contains a matching trivial type - * otherwise 0. - */ -int tep_filter_event_has_trivial(struct tep_event_filter *filter, - int event_id, - enum tep_filter_trivial_type type) -{ - struct tep_filter_type *filter_type; - - if (!filter->filters) - return 0; - - filter_type = find_filter_type(filter, event_id); - - if (!filter_type) - return 0; - - if (filter_type->filter->type != TEP_FILTER_ARG_BOOLEAN) - return 0; - - switch (type) { - case TEP_FILTER_TRIVIAL_FALSE: - return !filter_type->filter->boolean.value; - - case TEP_FILTER_TRIVIAL_TRUE: - return filter_type->filter->boolean.value; - default: - return 1; - } -} - static int test_filter(struct tep_event *event, struct tep_filter_arg *arg, struct tep_record *record, enum tep_errno *err); @@ -1692,8 +1528,8 @@ get_comm(struct tep_event *event, struct tep_record *record) const char *comm; int pid; - pid = tep_data_pid(event->pevent, record); - comm = tep_data_comm_from_pid(event->pevent, pid); + pid = tep_data_pid(event->tep, record); + comm = tep_data_comm_from_pid(event->tep, pid); return comm; } @@ -1861,7 +1697,7 @@ static int test_num(struct tep_event *event, struct tep_filter_arg *arg, static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record *record) { struct tep_event *event; - struct tep_handle *pevent; + struct tep_handle *tep; unsigned long long addr; const char *val = NULL; unsigned int size; @@ -1891,12 +1727,12 @@ static const char *get_field_str(struct tep_filter_arg *arg, struct tep_record * } else { event = arg->str.field->event; - pevent = event->pevent; + tep = event->tep; addr = get_value(event, arg->str.field, record); if (arg->str.field->flags & (TEP_FIELD_IS_POINTER | TEP_FIELD_IS_LONG)) /* convert to a kernel symbol */ - val = tep_find_function(pevent, addr); + val = tep_find_function(tep, addr); if (val == NULL) { /* just use the hex of the string name */ @@ -2036,7 +1872,7 @@ int tep_event_filtered(struct tep_event_filter *filter, int event_id) enum tep_errno tep_filter_match(struct tep_event_filter *filter, struct tep_record *record) { - struct tep_handle *pevent = filter->pevent; + struct tep_handle *tep = filter->tep; struct tep_filter_type *filter_type; int event_id; int ret; @@ -2047,7 +1883,7 @@ enum tep_errno tep_filter_match(struct tep_event_filter *filter, if (!filter->filters) return TEP_ERRNO__NO_FILTER; - event_id = tep_data_type(pevent, record); + event_id = tep_data_type(tep, record); filter_type = find_filter_type(filter, event_id); if (!filter_type) @@ -2409,14 +2245,6 @@ int tep_filter_compare(struct tep_event_filter *filter1, struct tep_event_filter break; if (filter_type1->filter->type != filter_type2->filter->type) break; - switch (filter_type1->filter->type) { - case TEP_FILTER_TRIVIAL_FALSE: - case TEP_FILTER_TRIVIAL_TRUE: - /* trivial types just need the type compared */ - continue; - default: - break; - } /* The best way to compare complex filters is with strings */ str1 = arg_to_str(filter1, filter_type1->filter); str2 = arg_to_str(filter2, filter_type2->filter); diff --git a/tools/lib/traceevent/parse-utils.c b/tools/lib/traceevent/parse-utils.c index 77e4ec6402dd..e99867111387 100644 --- a/tools/lib/traceevent/parse-utils.c +++ b/tools/lib/traceevent/parse-utils.c @@ -14,7 +14,7 @@ void __vwarning(const char *fmt, va_list ap) { if (errno) - perror("trace-cmd"); + perror("libtraceevent"); errno = 0; fprintf(stderr, " "); diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c index a51b366f47da..3d43b56a6c98 100644 --- a/tools/lib/traceevent/plugin_cfg80211.c +++ b/tools/lib/traceevent/plugin_cfg80211.c @@ -25,9 +25,9 @@ process___le16_to_cpup(struct trace_seq *s, unsigned long long *args) return val ? (long long) le16toh(*val) : 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process___le16_to_cpup, TEP_FUNC_ARG_INT, "__le16_to_cpup", @@ -36,8 +36,8 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process___le16_to_cpup, + tep_unregister_print_function(tep, process___le16_to_cpup, "__le16_to_cpup"); } diff --git a/tools/lib/traceevent/plugin_function.c b/tools/lib/traceevent/plugin_function.c index a73eca34a8f9..7770fcb78e0f 100644 --- a/tools/lib/traceevent/plugin_function.c +++ b/tools/lib/traceevent/plugin_function.c @@ -126,7 +126,7 @@ static int add_and_get_index(const char *parent, const char *child, int cpu) static int function_handler(struct trace_seq *s, struct tep_record *record, struct tep_event *event, void *context) { - struct tep_handle *pevent = event->pevent; + struct tep_handle *tep = event->tep; unsigned long long function; unsigned long long pfunction; const char *func; @@ -136,12 +136,12 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, if (tep_get_field_val(s, event, "ip", record, &function, 1)) return trace_seq_putc(s, '!'); - func = tep_find_function(pevent, function); + func = tep_find_function(tep, function); if (tep_get_field_val(s, event, "parent_ip", record, &pfunction, 1)) return trace_seq_putc(s, '!'); - parent = tep_find_function(pevent, pfunction); + parent = tep_find_function(tep, pfunction); if (parent && ftrace_indent->set) index = add_and_get_index(parent, func, record->cpu); @@ -164,9 +164,9 @@ static int function_handler(struct trace_seq *s, struct tep_record *record, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "ftrace", "function", + tep_register_event_handler(tep, -1, "ftrace", "function", function_handler, NULL); tep_plugin_add_options("ftrace", plugin_options); @@ -174,11 +174,11 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { int i, x; - tep_unregister_event_handler(pevent, -1, "ftrace", "function", + tep_unregister_event_handler(tep, -1, "ftrace", "function", function_handler, NULL); for (i = 0; i <= cpus; i++) { diff --git a/tools/lib/traceevent/plugin_hrtimer.c b/tools/lib/traceevent/plugin_hrtimer.c index 5db5e401275f..bb434e0ed03a 100644 --- a/tools/lib/traceevent/plugin_hrtimer.c +++ b/tools/lib/traceevent/plugin_hrtimer.c @@ -67,23 +67,23 @@ static int timer_start_handler(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, + tep_register_event_handler(tep, -1, "timer", "hrtimer_expire_entry", timer_expire_handler, NULL); - tep_register_event_handler(pevent, -1, "timer", "hrtimer_start", + tep_register_event_handler(tep, -1, "timer", "hrtimer_start", timer_start_handler, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, + tep_unregister_event_handler(tep, -1, "timer", "hrtimer_expire_entry", timer_expire_handler, NULL); - tep_unregister_event_handler(pevent, -1, "timer", "hrtimer_start", + tep_unregister_event_handler(tep, -1, "timer", "hrtimer_start", timer_start_handler, NULL); } diff --git a/tools/lib/traceevent/plugin_jbd2.c b/tools/lib/traceevent/plugin_jbd2.c index a5e34135dd6a..04fc125f38cb 100644 --- a/tools/lib/traceevent/plugin_jbd2.c +++ b/tools/lib/traceevent/plugin_jbd2.c @@ -48,16 +48,16 @@ process_jiffies_to_msecs(struct trace_seq *s, unsigned long long *args) return jiffies; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process_jbd2_dev_to_name, TEP_FUNC_ARG_STRING, "jbd2_dev_to_name", TEP_FUNC_ARG_INT, TEP_FUNC_ARG_VOID); - tep_register_print_function(pevent, + tep_register_print_function(tep, process_jiffies_to_msecs, TEP_FUNC_ARG_LONG, "jiffies_to_msecs", @@ -66,11 +66,11 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process_jbd2_dev_to_name, + tep_unregister_print_function(tep, process_jbd2_dev_to_name, "jbd2_dev_to_name"); - tep_unregister_print_function(pevent, process_jiffies_to_msecs, + tep_unregister_print_function(tep, process_jiffies_to_msecs, "jiffies_to_msecs"); } diff --git a/tools/lib/traceevent/plugin_kmem.c b/tools/lib/traceevent/plugin_kmem.c index 0e3c601f9ed1..edaec5d962c3 100644 --- a/tools/lib/traceevent/plugin_kmem.c +++ b/tools/lib/traceevent/plugin_kmem.c @@ -39,57 +39,57 @@ static int call_site_handler(struct trace_seq *s, struct tep_record *record, if (tep_read_number_field(field, data, &val)) return 1; - func = tep_find_function(event->pevent, val); + func = tep_find_function(event->tep, val); if (!func) return 1; - addr = tep_find_function_address(event->pevent, val); + addr = tep_find_function_address(event->tep, val); trace_seq_printf(s, "(%s+0x%x) ", func, (int)(val - addr)); return 1; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "kmem", "kfree", + tep_register_event_handler(tep, -1, "kmem", "kfree", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmalloc", + tep_register_event_handler(tep, -1, "kmem", "kmalloc", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmalloc_node", + tep_register_event_handler(tep, -1, "kmem", "kmalloc_node", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", + tep_register_event_handler(tep, -1, "kmem", "kmem_cache_alloc_node", call_site_handler, NULL); - tep_register_event_handler(pevent, -1, "kmem", "kmem_cache_free", + tep_register_event_handler(tep, -1, "kmem", "kmem_cache_free", call_site_handler, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "kmem", "kfree", + tep_unregister_event_handler(tep, -1, "kmem", "kfree", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc", + tep_unregister_event_handler(tep, -1, "kmem", "kmalloc", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmalloc_node", + tep_unregister_event_handler(tep, -1, "kmem", "kmalloc_node", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_alloc", + tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", + tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_alloc_node", call_site_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kmem", "kmem_cache_free", + tep_unregister_event_handler(tep, -1, "kmem", "kmem_cache_free", call_site_handler, NULL); } diff --git a/tools/lib/traceevent/plugin_kvm.c b/tools/lib/traceevent/plugin_kvm.c index 754050eea467..c8e623065a7e 100644 --- a/tools/lib/traceevent/plugin_kvm.c +++ b/tools/lib/traceevent/plugin_kvm.c @@ -389,8 +389,8 @@ static int kvm_mmu_print_role(struct trace_seq *s, struct tep_record *record, * We can only use the structure if file is of the same * endianness. */ - if (tep_is_file_bigendian(event->pevent) == - tep_is_host_bigendian(event->pevent)) { + if (tep_is_file_bigendian(event->tep) == + tep_is_local_bigendian(event->tep)) { trace_seq_printf(s, "%u q%u%s %s%s %spae %snxe %swp%s%s%s", role.level, @@ -445,40 +445,40 @@ process_is_writable_pte(struct trace_seq *s, unsigned long long *args) return pte & PT_WRITABLE_MASK; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { init_disassembler(); - tep_register_event_handler(pevent, -1, "kvm", "kvm_exit", + tep_register_event_handler(tep, -1, "kvm", "kvm_exit", kvm_exit_handler, NULL); - tep_register_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + tep_register_event_handler(tep, -1, "kvm", "kvm_emulate_insn", kvm_emulate_insn_handler, NULL); - tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit", + tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit", kvm_nested_vmexit_handler, NULL); - tep_register_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject", + tep_register_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject", kvm_nested_vmexit_inject_handler, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page", kvm_mmu_get_page_handler, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page", kvm_mmu_print_role, NULL); - tep_register_event_handler(pevent, -1, + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_unsync_page", kvm_mmu_print_role, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page", kvm_mmu_print_role, NULL); - tep_register_event_handler(pevent, -1, "kvmmmu", + tep_register_event_handler(tep, -1, "kvmmmu", "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, NULL); - tep_register_print_function(pevent, + tep_register_print_function(tep, process_is_writable_pte, TEP_FUNC_ARG_INT, "is_writable_pte", @@ -487,37 +487,37 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_exit", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_exit", kvm_exit_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_emulate_insn", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_emulate_insn", kvm_emulate_insn_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit", kvm_nested_vmexit_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvm", "kvm_nested_vmexit_inject", + tep_unregister_event_handler(tep, -1, "kvm", "kvm_nested_vmexit_inject", kvm_nested_vmexit_inject_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_get_page", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_get_page", kvm_mmu_get_page_handler, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_sync_page", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_sync_page", kvm_mmu_print_role, NULL); - tep_unregister_event_handler(pevent, -1, + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_unsync_page", kvm_mmu_print_role, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", "kvm_mmu_zap_page", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_zap_page", kvm_mmu_print_role, NULL); - tep_unregister_event_handler(pevent, -1, "kvmmmu", + tep_unregister_event_handler(tep, -1, "kvmmmu", "kvm_mmu_prepare_zap_page", kvm_mmu_print_role, NULL); - tep_unregister_print_function(pevent, process_is_writable_pte, + tep_unregister_print_function(tep, process_is_writable_pte, "is_writable_pte"); } diff --git a/tools/lib/traceevent/plugin_mac80211.c b/tools/lib/traceevent/plugin_mac80211.c index e38b9477aad2..884303c26b5c 100644 --- a/tools/lib/traceevent/plugin_mac80211.c +++ b/tools/lib/traceevent/plugin_mac80211.c @@ -87,17 +87,17 @@ static int drv_bss_info_changed(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "mac80211", + tep_register_event_handler(tep, -1, "mac80211", "drv_bss_info_changed", drv_bss_info_changed, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "mac80211", + tep_unregister_event_handler(tep, -1, "mac80211", "drv_bss_info_changed", drv_bss_info_changed, NULL); } diff --git a/tools/lib/traceevent/plugin_sched_switch.c b/tools/lib/traceevent/plugin_sched_switch.c index 834c9e378ff8..957389a0ff7a 100644 --- a/tools/lib/traceevent/plugin_sched_switch.c +++ b/tools/lib/traceevent/plugin_sched_switch.c @@ -62,7 +62,7 @@ static void write_and_save_comm(struct tep_format_field *field, comm = &s->buffer[len]; /* Help out the comm to ids. This will handle dups */ - tep_register_comm(field->event->pevent, comm, pid); + tep_register_comm(field->event->tep, comm, pid); } static int sched_wakeup_handler(struct trace_seq *s, @@ -135,27 +135,27 @@ static int sched_switch_handler(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_event_handler(pevent, -1, "sched", "sched_switch", + tep_register_event_handler(tep, -1, "sched", "sched_switch", sched_switch_handler, NULL); - tep_register_event_handler(pevent, -1, "sched", "sched_wakeup", + tep_register_event_handler(tep, -1, "sched", "sched_wakeup", sched_wakeup_handler, NULL); - tep_register_event_handler(pevent, -1, "sched", "sched_wakeup_new", + tep_register_event_handler(tep, -1, "sched", "sched_wakeup_new", sched_wakeup_handler, NULL); return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_event_handler(pevent, -1, "sched", "sched_switch", + tep_unregister_event_handler(tep, -1, "sched", "sched_switch", sched_switch_handler, NULL); - tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup", + tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup", sched_wakeup_handler, NULL); - tep_unregister_event_handler(pevent, -1, "sched", "sched_wakeup_new", + tep_unregister_event_handler(tep, -1, "sched", "sched_wakeup_new", sched_wakeup_handler, NULL); } diff --git a/tools/lib/traceevent/plugin_scsi.c b/tools/lib/traceevent/plugin_scsi.c index 4eba25cc1431..5d0387a4b65a 100644 --- a/tools/lib/traceevent/plugin_scsi.c +++ b/tools/lib/traceevent/plugin_scsi.c @@ -414,9 +414,9 @@ unsigned long long process_scsi_trace_parse_cdb(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process_scsi_trace_parse_cdb, TEP_FUNC_ARG_STRING, "scsi_trace_parse_cdb", @@ -427,8 +427,8 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process_scsi_trace_parse_cdb, + tep_unregister_print_function(tep, process_scsi_trace_parse_cdb, "scsi_trace_parse_cdb"); } diff --git a/tools/lib/traceevent/plugin_xen.c b/tools/lib/traceevent/plugin_xen.c index bc0496e4c296..993b208d0323 100644 --- a/tools/lib/traceevent/plugin_xen.c +++ b/tools/lib/traceevent/plugin_xen.c @@ -120,9 +120,9 @@ unsigned long long process_xen_hypercall_name(struct trace_seq *s, return 0; } -int TEP_PLUGIN_LOADER(struct tep_handle *pevent) +int TEP_PLUGIN_LOADER(struct tep_handle *tep) { - tep_register_print_function(pevent, + tep_register_print_function(tep, process_xen_hypercall_name, TEP_FUNC_ARG_STRING, "xen_hypercall_name", @@ -131,8 +131,8 @@ int TEP_PLUGIN_LOADER(struct tep_handle *pevent) return 0; } -void TEP_PLUGIN_UNLOADER(struct tep_handle *pevent) +void TEP_PLUGIN_UNLOADER(struct tep_handle *tep) { - tep_unregister_print_function(pevent, process_xen_hypercall_name, + tep_unregister_print_function(tep, process_xen_hypercall_name, "xen_hypercall_name"); } diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index 8ff1d55954d1..8d5ecd2bf877 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -100,7 +100,8 @@ static void expand_buffer(struct trace_seq *s) * @fmt: printf format string * * It returns 0 if the trace oversizes the buffer's free - * space, 1 otherwise. + * space, the number of characters printed, or a negative + * value in case of an error. * * The tracer may use either sequence operations or its own * copy to user routines. To simplify formating of a trace @@ -129,9 +130,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) goto try_again; } - s->len += ret; + if (ret > 0) + s->len += ret; - return 1; + return ret; } /** @@ -139,6 +141,10 @@ trace_seq_printf(struct trace_seq *s, const char *fmt, ...) * @s: trace sequence descriptor * @fmt: printf format string * + * It returns 0 if the trace oversizes the buffer's free + * space, the number of characters printed, or a negative + * value in case of an error. + * * * The tracer may use either sequence operations or its own * copy to user routines. To simplify formating of a trace * trace_seq_printf is used to store strings into a special @@ -163,9 +169,10 @@ trace_seq_vprintf(struct trace_seq *s, const char *fmt, va_list args) goto try_again; } - s->len += ret; + if (ret > 0) + s->len += ret; - return len; + return ret; } /** |