diff options
Diffstat (limited to 'tools/lib/bpf')
| -rw-r--r-- | tools/lib/bpf/.gitignore | 5 | ||||
| -rw-r--r-- | tools/lib/bpf/Makefile | 62 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf.c | 11 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf.h | 10 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf_core_read.h | 263 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf_endian.h | 72 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf_helpers.h | 47 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf_prog_linfo.c | 14 | ||||
| -rw-r--r-- | tools/lib/bpf/bpf_tracing.h | 195 | ||||
| -rw-r--r-- | tools/lib/bpf/btf.c | 97 | ||||
| -rw-r--r-- | tools/lib/bpf/btf.h | 6 | ||||
| -rw-r--r-- | tools/lib/bpf/btf_dump.c | 37 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf.c | 2143 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf.h | 99 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf.map | 18 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf_internal.h | 63 | ||||
| -rw-r--r-- | tools/lib/bpf/libbpf_probes.c | 1 | ||||
| -rw-r--r-- | tools/lib/bpf/netlink.c | 87 | ||||
| -rw-r--r-- | tools/lib/bpf/nlattr.c | 10 | ||||
| -rw-r--r-- | tools/lib/bpf/test_libbpf.cpp | 18 | ||||
| -rw-r--r-- | tools/lib/bpf/xsk.c | 173 | 
21 files changed, 2481 insertions, 950 deletions
diff --git a/tools/lib/bpf/.gitignore b/tools/lib/bpf/.gitignore index d9e9dec04605..e97c2ebcf447 100644 --- a/tools/lib/bpf/.gitignore +++ b/tools/lib/bpf/.gitignore @@ -1,5 +1,8 @@  libbpf_version.h  libbpf.pc  FEATURE-DUMP.libbpf -test_libbpf  libbpf.so.* +TAGS +tags +cscope.* +/bpf_helper_defs.h diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile index 56ce6292071b..97830e46d1a0 100644 --- a/tools/lib/bpf/Makefile +++ b/tools/lib/bpf/Makefile @@ -56,7 +56,7 @@ ifndef VERBOSE  endif  FEATURE_USER = .libbpf -FEATURE_TESTS = libelf libelf-mmap bpf reallocarray cxx +FEATURE_TESTS = libelf libelf-mmap bpf reallocarray  FEATURE_DISPLAY = libelf bpf  INCLUDES = -I. -I$(srctree)/tools/include -I$(srctree)/tools/arch/$(ARCH)/include/uapi -I$(srctree)/tools/include/uapi @@ -138,34 +138,29 @@ STATIC_OBJDIR	:= $(OUTPUT)staticobjs/  BPF_IN_SHARED	:= $(SHARED_OBJDIR)libbpf-in.o  BPF_IN_STATIC	:= $(STATIC_OBJDIR)libbpf-in.o  VERSION_SCRIPT	:= libbpf.map +BPF_HELPER_DEFS	:= $(OUTPUT)bpf_helper_defs.h  LIB_TARGET	:= $(addprefix $(OUTPUT),$(LIB_TARGET))  LIB_FILE	:= $(addprefix $(OUTPUT),$(LIB_FILE))  PC_FILE		:= $(addprefix $(OUTPUT),$(PC_FILE)) +TAGS_PROG := $(if $(shell which etags 2>/dev/null),etags,ctags) +  GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \  			   cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' | \ -			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}' | \ +			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \  			   sort -u | wc -l)  VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \  			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)  CMD_TARGETS = $(LIB_TARGET) $(PC_FILE) -CXX_TEST_TARGET = $(OUTPUT)test_libbpf - -ifeq ($(feature-cxx), 1) -	CMD_TARGETS += $(CXX_TEST_TARGET) -endif - -TARGETS = $(CMD_TARGETS) -  all: fixdep  	$(Q)$(MAKE) all_cmd  all_cmd: $(CMD_TARGETS) check -$(BPF_IN_SHARED): force elfdep bpfdep +$(BPF_IN_SHARED): force elfdep bpfdep $(BPF_HELPER_DEFS)  	@(test -f ../../include/uapi/linux/bpf.h -a -f ../../../include/uapi/linux/bpf.h && ( \  	(diff -B ../../include/uapi/linux/bpf.h ../../../include/uapi/linux/bpf.h >/dev/null) || \  	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/bpf.h' differs from latest version at 'include/uapi/linux/bpf.h'" >&2 )) || true @@ -183,23 +178,25 @@ $(BPF_IN_SHARED): force elfdep bpfdep  	echo "Warning: Kernel ABI header at 'tools/include/uapi/linux/if_xdp.h' differs from latest version at 'include/uapi/linux/if_xdp.h'" >&2 )) || true  	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(SHARED_OBJDIR) CFLAGS="$(CFLAGS) $(SHLIB_FLAGS)" -$(BPF_IN_STATIC): force elfdep bpfdep +$(BPF_IN_STATIC): force elfdep bpfdep $(BPF_HELPER_DEFS)  	$(Q)$(MAKE) $(build)=libbpf OUTPUT=$(STATIC_OBJDIR) +$(BPF_HELPER_DEFS): $(srctree)/tools/include/uapi/linux/bpf.h +	$(Q)$(srctree)/scripts/bpf_helpers_doc.py --header 		\ +		--file $(srctree)/tools/include/uapi/linux/bpf.h > $(BPF_HELPER_DEFS) +  $(OUTPUT)libbpf.so: $(OUTPUT)libbpf.so.$(LIBBPF_VERSION)  $(OUTPUT)libbpf.so.$(LIBBPF_VERSION): $(BPF_IN_SHARED) -	$(QUIET_LINK)$(CC) --shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ -				    -Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@ +	$(QUIET_LINK)$(CC) $(LDFLAGS) \ +		--shared -Wl,-soname,libbpf.so.$(LIBBPF_MAJOR_VERSION) \ +		-Wl,--version-script=$(VERSION_SCRIPT) $^ -lelf -o $@  	@ln -sf $(@F) $(OUTPUT)libbpf.so  	@ln -sf $(@F) $(OUTPUT)libbpf.so.$(LIBBPF_MAJOR_VERSION)  $(OUTPUT)libbpf.a: $(BPF_IN_STATIC)  	$(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ -$(OUTPUT)test_libbpf: test_libbpf.cpp $(OUTPUT)libbpf.a -	$(QUIET_LINK)$(CXX) $(INCLUDES) $^ -lelf -o $@ -  $(OUTPUT)libbpf.pc:  	$(QUIET_GEN)sed -e "s|@PREFIX@|$(prefix)|" \  		-e "s|@LIBDIR@|$(libdir_SQ)|" \ @@ -215,9 +212,9 @@ check_abi: $(OUTPUT)libbpf.so  		     "versioned symbols in $^ ($(VERSIONED_SYM_COUNT))." \  		     "Please make sure all LIBBPF_API symbols are"	 \  		     "versioned in $(VERSION_SCRIPT)." >&2;		 \ -		readelf -s --wide $(OUTPUT)libbpf-in.o |		 \ +		readelf -s --wide $(BPF_IN_SHARED) |			 \  		    cut -d "@" -f1 | sed 's/_v[0-9]_[0-9]_[0-9].*//' |	 \ -		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$8}'|   \ +		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \  		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \  		readelf -s --wide $(OUTPUT)libbpf.so |			 \  		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \ @@ -247,13 +244,18 @@ install_lib: all_cmd  		$(call do_install_mkdir,$(libdir_SQ)); \  		cp -fpR $(LIB_FILE) $(DESTDIR)$(libdir_SQ) -install_headers: +install_headers: $(BPF_HELPER_DEFS)  	$(call QUIET_INSTALL, headers) \  		$(call do_install,bpf.h,$(prefix)/include/bpf,644); \  		$(call do_install,libbpf.h,$(prefix)/include/bpf,644); \  		$(call do_install,btf.h,$(prefix)/include/bpf,644); \  		$(call do_install,libbpf_util.h,$(prefix)/include/bpf,644); \ -		$(call do_install,xsk.h,$(prefix)/include/bpf,644); +		$(call do_install,xsk.h,$(prefix)/include/bpf,644); \ +		$(call do_install,bpf_helpers.h,$(prefix)/include/bpf,644); \ +		$(call do_install,$(BPF_HELPER_DEFS),$(prefix)/include/bpf,644); \ +		$(call do_install,bpf_tracing.h,$(prefix)/include/bpf,644); \ +		$(call do_install,bpf_endian.h,$(prefix)/include/bpf,644); \ +		$(call do_install,bpf_core_read.h,$(prefix)/include/bpf,644);  install_pkgconfig: $(PC_FILE)  	$(call QUIET_INSTALL, $(PC_FILE)) \ @@ -268,14 +270,15 @@ config-clean:  	$(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null  clean: -	$(call QUIET_CLEAN, libbpf) $(RM) -rf $(TARGETS) $(CXX_TEST_TARGET) \ +	$(call QUIET_CLEAN, libbpf) $(RM) -rf $(CMD_TARGETS) \  		*.o *~ *.a *.so *.so.$(LIBBPF_MAJOR_VERSION) .*.d .*.cmd \ -		*.pc LIBBPF-CFLAGS $(SHARED_OBJDIR) $(STATIC_OBJDIR) +		*.pc LIBBPF-CFLAGS $(BPF_HELPER_DEFS) \ +		$(SHARED_OBJDIR) $(STATIC_OBJDIR)  	$(call QUIET_CLEAN, core-gen) $(RM) $(OUTPUT)FEATURE-DUMP.libbpf -PHONY += force elfdep bpfdep +PHONY += force elfdep bpfdep cscope tags  force:  elfdep: @@ -284,6 +287,17 @@ elfdep:  bpfdep:  	@if [ "$(feature-bpf)" != "1" ]; then echo "BPF API too old"; exit 1 ; fi +cscope: +	ls *.c *.h > cscope.files +	cscope -b -q -I $(srctree)/include -f cscope.out + +tags: +	rm -f TAGS tags +	ls *.c *.h | xargs $(TAGS_PROG) -a +  # Declare the contents of the .PHONY variable as phony.  We keep that  # information in a variable so we can use it in if_changed and friends.  .PHONY: $(PHONY) + +# Delete partially updated (corrupted) files on error +.DELETE_ON_ERROR: diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c index cbb933532981..98596e15390f 100644 --- a/tools/lib/bpf/bpf.c +++ b/tools/lib/bpf/bpf.c @@ -189,7 +189,7 @@ static void *  alloc_zero_tailing_info(const void *orecord, __u32 cnt,  			__u32 actual_rec_size, __u32 expected_rec_size)  { -	__u64 info_len = actual_rec_size * cnt; +	__u64 info_len = (__u64)actual_rec_size * cnt;  	void *info, *nrecord;  	int i; @@ -228,6 +228,13 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,  	memset(&attr, 0, sizeof(attr));  	attr.prog_type = load_attr->prog_type;  	attr.expected_attach_type = load_attr->expected_attach_type; +	if (attr.prog_type == BPF_PROG_TYPE_TRACING) { +		attr.attach_btf_id = load_attr->attach_btf_id; +		attr.attach_prog_fd = load_attr->attach_prog_fd; +	} else { +		attr.prog_ifindex = load_attr->prog_ifindex; +		attr.kern_version = load_attr->kern_version; +	}  	attr.insn_cnt = (__u32)load_attr->insns_cnt;  	attr.insns = ptr_to_u64(load_attr->insns);  	attr.license = ptr_to_u64(load_attr->license); @@ -241,8 +248,6 @@ int bpf_load_program_xattr(const struct bpf_load_program_attr *load_attr,  		attr.log_size = 0;  	} -	attr.kern_version = load_attr->kern_version; -	attr.prog_ifindex = load_attr->prog_ifindex;  	attr.prog_btf_fd = load_attr->prog_btf_fd;  	attr.func_info_rec_size = load_attr->func_info_rec_size;  	attr.func_info_cnt = load_attr->func_info_cnt; diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h index 0db01334740f..3c791fa8e68e 100644 --- a/tools/lib/bpf/bpf.h +++ b/tools/lib/bpf/bpf.h @@ -77,8 +77,14 @@ struct bpf_load_program_attr {  	const struct bpf_insn *insns;  	size_t insns_cnt;  	const char *license; -	__u32 kern_version; -	__u32 prog_ifindex; +	union { +		__u32 kern_version; +		__u32 attach_prog_fd; +	}; +	union { +		__u32 prog_ifindex; +		__u32 attach_btf_id; +	};  	__u32 prog_btf_fd;  	__u32 func_info_rec_size;  	const void *func_info; diff --git a/tools/lib/bpf/bpf_core_read.h b/tools/lib/bpf/bpf_core_read.h new file mode 100644 index 000000000000..7009dc90e012 --- /dev/null +++ b/tools/lib/bpf/bpf_core_read.h @@ -0,0 +1,263 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_CORE_READ_H__ +#define __BPF_CORE_READ_H__ + +/* + * enum bpf_field_info_kind is passed as a second argument into + * __builtin_preserve_field_info() built-in to get a specific aspect of + * a field, captured as a first argument. __builtin_preserve_field_info(field, + * info_kind) returns __u32 integer and produces BTF field relocation, which + * is understood and processed by libbpf during BPF object loading. See + * selftests/bpf for examples. + */ +enum bpf_field_info_kind { +	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */ +	BPF_FIELD_BYTE_SIZE = 1, +	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */ +	BPF_FIELD_SIGNED = 3, +	BPF_FIELD_LSHIFT_U64 = 4, +	BPF_FIELD_RSHIFT_U64 = 5, +}; + +#define __CORE_RELO(src, field, info)					      \ +	__builtin_preserve_field_info((src)->field, BPF_FIELD_##info) + +#if __BYTE_ORDER == __LITTLE_ENDIAN +#define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \ +	bpf_probe_read((void *)dst,					      \ +		       __CORE_RELO(src, fld, BYTE_SIZE),		      \ +		       (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +#else +/* semantics of LSHIFT_64 assumes loading values into low-ordered bytes, so + * for big-endian we need to adjust destination pointer accordingly, based on + * field byte size + */ +#define __CORE_BITFIELD_PROBE_READ(dst, src, fld)			      \ +	bpf_probe_read((void *)dst + (8 - __CORE_RELO(src, fld, BYTE_SIZE)),  \ +		       __CORE_RELO(src, fld, BYTE_SIZE),		      \ +		       (const void *)src + __CORE_RELO(src, fld, BYTE_OFFSET)) +#endif + +/* + * Extract bitfield, identified by s->field, and return its value as u64. + * All this is done in relocatable manner, so bitfield changes such as + * signedness, bit size, offset changes, this will be handled automatically. + * This version of macro is using bpf_probe_read() to read underlying integer + * storage. Macro functions as an expression and its return type is + * bpf_probe_read()'s return value: 0, on success, <0 on error. + */ +#define BPF_CORE_READ_BITFIELD_PROBED(s, field) ({			      \ +	unsigned long long val = 0;					      \ +									      \ +	__CORE_BITFIELD_PROBE_READ(&val, s, field);			      \ +	val <<= __CORE_RELO(s, field, LSHIFT_U64);			      \ +	if (__CORE_RELO(s, field, SIGNED))				      \ +		val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64);  \ +	else								      \ +		val = val >> __CORE_RELO(s, field, RSHIFT_U64);		      \ +	val;								      \ +}) + +/* + * Extract bitfield, identified by s->field, and return its value as u64. + * This version of macro is using direct memory reads and should be used from + * BPF program types that support such functionality (e.g., typed raw + * tracepoints). + */ +#define BPF_CORE_READ_BITFIELD(s, field) ({				      \ +	const void *p = (const void *)s + __CORE_RELO(s, field, BYTE_OFFSET); \ +	unsigned long long val;						      \ +									      \ +	switch (__CORE_RELO(s, field, BYTE_SIZE)) {			      \ +	case 1: val = *(const unsigned char *)p;			      \ +	case 2: val = *(const unsigned short *)p;			      \ +	case 4: val = *(const unsigned int *)p;				      \ +	case 8: val = *(const unsigned long long *)p;			      \ +	}								      \ +	val <<= __CORE_RELO(s, field, LSHIFT_U64);			      \ +	if (__CORE_RELO(s, field, SIGNED))				      \ +		val = ((long long)val) >> __CORE_RELO(s, field, RSHIFT_U64);  \ +	else								      \ +		val = val >> __CORE_RELO(s, field, RSHIFT_U64);		      \ +	val;								      \ +}) + +/* + * Convenience macro to check that field actually exists in target kernel's. + * Returns: + *    1, if matching field is present in target kernel; + *    0, if no matching field found. + */ +#define bpf_core_field_exists(field)					    \ +	__builtin_preserve_field_info(field, BPF_FIELD_EXISTS) + +/* + * Convenience macro to get byte size of a field. Works for integers, + * struct/unions, pointers, arrays, and enums. + */ +#define bpf_core_field_size(field)					    \ +	__builtin_preserve_field_info(field, BPF_FIELD_BYTE_SIZE) + +/* + * bpf_core_read() abstracts away bpf_probe_read() call and captures offset + * relocation for source address using __builtin_preserve_access_index() + * built-in, provided by Clang. + * + * __builtin_preserve_access_index() takes as an argument an expression of + * taking an address of a field within struct/union. It makes compiler emit + * a relocation, which records BTF type ID describing root struct/union and an + * accessor string which describes exact embedded field that was used to take + * an address. See detailed description of this relocation format and + * semantics in comments to struct bpf_field_reloc in libbpf_internal.h. + * + * This relocation allows libbpf to adjust BPF instruction to use correct + * actual field offset, based on target kernel BTF type that matches original + * (local) BTF, used to record relocation. + */ +#define bpf_core_read(dst, sz, src)					    \ +	bpf_probe_read(dst, sz,						    \ +		       (const void *)__builtin_preserve_access_index(src)) + +/* + * bpf_core_read_str() is a thin wrapper around bpf_probe_read_str() + * additionally emitting BPF CO-RE field relocation for specified source + * argument. + */ +#define bpf_core_read_str(dst, sz, src)					    \ +	bpf_probe_read_str(dst, sz,					    \ +			   (const void *)__builtin_preserve_access_index(src)) + +#define ___concat(a, b) a ## b +#define ___apply(fn, n) ___concat(fn, n) +#define ___nth(_1, _2, _3, _4, _5, _6, _7, _8, _9, _10, __11, N, ...) N + +/* + * return number of provided arguments; used for switch-based variadic macro + * definitions (see ___last, ___arrow, etc below) + */ +#define ___narg(...) ___nth(_, ##__VA_ARGS__, 10, 9, 8, 7, 6, 5, 4, 3, 2, 1, 0) +/* + * return 0 if no arguments are passed, N - otherwise; used for + * recursively-defined macros to specify termination (0) case, and generic + * (N) case (e.g., ___read_ptrs, ___core_read) + */ +#define ___empty(...) ___nth(_, ##__VA_ARGS__, N, N, N, N, N, N, N, N, N, N, 0) + +#define ___last1(x) x +#define ___last2(a, x) x +#define ___last3(a, b, x) x +#define ___last4(a, b, c, x) x +#define ___last5(a, b, c, d, x) x +#define ___last6(a, b, c, d, e, x) x +#define ___last7(a, b, c, d, e, f, x) x +#define ___last8(a, b, c, d, e, f, g, x) x +#define ___last9(a, b, c, d, e, f, g, h, x) x +#define ___last10(a, b, c, d, e, f, g, h, i, x) x +#define ___last(...) ___apply(___last, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___nolast2(a, _) a +#define ___nolast3(a, b, _) a, b +#define ___nolast4(a, b, c, _) a, b, c +#define ___nolast5(a, b, c, d, _) a, b, c, d +#define ___nolast6(a, b, c, d, e, _) a, b, c, d, e +#define ___nolast7(a, b, c, d, e, f, _) a, b, c, d, e, f +#define ___nolast8(a, b, c, d, e, f, g, _) a, b, c, d, e, f, g +#define ___nolast9(a, b, c, d, e, f, g, h, _) a, b, c, d, e, f, g, h +#define ___nolast10(a, b, c, d, e, f, g, h, i, _) a, b, c, d, e, f, g, h, i +#define ___nolast(...) ___apply(___nolast, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___arrow1(a) a +#define ___arrow2(a, b) a->b +#define ___arrow3(a, b, c) a->b->c +#define ___arrow4(a, b, c, d) a->b->c->d +#define ___arrow5(a, b, c, d, e) a->b->c->d->e +#define ___arrow6(a, b, c, d, e, f) a->b->c->d->e->f +#define ___arrow7(a, b, c, d, e, f, g) a->b->c->d->e->f->g +#define ___arrow8(a, b, c, d, e, f, g, h) a->b->c->d->e->f->g->h +#define ___arrow9(a, b, c, d, e, f, g, h, i) a->b->c->d->e->f->g->h->i +#define ___arrow10(a, b, c, d, e, f, g, h, i, j) a->b->c->d->e->f->g->h->i->j +#define ___arrow(...) ___apply(___arrow, ___narg(__VA_ARGS__))(__VA_ARGS__) + +#define ___type(...) typeof(___arrow(__VA_ARGS__)) + +#define ___read(read_fn, dst, src_type, src, accessor)			    \ +	read_fn((void *)(dst), sizeof(*(dst)), &((src_type)(src))->accessor) + +/* "recursively" read a sequence of inner pointers using local __t var */ +#define ___rd_first(src, a) ___read(bpf_core_read, &__t, ___type(src), src, a); +#define ___rd_last(...)							    \ +	___read(bpf_core_read, &__t,					    \ +		___type(___nolast(__VA_ARGS__)), __t, ___last(__VA_ARGS__)); +#define ___rd_p1(...) const void *__t; ___rd_first(__VA_ARGS__) +#define ___rd_p2(...) ___rd_p1(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p3(...) ___rd_p2(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p4(...) ___rd_p3(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p5(...) ___rd_p4(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p6(...) ___rd_p5(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p7(...) ___rd_p6(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p8(...) ___rd_p7(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___rd_p9(...) ___rd_p8(___nolast(__VA_ARGS__)) ___rd_last(__VA_ARGS__) +#define ___read_ptrs(src, ...)						    \ +	___apply(___rd_p, ___narg(__VA_ARGS__))(src, __VA_ARGS__) + +#define ___core_read0(fn, dst, src, a)					    \ +	___read(fn, dst, ___type(src), src, a); +#define ___core_readN(fn, dst, src, ...)				    \ +	___read_ptrs(src, ___nolast(__VA_ARGS__))			    \ +	___read(fn, dst, ___type(src, ___nolast(__VA_ARGS__)), __t,	    \ +		___last(__VA_ARGS__)); +#define ___core_read(fn, dst, src, a, ...)				    \ +	___apply(___core_read, ___empty(__VA_ARGS__))(fn, dst,		    \ +						      src, a, ##__VA_ARGS__) + +/* + * BPF_CORE_READ_INTO() is a more performance-conscious variant of + * BPF_CORE_READ(), in which final field is read into user-provided storage. + * See BPF_CORE_READ() below for more details on general usage. + */ +#define BPF_CORE_READ_INTO(dst, src, a, ...)				    \ +	({								    \ +		___core_read(bpf_core_read, dst, src, a, ##__VA_ARGS__)	    \ +	}) + +/* + * BPF_CORE_READ_STR_INTO() does same "pointer chasing" as + * BPF_CORE_READ() for intermediate pointers, but then executes (and returns + * corresponding error code) bpf_core_read_str() for final string read. + */ +#define BPF_CORE_READ_STR_INTO(dst, src, a, ...)			    \ +	({								    \ +		___core_read(bpf_core_read_str, dst, src, a, ##__VA_ARGS__) \ +	}) + +/* + * BPF_CORE_READ() is used to simplify BPF CO-RE relocatable read, especially + * when there are few pointer chasing steps. + * E.g., what in non-BPF world (or in BPF w/ BCC) would be something like: + *	int x = s->a.b.c->d.e->f->g; + * can be succinctly achieved using BPF_CORE_READ as: + *	int x = BPF_CORE_READ(s, a.b.c, d.e, f, g); + * + * BPF_CORE_READ will decompose above statement into 4 bpf_core_read (BPF + * CO-RE relocatable bpf_probe_read() wrapper) calls, logically equivalent to: + * 1. const void *__t = s->a.b.c; + * 2. __t = __t->d.e; + * 3. __t = __t->f; + * 4. return __t->g; + * + * Equivalence is logical, because there is a heavy type casting/preservation + * involved, as well as all the reads are happening through bpf_probe_read() + * calls using __builtin_preserve_access_index() to emit CO-RE relocations. + * + * N.B. Only up to 9 "field accessors" are supported, which should be more + * than enough for any practical purpose. + */ +#define BPF_CORE_READ(src, a, ...)					    \ +	({								    \ +		___type(src, a, ##__VA_ARGS__) __r;			    \ +		BPF_CORE_READ_INTO(&__r, src, a, ##__VA_ARGS__);	    \ +		__r;							    \ +	}) + +#endif + diff --git a/tools/lib/bpf/bpf_endian.h b/tools/lib/bpf/bpf_endian.h new file mode 100644 index 000000000000..fbe28008450f --- /dev/null +++ b/tools/lib/bpf/bpf_endian.h @@ -0,0 +1,72 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_ENDIAN__ +#define __BPF_ENDIAN__ + +#include <linux/stddef.h> +#include <linux/swab.h> + +/* LLVM's BPF target selects the endianness of the CPU + * it compiles on, or the user specifies (bpfel/bpfeb), + * respectively. The used __BYTE_ORDER__ is defined by + * the compiler, we cannot rely on __BYTE_ORDER from + * libc headers, since it doesn't reflect the actual + * requested byte order. + * + * Note, LLVM's BPF target has different __builtin_bswapX() + * semantics. It does map to BPF_ALU | BPF_END | BPF_TO_BE + * in bpfel and bpfeb case, which means below, that we map + * to cpu_to_be16(). We could use it unconditionally in BPF + * case, but better not rely on it, so that this header here + * can be used from application and BPF program side, which + * use different targets. + */ +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +# define __bpf_ntohs(x)			__builtin_bswap16(x) +# define __bpf_htons(x)			__builtin_bswap16(x) +# define __bpf_constant_ntohs(x)	___constant_swab16(x) +# define __bpf_constant_htons(x)	___constant_swab16(x) +# define __bpf_ntohl(x)			__builtin_bswap32(x) +# define __bpf_htonl(x)			__builtin_bswap32(x) +# define __bpf_constant_ntohl(x)	___constant_swab32(x) +# define __bpf_constant_htonl(x)	___constant_swab32(x) +# define __bpf_be64_to_cpu(x)		__builtin_bswap64(x) +# define __bpf_cpu_to_be64(x)		__builtin_bswap64(x) +# define __bpf_constant_be64_to_cpu(x)	___constant_swab64(x) +# define __bpf_constant_cpu_to_be64(x)	___constant_swab64(x) +#elif __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__ +# define __bpf_ntohs(x)			(x) +# define __bpf_htons(x)			(x) +# define __bpf_constant_ntohs(x)	(x) +# define __bpf_constant_htons(x)	(x) +# define __bpf_ntohl(x)			(x) +# define __bpf_htonl(x)			(x) +# define __bpf_constant_ntohl(x)	(x) +# define __bpf_constant_htonl(x)	(x) +# define __bpf_be64_to_cpu(x)		(x) +# define __bpf_cpu_to_be64(x)		(x) +# define __bpf_constant_be64_to_cpu(x)  (x) +# define __bpf_constant_cpu_to_be64(x)  (x) +#else +# error "Fix your compiler's __BYTE_ORDER__?!" +#endif + +#define bpf_htons(x)				\ +	(__builtin_constant_p(x) ?		\ +	 __bpf_constant_htons(x) : __bpf_htons(x)) +#define bpf_ntohs(x)				\ +	(__builtin_constant_p(x) ?		\ +	 __bpf_constant_ntohs(x) : __bpf_ntohs(x)) +#define bpf_htonl(x)				\ +	(__builtin_constant_p(x) ?		\ +	 __bpf_constant_htonl(x) : __bpf_htonl(x)) +#define bpf_ntohl(x)				\ +	(__builtin_constant_p(x) ?		\ +	 __bpf_constant_ntohl(x) : __bpf_ntohl(x)) +#define bpf_cpu_to_be64(x)			\ +	(__builtin_constant_p(x) ?		\ +	 __bpf_constant_cpu_to_be64(x) : __bpf_cpu_to_be64(x)) +#define bpf_be64_to_cpu(x)			\ +	(__builtin_constant_p(x) ?		\ +	 __bpf_constant_be64_to_cpu(x) : __bpf_be64_to_cpu(x)) + +#endif /* __BPF_ENDIAN__ */ diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h new file mode 100644 index 000000000000..0c7d28292898 --- /dev/null +++ b/tools/lib/bpf/bpf_helpers.h @@ -0,0 +1,47 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_HELPERS__ +#define __BPF_HELPERS__ + +#include "bpf_helper_defs.h" + +#define __uint(name, val) int (*name)[val] +#define __type(name, val) typeof(val) *name + +/* Helper macro to print out debug messages */ +#define bpf_printk(fmt, ...)				\ +({							\ +	char ____fmt[] = fmt;				\ +	bpf_trace_printk(____fmt, sizeof(____fmt),	\ +			 ##__VA_ARGS__);		\ +}) + +/* + * Helper macro to place programs, maps, license in + * different sections in elf_bpf file. Section names + * are interpreted by elf_bpf loader + */ +#define SEC(NAME) __attribute__((section(NAME), used)) + +#ifndef __always_inline +#define __always_inline __attribute__((always_inline)) +#endif + +/* + * Helper structure used by eBPF C program + * to describe BPF map attributes to libbpf loader + */ +struct bpf_map_def { +	unsigned int type; +	unsigned int key_size; +	unsigned int value_size; +	unsigned int max_entries; +	unsigned int map_flags; +}; + +enum libbpf_pin_type { +	LIBBPF_PIN_NONE, +	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ +	LIBBPF_PIN_BY_NAME, +}; + +#endif diff --git a/tools/lib/bpf/bpf_prog_linfo.c b/tools/lib/bpf/bpf_prog_linfo.c index 8c67561c93b0..3ed1a27b5f7c 100644 --- a/tools/lib/bpf/bpf_prog_linfo.c +++ b/tools/lib/bpf/bpf_prog_linfo.c @@ -101,6 +101,7 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)  {  	struct bpf_prog_linfo *prog_linfo;  	__u32 nr_linfo, nr_jited_func; +	__u64 data_sz;  	nr_linfo = info->nr_line_info; @@ -122,11 +123,11 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)  	/* Copy xlated line_info */  	prog_linfo->nr_linfo = nr_linfo;  	prog_linfo->rec_size = info->line_info_rec_size; -	prog_linfo->raw_linfo = malloc(nr_linfo * prog_linfo->rec_size); +	data_sz = (__u64)nr_linfo * prog_linfo->rec_size; +	prog_linfo->raw_linfo = malloc(data_sz);  	if (!prog_linfo->raw_linfo)  		goto err_free; -	memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, -	       nr_linfo * prog_linfo->rec_size); +	memcpy(prog_linfo->raw_linfo, (void *)(long)info->line_info, data_sz);  	nr_jited_func = info->nr_jited_ksyms;  	if (!nr_jited_func || @@ -142,13 +143,12 @@ struct bpf_prog_linfo *bpf_prog_linfo__new(const struct bpf_prog_info *info)  	/* Copy jited_line_info */  	prog_linfo->nr_jited_func = nr_jited_func;  	prog_linfo->jited_rec_size = info->jited_line_info_rec_size; -	prog_linfo->raw_jited_linfo = malloc(nr_linfo * -					     prog_linfo->jited_rec_size); +	data_sz = (__u64)nr_linfo * prog_linfo->jited_rec_size; +	prog_linfo->raw_jited_linfo = malloc(data_sz);  	if (!prog_linfo->raw_jited_linfo)  		goto err_free;  	memcpy(prog_linfo->raw_jited_linfo, -	       (void *)(long)info->jited_line_info, -	       nr_linfo * prog_linfo->jited_rec_size); +	       (void *)(long)info->jited_line_info, data_sz);  	/* Number of jited_line_info per jited func */  	prog_linfo->nr_jited_linfo_per_func = malloc(nr_jited_func * diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h new file mode 100644 index 000000000000..b0dafe8b4ebc --- /dev/null +++ b/tools/lib/bpf/bpf_tracing.h @@ -0,0 +1,195 @@ +/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ +#ifndef __BPF_TRACING_H__ +#define __BPF_TRACING_H__ + +/* Scan the ARCH passed in from ARCH env variable (see Makefile) */ +#if defined(__TARGET_ARCH_x86) +	#define bpf_target_x86 +	#define bpf_target_defined +#elif defined(__TARGET_ARCH_s390) +	#define bpf_target_s390 +	#define bpf_target_defined +#elif defined(__TARGET_ARCH_arm) +	#define bpf_target_arm +	#define bpf_target_defined +#elif defined(__TARGET_ARCH_arm64) +	#define bpf_target_arm64 +	#define bpf_target_defined +#elif defined(__TARGET_ARCH_mips) +	#define bpf_target_mips +	#define bpf_target_defined +#elif defined(__TARGET_ARCH_powerpc) +	#define bpf_target_powerpc +	#define bpf_target_defined +#elif defined(__TARGET_ARCH_sparc) +	#define bpf_target_sparc +	#define bpf_target_defined +#else +	#undef bpf_target_defined +#endif + +/* Fall back to what the compiler says */ +#ifndef bpf_target_defined +#if defined(__x86_64__) +	#define bpf_target_x86 +#elif defined(__s390__) +	#define bpf_target_s390 +#elif defined(__arm__) +	#define bpf_target_arm +#elif defined(__aarch64__) +	#define bpf_target_arm64 +#elif defined(__mips__) +	#define bpf_target_mips +#elif defined(__powerpc__) +	#define bpf_target_powerpc +#elif defined(__sparc__) +	#define bpf_target_sparc +#endif +#endif + +#if defined(bpf_target_x86) + +#ifdef __KERNEL__ +#define PT_REGS_PARM1(x) ((x)->di) +#define PT_REGS_PARM2(x) ((x)->si) +#define PT_REGS_PARM3(x) ((x)->dx) +#define PT_REGS_PARM4(x) ((x)->cx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->sp) +#define PT_REGS_FP(x) ((x)->bp) +#define PT_REGS_RC(x) ((x)->ax) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->ip) +#else +#ifdef __i386__ +/* i386 kernel is built with -mregparm=3 */ +#define PT_REGS_PARM1(x) ((x)->eax) +#define PT_REGS_PARM2(x) ((x)->edx) +#define PT_REGS_PARM3(x) ((x)->ecx) +#define PT_REGS_PARM4(x) 0 +#define PT_REGS_PARM5(x) 0 +#define PT_REGS_RET(x) ((x)->esp) +#define PT_REGS_FP(x) ((x)->ebp) +#define PT_REGS_RC(x) ((x)->eax) +#define PT_REGS_SP(x) ((x)->esp) +#define PT_REGS_IP(x) ((x)->eip) +#else +#define PT_REGS_PARM1(x) ((x)->rdi) +#define PT_REGS_PARM2(x) ((x)->rsi) +#define PT_REGS_PARM3(x) ((x)->rdx) +#define PT_REGS_PARM4(x) ((x)->rcx) +#define PT_REGS_PARM5(x) ((x)->r8) +#define PT_REGS_RET(x) ((x)->rsp) +#define PT_REGS_FP(x) ((x)->rbp) +#define PT_REGS_RC(x) ((x)->rax) +#define PT_REGS_SP(x) ((x)->rsp) +#define PT_REGS_IP(x) ((x)->rip) +#endif +#endif + +#elif defined(bpf_target_s390) + +/* s390 provides user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_S390 const volatile user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_PARM2(x) (((PT_REGS_S390 *)(x))->gprs[3]) +#define PT_REGS_PARM3(x) (((PT_REGS_S390 *)(x))->gprs[4]) +#define PT_REGS_PARM4(x) (((PT_REGS_S390 *)(x))->gprs[5]) +#define PT_REGS_PARM5(x) (((PT_REGS_S390 *)(x))->gprs[6]) +#define PT_REGS_RET(x) (((PT_REGS_S390 *)(x))->gprs[14]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_S390 *)(x))->gprs[11]) +#define PT_REGS_RC(x) (((PT_REGS_S390 *)(x))->gprs[2]) +#define PT_REGS_SP(x) (((PT_REGS_S390 *)(x))->gprs[15]) +#define PT_REGS_IP(x) (((PT_REGS_S390 *)(x))->psw.addr) + +#elif defined(bpf_target_arm) + +#define PT_REGS_PARM1(x) ((x)->uregs[0]) +#define PT_REGS_PARM2(x) ((x)->uregs[1]) +#define PT_REGS_PARM3(x) ((x)->uregs[2]) +#define PT_REGS_PARM4(x) ((x)->uregs[3]) +#define PT_REGS_PARM5(x) ((x)->uregs[4]) +#define PT_REGS_RET(x) ((x)->uregs[14]) +#define PT_REGS_FP(x) ((x)->uregs[11]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->uregs[0]) +#define PT_REGS_SP(x) ((x)->uregs[13]) +#define PT_REGS_IP(x) ((x)->uregs[12]) + +#elif defined(bpf_target_arm64) + +/* arm64 provides struct user_pt_regs instead of struct pt_regs to userspace */ +struct pt_regs; +#define PT_REGS_ARM64 const volatile struct user_pt_regs +#define PT_REGS_PARM1(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_PARM2(x) (((PT_REGS_ARM64 *)(x))->regs[1]) +#define PT_REGS_PARM3(x) (((PT_REGS_ARM64 *)(x))->regs[2]) +#define PT_REGS_PARM4(x) (((PT_REGS_ARM64 *)(x))->regs[3]) +#define PT_REGS_PARM5(x) (((PT_REGS_ARM64 *)(x))->regs[4]) +#define PT_REGS_RET(x) (((PT_REGS_ARM64 *)(x))->regs[30]) +/* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_FP(x) (((PT_REGS_ARM64 *)(x))->regs[29]) +#define PT_REGS_RC(x) (((PT_REGS_ARM64 *)(x))->regs[0]) +#define PT_REGS_SP(x) (((PT_REGS_ARM64 *)(x))->sp) +#define PT_REGS_IP(x) (((PT_REGS_ARM64 *)(x))->pc) + +#elif defined(bpf_target_mips) + +#define PT_REGS_PARM1(x) ((x)->regs[4]) +#define PT_REGS_PARM2(x) ((x)->regs[5]) +#define PT_REGS_PARM3(x) ((x)->regs[6]) +#define PT_REGS_PARM4(x) ((x)->regs[7]) +#define PT_REGS_PARM5(x) ((x)->regs[8]) +#define PT_REGS_RET(x) ((x)->regs[31]) +#define PT_REGS_FP(x) ((x)->regs[30]) /* Works only with CONFIG_FRAME_POINTER */ +#define PT_REGS_RC(x) ((x)->regs[1]) +#define PT_REGS_SP(x) ((x)->regs[29]) +#define PT_REGS_IP(x) ((x)->cp0_epc) + +#elif defined(bpf_target_powerpc) + +#define PT_REGS_PARM1(x) ((x)->gpr[3]) +#define PT_REGS_PARM2(x) ((x)->gpr[4]) +#define PT_REGS_PARM3(x) ((x)->gpr[5]) +#define PT_REGS_PARM4(x) ((x)->gpr[6]) +#define PT_REGS_PARM5(x) ((x)->gpr[7]) +#define PT_REGS_RC(x) ((x)->gpr[3]) +#define PT_REGS_SP(x) ((x)->sp) +#define PT_REGS_IP(x) ((x)->nip) + +#elif defined(bpf_target_sparc) + +#define PT_REGS_PARM1(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_PARM2(x) ((x)->u_regs[UREG_I1]) +#define PT_REGS_PARM3(x) ((x)->u_regs[UREG_I2]) +#define PT_REGS_PARM4(x) ((x)->u_regs[UREG_I3]) +#define PT_REGS_PARM5(x) ((x)->u_regs[UREG_I4]) +#define PT_REGS_RET(x) ((x)->u_regs[UREG_I7]) +#define PT_REGS_RC(x) ((x)->u_regs[UREG_I0]) +#define PT_REGS_SP(x) ((x)->u_regs[UREG_FP]) + +/* Should this also be a bpf_target check for the sparc case? */ +#if defined(__arch64__) +#define PT_REGS_IP(x) ((x)->tpc) +#else +#define PT_REGS_IP(x) ((x)->pc) +#endif + +#endif + +#if defined(bpf_target_powerpc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = (ctx)->link; }) +#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP +#elif defined(bpf_target_sparc) +#define BPF_KPROBE_READ_RET_IP(ip, ctx)		({ (ip) = PT_REGS_RET(ctx); }) +#define BPF_KRETPROBE_READ_RET_IP		BPF_KPROBE_READ_RET_IP +#else +#define BPF_KPROBE_READ_RET_IP(ip, ctx)					    \ +	({ bpf_probe_read(&(ip), sizeof(ip), (void *)PT_REGS_RET(ctx)); }) +#define BPF_KRETPROBE_READ_RET_IP(ip, ctx)				    \ +	({ bpf_probe_read(&(ip), sizeof(ip),				    \ +			  (void *)(PT_REGS_FP(ctx) + sizeof(ip))); }) +#endif + +#endif diff --git a/tools/lib/bpf/btf.c b/tools/lib/bpf/btf.c index 1aa189a9112a..88efa2bb7137 100644 --- a/tools/lib/bpf/btf.c +++ b/tools/lib/bpf/btf.c @@ -269,10 +269,9 @@ __s64 btf__resolve_size(const struct btf *btf, __u32 type_id)  		t = btf__type_by_id(btf, type_id);  	} +done:  	if (size < 0)  		return -EINVAL; - -done:  	if (nelems && size > UINT32_MAX / nelems)  		return -E2BIG; @@ -317,6 +316,28 @@ __s32 btf__find_by_name(const struct btf *btf, const char *type_name)  	return -ENOENT;  } +__s32 btf__find_by_name_kind(const struct btf *btf, const char *type_name, +			     __u32 kind) +{ +	__u32 i; + +	if (kind == BTF_KIND_UNKN || !strcmp(type_name, "void")) +		return 0; + +	for (i = 1; i <= btf->nr_types; i++) { +		const struct btf_type *t = btf->types[i]; +		const char *name; + +		if (btf_kind(t) != kind) +			continue; +		name = btf__name_by_offset(btf, t->name_off); +		if (name && !strcmp(type_name, name)) +			return i; +	} + +	return -ENOENT; +} +  void btf__free(struct btf *btf)  {  	if (!btf) @@ -390,14 +411,14 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)  	GElf_Ehdr ehdr;  	if (elf_version(EV_CURRENT) == EV_NONE) { -		pr_warning("failed to init libelf for %s\n", path); +		pr_warn("failed to init libelf for %s\n", path);  		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);  	}  	fd = open(path, O_RDONLY);  	if (fd < 0) {  		err = -errno; -		pr_warning("failed to open %s: %s\n", path, strerror(errno)); +		pr_warn("failed to open %s: %s\n", path, strerror(errno));  		return ERR_PTR(err);  	} @@ -405,19 +426,19 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)  	elf = elf_begin(fd, ELF_C_READ, NULL);  	if (!elf) { -		pr_warning("failed to open %s as ELF file\n", path); +		pr_warn("failed to open %s as ELF file\n", path);  		goto done;  	}  	if (!gelf_getehdr(elf, &ehdr)) { -		pr_warning("failed to get EHDR from %s\n", path); +		pr_warn("failed to get EHDR from %s\n", path);  		goto done;  	}  	if (!btf_check_endianness(&ehdr)) { -		pr_warning("non-native ELF endianness is not supported\n"); +		pr_warn("non-native ELF endianness is not supported\n");  		goto done;  	}  	if (!elf_rawdata(elf_getscn(elf, ehdr.e_shstrndx), NULL)) { -		pr_warning("failed to get e_shstrndx from %s\n", path); +		pr_warn("failed to get e_shstrndx from %s\n", path);  		goto done;  	} @@ -427,29 +448,29 @@ struct btf *btf__parse_elf(const char *path, struct btf_ext **btf_ext)  		idx++;  		if (gelf_getshdr(scn, &sh) != &sh) { -			pr_warning("failed to get section(%d) header from %s\n", -				   idx, path); +			pr_warn("failed to get section(%d) header from %s\n", +				idx, path);  			goto done;  		}  		name = elf_strptr(elf, ehdr.e_shstrndx, sh.sh_name);  		if (!name) { -			pr_warning("failed to get section(%d) name from %s\n", -				   idx, path); +			pr_warn("failed to get section(%d) name from %s\n", +				idx, path);  			goto done;  		}  		if (strcmp(name, BTF_ELF_SEC) == 0) {  			btf_data = elf_getdata(scn, 0);  			if (!btf_data) { -				pr_warning("failed to get section(%d, %s) data from %s\n", -					   idx, name, path); +				pr_warn("failed to get section(%d, %s) data from %s\n", +					idx, name, path);  				goto done;  			}  			continue;  		} else if (btf_ext && strcmp(name, BTF_EXT_ELF_SEC) == 0) {  			btf_ext_data = elf_getdata(scn, 0);  			if (!btf_ext_data) { -				pr_warning("failed to get section(%d, %s) data from %s\n", -					   idx, name, path); +				pr_warn("failed to get section(%d, %s) data from %s\n", +					idx, name, path);  				goto done;  			}  			continue; @@ -600,9 +621,9 @@ int btf__load(struct btf *btf)  			       log_buf, log_buf_size, false);  	if (btf->fd < 0) {  		err = -errno; -		pr_warning("Error loading BTF: %s(%d)\n", strerror(errno), errno); +		pr_warn("Error loading BTF: %s(%d)\n", strerror(errno), errno);  		if (*log_buf) -			pr_warning("%s\n", log_buf); +			pr_warn("%s\n", log_buf);  		goto done;  	} @@ -707,8 +728,8 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  	if (snprintf(container_name, max_name, "____btf_map_%s", map_name) ==  	    max_name) { -		pr_warning("map:%s length of '____btf_map_%s' is too long\n", -			   map_name, map_name); +		pr_warn("map:%s length of '____btf_map_%s' is too long\n", +			map_name, map_name);  		return -EINVAL;  	} @@ -721,14 +742,14 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  	container_type = btf__type_by_id(btf, container_id);  	if (!container_type) { -		pr_warning("map:%s cannot find BTF type for container_id:%u\n", -			   map_name, container_id); +		pr_warn("map:%s cannot find BTF type for container_id:%u\n", +			map_name, container_id);  		return -EINVAL;  	}  	if (!btf_is_struct(container_type) || btf_vlen(container_type) < 2) { -		pr_warning("map:%s container_name:%s is an invalid container struct\n", -			   map_name, container_name); +		pr_warn("map:%s container_name:%s is an invalid container struct\n", +			map_name, container_name);  		return -EINVAL;  	} @@ -737,25 +758,25 @@ int btf__get_map_kv_tids(const struct btf *btf, const char *map_name,  	key_size = btf__resolve_size(btf, key->type);  	if (key_size < 0) { -		pr_warning("map:%s invalid BTF key_type_size\n", map_name); +		pr_warn("map:%s invalid BTF key_type_size\n", map_name);  		return key_size;  	}  	if (expected_key_size != key_size) { -		pr_warning("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", -			   map_name, (__u32)key_size, expected_key_size); +		pr_warn("map:%s btf_key_type_size:%u != map_def_key_size:%u\n", +			map_name, (__u32)key_size, expected_key_size);  		return -EINVAL;  	}  	value_size = btf__resolve_size(btf, value->type);  	if (value_size < 0) { -		pr_warning("map:%s invalid BTF value_type_size\n", map_name); +		pr_warn("map:%s invalid BTF value_type_size\n", map_name);  		return value_size;  	}  	if (expected_value_size != value_size) { -		pr_warning("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", -			   map_name, (__u32)value_size, expected_value_size); +		pr_warn("map:%s btf_value_type_size:%u != map_def_value_size:%u\n", +			map_name, (__u32)value_size, expected_value_size);  		return -EINVAL;  	} @@ -888,14 +909,14 @@ static int btf_ext_setup_line_info(struct btf_ext *btf_ext)  	return btf_ext_setup_info(btf_ext, ¶m);  } -static int btf_ext_setup_offset_reloc(struct btf_ext *btf_ext) +static int btf_ext_setup_field_reloc(struct btf_ext *btf_ext)  {  	struct btf_ext_sec_setup_param param = { -		.off = btf_ext->hdr->offset_reloc_off, -		.len = btf_ext->hdr->offset_reloc_len, -		.min_rec_size = sizeof(struct bpf_offset_reloc), -		.ext_info = &btf_ext->offset_reloc_info, -		.desc = "offset_reloc", +		.off = btf_ext->hdr->field_reloc_off, +		.len = btf_ext->hdr->field_reloc_len, +		.min_rec_size = sizeof(struct bpf_field_reloc), +		.ext_info = &btf_ext->field_reloc_info, +		.desc = "field_reloc",  	};  	return btf_ext_setup_info(btf_ext, ¶m); @@ -975,9 +996,9 @@ struct btf_ext *btf_ext__new(__u8 *data, __u32 size)  		goto done;  	if (btf_ext->hdr->hdr_len < -	    offsetofend(struct btf_ext_header, offset_reloc_len)) +	    offsetofend(struct btf_ext_header, field_reloc_len))  		goto done; -	err = btf_ext_setup_offset_reloc(btf_ext); +	err = btf_ext_setup_field_reloc(btf_ext);  	if (err)  		goto done; diff --git a/tools/lib/bpf/btf.h b/tools/lib/bpf/btf.h index 9cb44b4fbf60..d9ac73a02cde 100644 --- a/tools/lib/bpf/btf.h +++ b/tools/lib/bpf/btf.h @@ -60,8 +60,8 @@ struct btf_ext_header {  	__u32	line_info_len;  	/* optional part of .BTF.ext header */ -	__u32	offset_reloc_off; -	__u32	offset_reloc_len; +	__u32	field_reloc_off; +	__u32	field_reloc_len;  };  LIBBPF_API void btf__free(struct btf *btf); @@ -72,6 +72,8 @@ LIBBPF_API int btf__finalize_data(struct bpf_object *obj, struct btf *btf);  LIBBPF_API int btf__load(struct btf *btf);  LIBBPF_API __s32 btf__find_by_name(const struct btf *btf,  				   const char *type_name); +LIBBPF_API __s32 btf__find_by_name_kind(const struct btf *btf, +					const char *type_name, __u32 kind);  LIBBPF_API __u32 btf__get_nr_types(const struct btf *btf);  LIBBPF_API const struct btf_type *btf__type_by_id(const struct btf *btf,  						  __u32 id); diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c index ede55fec3618..cb126d8fcf75 100644 --- a/tools/lib/bpf/btf_dump.c +++ b/tools/lib/bpf/btf_dump.c @@ -428,7 +428,7 @@ static int btf_dump_order_type(struct btf_dump *d, __u32 id, bool through_ptr)  		/* type loop, but resolvable through fwd declaration */  		if (btf_is_composite(t) && through_ptr && t->name_off != 0)  			return 0; -		pr_warning("unsatisfiable type cycle, id:[%u]\n", id); +		pr_warn("unsatisfiable type cycle, id:[%u]\n", id);  		return -ELOOP;  	} @@ -636,8 +636,8 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)  			if (id == cont_id)  				return;  			if (t->name_off == 0) { -				pr_warning("anonymous struct/union loop, id:[%u]\n", -					   id); +				pr_warn("anonymous struct/union loop, id:[%u]\n", +					id);  				return;  			}  			btf_dump_emit_struct_fwd(d, id, t); @@ -782,7 +782,7 @@ static int btf_align_of(const struct btf *btf, __u32 id)  		return align;  	}  	default: -		pr_warning("unsupported BTF_KIND:%u\n", btf_kind(t)); +		pr_warn("unsupported BTF_KIND:%u\n", btf_kind(t));  		return 1;  	}  } @@ -876,7 +876,6 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,  	__u16 vlen = btf_vlen(t);  	packed = is_struct ? btf_is_struct_packed(d->btf, id, t) : 0; -	align = packed ? 1 : btf_align_of(d->btf, id);  	btf_dump_printf(d, "%s%s%s {",  			is_struct ? "struct" : "union", @@ -906,6 +905,13 @@ static void btf_dump_emit_struct_def(struct btf_dump *d,  		btf_dump_printf(d, ";");  	} +	/* pad at the end, if necessary */ +	if (is_struct) { +		align = packed ? 1 : btf_align_of(d->btf, id); +		btf_dump_emit_bit_padding(d, off, t->size * 8, 0, align, +					  lvl + 1); +	} +  	if (vlen)  		btf_dump_printf(d, "\n");  	btf_dump_printf(d, "%s}", pfx(lvl)); @@ -969,6 +975,17 @@ static void btf_dump_emit_typedef_def(struct btf_dump *d, __u32 id,  {  	const char *name = btf_dump_ident_name(d, id); +	/* +	 * Old GCC versions are emitting invalid typedef for __gnuc_va_list +	 * pointing to VOID. This generates warnings from btf_dump() and +	 * results in uncompilable header file, so we are fixing it up here +	 * with valid typedef into __builtin_va_list. +	 */ +	if (t->type == 0 && strcmp(name, "__gnuc_va_list") == 0) { +		btf_dump_printf(d, "typedef __builtin_va_list __gnuc_va_list"); +		return; +	} +  	btf_dump_printf(d, "typedef ");  	btf_dump_emit_type_decl(d, t->type, name, lvl);  } @@ -1050,7 +1067,7 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,  			 * chain, restore stack, emit warning, and try to  			 * proceed nevertheless  			 */ -			pr_warning("not enough memory for decl stack:%d", err); +			pr_warn("not enough memory for decl stack:%d", err);  			d->decl_stack_cnt = stack_start;  			return;  		} @@ -1079,8 +1096,8 @@ static void btf_dump_emit_type_decl(struct btf_dump *d, __u32 id,  		case BTF_KIND_TYPEDEF:  			goto done;  		default: -			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", -				   btf_kind(t), id); +			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", +				btf_kind(t), id);  			goto done;  		}  	} @@ -1306,8 +1323,8 @@ static void btf_dump_emit_type_chain(struct btf_dump *d,  			return;  		}  		default: -			pr_warning("unexpected type in decl chain, kind:%u, id:[%u]\n", -				   kind, id); +			pr_warn("unexpected type in decl chain, kind:%u, id:[%u]\n", +				kind, id);  			return;  		} diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c index e0276520171b..3f09772192f1 100644 --- a/tools/lib/bpf/libbpf.c +++ b/tools/lib/bpf/libbpf.c @@ -33,6 +33,7 @@  #include <linux/limits.h>  #include <linux/perf_event.h>  #include <linux/ring_buffer.h> +#include <linux/version.h>  #include <sys/epoll.h>  #include <sys/ioctl.h>  #include <sys/mman.h> @@ -104,7 +105,7 @@ void libbpf_print(enum libbpf_print_level level, const char *format, ...)  	err = action;			\  	if (err)			\  		goto out;		\ -} while(0) +} while (0)  /* Copied from tools/perf/util/util.h */ @@ -141,6 +142,8 @@ struct bpf_capabilities {  	__u32 btf_func:1;  	/* BTF_KIND_VAR and BTF_KIND_DATASEC support */  	__u32 btf_datasec:1; +	/* BPF_F_MMAPABLE is supported for arrays */ +	__u32 array_mmap:1;  };  /* @@ -168,10 +171,8 @@ struct bpf_program {  			RELO_DATA,  		} type;  		int insn_idx; -		union { -			int map_idx; -			int text_off; -		}; +		int map_idx; +		int sym_off;  	} *reloc_desc;  	int nr_reloc;  	int log_level; @@ -187,6 +188,8 @@ struct bpf_program {  	bpf_program_clear_priv_t clear_priv;  	enum bpf_attach_type expected_attach_type; +	__u32 attach_btf_id; +	__u32 attach_prog_fd;  	void *func_info;  	__u32 func_info_rec_size;  	__u32 func_info_cnt; @@ -225,6 +228,9 @@ struct bpf_map {  	void *priv;  	bpf_map_clear_priv_t clear_priv;  	enum libbpf_map_type libbpf_type; +	char *pin_path; +	bool pinned; +	bool reused;  };  struct bpf_secdata { @@ -248,6 +254,7 @@ struct bpf_object {  	bool loaded;  	bool has_pseudo_calls; +	bool relaxed_core_relocs;  	/*  	 * Information when doing elf related work. Only valid if fd @@ -255,7 +262,7 @@ struct bpf_object {  	 */  	struct {  		int fd; -		void *obj_buf; +		const void *obj_buf;  		size_t obj_buf_sz;  		Elf *elf;  		GElf_Ehdr ehdr; @@ -267,8 +274,8 @@ struct bpf_object {  		struct {  			GElf_Shdr shdr;  			Elf_Data *data; -		} *reloc; -		int nr_reloc; +		} *reloc_sects; +		int nr_reloc_sects;  		int maps_shndx;  		int btf_maps_shndx;  		int text_shndx; @@ -310,8 +317,8 @@ void bpf_program__unload(struct bpf_program *prog)  		for (i = 0; i < prog->instances.nr; i++)  			zclose(prog->instances.fds[i]);  	} else if (prog->instances.nr != -1) { -		pr_warning("Internal error: instances.nr is %d\n", -			   prog->instances.nr); +		pr_warn("Internal error: instances.nr is %d\n", +			prog->instances.nr);  	}  	prog->instances.nr = -1; @@ -362,8 +369,8 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,  	const size_t bpf_insn_sz = sizeof(struct bpf_insn);  	if (size == 0 || size % bpf_insn_sz) { -		pr_warning("corrupted section '%s', size: %zu\n", -			   section_name, size); +		pr_warn("corrupted section '%s', size: %zu\n", +			section_name, size);  		return -EINVAL;  	} @@ -371,22 +378,22 @@ bpf_program__init(void *data, size_t size, char *section_name, int idx,  	prog->section_name = strdup(section_name);  	if (!prog->section_name) { -		pr_warning("failed to alloc name for prog under section(%d) %s\n", -			   idx, section_name); +		pr_warn("failed to alloc name for prog under section(%d) %s\n", +			idx, section_name);  		goto errout;  	}  	prog->pin_name = __bpf_program__pin_name(prog);  	if (!prog->pin_name) { -		pr_warning("failed to alloc pin name for prog under section(%d) %s\n", -			   idx, section_name); +		pr_warn("failed to alloc pin name for prog under section(%d) %s\n", +			idx, section_name);  		goto errout;  	}  	prog->insns = malloc(size);  	if (!prog->insns) { -		pr_warning("failed to alloc insns for prog under section %s\n", -			   section_name); +		pr_warn("failed to alloc insns for prog under section %s\n", +			section_name);  		goto errout;  	}  	prog->insns_cnt = size / bpf_insn_sz; @@ -424,8 +431,8 @@ bpf_object__add_program(struct bpf_object *obj, void *data, size_t size,  		 * is still valid, so don't need special treat for  		 * bpf_close_object().  		 */ -		pr_warning("failed to alloc a new program under section '%s'\n", -			   section_name); +		pr_warn("failed to alloc a new program under section '%s'\n", +			section_name);  		bpf_program__exit(&prog);  		return -ENOMEM;  	} @@ -465,8 +472,8 @@ bpf_object__init_prog_names(struct bpf_object *obj)  					  obj->efile.strtabidx,  					  sym.st_name);  			if (!name) { -				pr_warning("failed to get sym name string for prog %s\n", -					   prog->section_name); +				pr_warn("failed to get sym name string for prog %s\n", +					prog->section_name);  				return -LIBBPF_ERRNO__LIBELF;  			}  		} @@ -475,15 +482,15 @@ bpf_object__init_prog_names(struct bpf_object *obj)  			name = ".text";  		if (!name) { -			pr_warning("failed to find sym for prog %s\n", -				   prog->section_name); +			pr_warn("failed to find sym for prog %s\n", +				prog->section_name);  			return -EINVAL;  		}  		prog->name = strdup(name);  		if (!prog->name) { -			pr_warning("failed to allocate memory for prog sym %s\n", -				   name); +			pr_warn("failed to allocate memory for prog sym %s\n", +				name);  			return -ENOMEM;  		}  	} @@ -491,25 +498,43 @@ bpf_object__init_prog_names(struct bpf_object *obj)  	return 0;  } +static __u32 get_kernel_version(void) +{ +	__u32 major, minor, patch; +	struct utsname info; + +	uname(&info); +	if (sscanf(info.release, "%u.%u.%u", &major, &minor, &patch) != 3) +		return 0; +	return KERNEL_VERSION(major, minor, patch); +} +  static struct bpf_object *bpf_object__new(const char *path, -					  void *obj_buf, -					  size_t obj_buf_sz) +					  const void *obj_buf, +					  size_t obj_buf_sz, +					  const char *obj_name)  {  	struct bpf_object *obj;  	char *end;  	obj = calloc(1, sizeof(struct bpf_object) + strlen(path) + 1);  	if (!obj) { -		pr_warning("alloc memory failed for %s\n", path); +		pr_warn("alloc memory failed for %s\n", path);  		return ERR_PTR(-ENOMEM);  	}  	strcpy(obj->path, path); -	/* Using basename() GNU version which doesn't modify arg. */ -	strncpy(obj->name, basename((void *)path), sizeof(obj->name) - 1); -	end = strchr(obj->name, '.'); -	if (end) -		*end = 0; +	if (obj_name) { +		strncpy(obj->name, obj_name, sizeof(obj->name) - 1); +		obj->name[sizeof(obj->name) - 1] = 0; +	} else { +		/* Using basename() GNU version which doesn't modify arg. */ +		strncpy(obj->name, basename((void *)path), +			sizeof(obj->name) - 1); +		end = strchr(obj->name, '.'); +		if (end) +			*end = 0; +	}  	obj->efile.fd = -1;  	/* @@ -526,6 +551,7 @@ static struct bpf_object *bpf_object__new(const char *path,  	obj->efile.rodata_shndx = -1;  	obj->efile.bss_shndx = -1; +	obj->kern_version = get_kernel_version();  	obj->loaded = false;  	INIT_LIST_HEAD(&obj->list); @@ -547,8 +573,8 @@ static void bpf_object__elf_finish(struct bpf_object *obj)  	obj->efile.rodata = NULL;  	obj->efile.bss = NULL; -	zfree(&obj->efile.reloc); -	obj->efile.nr_reloc = 0; +	zfree(&obj->efile.reloc_sects); +	obj->efile.nr_reloc_sects = 0;  	zclose(obj->efile.fd);  	obj->efile.obj_buf = NULL;  	obj->efile.obj_buf_sz = 0; @@ -560,7 +586,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)  	GElf_Ehdr *ep;  	if (obj_elf_valid(obj)) { -		pr_warning("elf init: internal error\n"); +		pr_warn("elf init: internal error\n");  		return -LIBBPF_ERRNO__LIBELF;  	} @@ -569,7 +595,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)  		 * obj_buf should have been validated by  		 * bpf_object__open_buffer().  		 */ -		obj->efile.elf = elf_memory(obj->efile.obj_buf, +		obj->efile.elf = elf_memory((char *)obj->efile.obj_buf,  					    obj->efile.obj_buf_sz);  	} else {  		obj->efile.fd = open(obj->path, O_RDONLY); @@ -578,7 +604,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)  			err = -errno;  			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); -			pr_warning("failed to open %s: %s\n", obj->path, cp); +			pr_warn("failed to open %s: %s\n", obj->path, cp);  			return err;  		} @@ -587,13 +613,13 @@ static int bpf_object__elf_init(struct bpf_object *obj)  	}  	if (!obj->efile.elf) { -		pr_warning("failed to open %s as ELF file\n", obj->path); +		pr_warn("failed to open %s as ELF file\n", obj->path);  		err = -LIBBPF_ERRNO__LIBELF;  		goto errout;  	}  	if (!gelf_getehdr(obj->efile.elf, &obj->efile.ehdr)) { -		pr_warning("failed to get EHDR from %s\n", obj->path); +		pr_warn("failed to get EHDR from %s\n", obj->path);  		err = -LIBBPF_ERRNO__FORMAT;  		goto errout;  	} @@ -602,7 +628,7 @@ static int bpf_object__elf_init(struct bpf_object *obj)  	/* Old LLVM set e_machine to EM_NONE */  	if (ep->e_type != ET_REL ||  	    (ep->e_machine && ep->e_machine != EM_BPF)) { -		pr_warning("%s is not an eBPF object file\n", obj->path); +		pr_warn("%s is not an eBPF object file\n", obj->path);  		err = -LIBBPF_ERRNO__FORMAT;  		goto errout;  	} @@ -624,7 +650,7 @@ static int bpf_object__check_endianness(struct bpf_object *obj)  #else  # error "Unrecognized __BYTE_ORDER__"  #endif -	pr_warning("endianness mismatch.\n"); +	pr_warn("endianness mismatch.\n");  	return -LIBBPF_ERRNO__ENDIAN;  } @@ -642,7 +668,7 @@ bpf_object__init_kversion(struct bpf_object *obj, void *data, size_t size)  	__u32 kver;  	if (size != sizeof(kver)) { -		pr_warning("invalid kver section in %s\n", obj->path); +		pr_warn("invalid kver section in %s\n", obj->path);  		return -LIBBPF_ERRNO__FORMAT;  	}  	memcpy(&kver, data, sizeof(kver)); @@ -684,15 +710,15 @@ static int bpf_object_search_section_size(const struct bpf_object *obj,  		idx++;  		if (gelf_getshdr(scn, &sh) != &sh) { -			pr_warning("failed to get section(%d) header from %s\n", -				   idx, obj->path); +			pr_warn("failed to get section(%d) header from %s\n", +				idx, obj->path);  			return -EIO;  		}  		sec_name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);  		if (!sec_name) { -			pr_warning("failed to get section(%d) name from %s\n", -				   idx, obj->path); +			pr_warn("failed to get section(%d) name from %s\n", +				idx, obj->path);  			return -EIO;  		} @@ -701,8 +727,8 @@ static int bpf_object_search_section_size(const struct bpf_object *obj,  		data = elf_getdata(scn, 0);  		if (!data) { -			pr_warning("failed to get section(%d) data from %s(%s)\n", -				   idx, name, obj->path); +			pr_warn("failed to get section(%d) data from %s(%s)\n", +				idx, name, obj->path);  			return -EIO;  		} @@ -762,8 +788,8 @@ int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,  		sname = elf_strptr(obj->efile.elf, obj->efile.strtabidx,  				   sym.st_name);  		if (!sname) { -			pr_warning("failed to get sym name string for var %s\n", -				   name); +			pr_warn("failed to get sym name string for var %s\n", +				name);  			return -EIO;  		}  		if (strcmp(name, sname) == 0) { @@ -787,7 +813,7 @@ static struct bpf_map *bpf_object__add_map(struct bpf_object *obj)  	new_cap = max((size_t)4, obj->maps_cap * 3 / 2);  	new_maps = realloc(obj->maps, new_cap * sizeof(*obj->maps));  	if (!new_maps) { -		pr_warning("alloc maps for object failed\n"); +		pr_warn("alloc maps for object failed\n");  		return ERR_PTR(-ENOMEM);  	} @@ -828,11 +854,9 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,  		 libbpf_type_to_btf_name[type]);  	map->name = strdup(map_name);  	if (!map->name) { -		pr_warning("failed to alloc map name\n"); +		pr_warn("failed to alloc map name\n");  		return -ENOMEM;  	} -	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu.\n", -		 map_name, map->sec_idx, map->sec_offset);  	def = &map->def;  	def->type = BPF_MAP_TYPE_ARRAY; @@ -840,11 +864,17 @@ bpf_object__init_internal_map(struct bpf_object *obj, enum libbpf_map_type type,  	def->value_size = data->d_size;  	def->max_entries = 1;  	def->map_flags = type == LIBBPF_MAP_RODATA ? BPF_F_RDONLY_PROG : 0; +	if (obj->caps.array_mmap) +		def->map_flags |= BPF_F_MMAPABLE; + +	pr_debug("map '%s' (global data): at sec_idx %d, offset %zu, flags %x.\n", +		 map_name, map->sec_idx, map->sec_offset, def->map_flags); +  	if (data_buff) {  		*data_buff = malloc(data->d_size);  		if (!*data_buff) {  			zfree(&map->name); -			pr_warning("failed to alloc map content buffer\n"); +			pr_warn("failed to alloc map content buffer\n");  			return -ENOMEM;  		}  		memcpy(*data_buff, data->d_buf, data->d_size); @@ -906,8 +936,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  	if (scn)  		data = elf_getdata(scn, NULL);  	if (!scn || !data) { -		pr_warning("failed to get Elf_Data from map section %d\n", -			   obj->efile.maps_shndx); +		pr_warn("failed to get Elf_Data from map section %d\n", +			obj->efile.maps_shndx);  		return -EINVAL;  	} @@ -932,13 +962,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  	pr_debug("maps in %s: %d maps in %zd bytes\n",  		 obj->path, nr_maps, data->d_size); -	map_def_sz = data->d_size / nr_maps; -	if (!data->d_size || (data->d_size % nr_maps) != 0) { -		pr_warning("unable to determine map definition size " -			   "section %s, %d maps in %zd bytes\n", -			   obj->path, nr_maps, data->d_size); +	if (!data->d_size || nr_maps == 0 || (data->d_size % nr_maps) != 0) { +		pr_warn("unable to determine map definition size section %s, %d maps in %zd bytes\n", +			obj->path, nr_maps, data->d_size);  		return -EINVAL;  	} +	map_def_sz = data->d_size / nr_maps;  	/* Fill obj->maps using data in "maps" section.  */  	for (i = 0; i < nr_syms; i++) { @@ -959,8 +988,8 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  		map_name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,  				      sym.st_name);  		if (!map_name) { -			pr_warning("failed to get map #%d name sym string for obj %s\n", -				   i, obj->path); +			pr_warn("failed to get map #%d name sym string for obj %s\n", +				i, obj->path);  			return -LIBBPF_ERRNO__FORMAT;  		} @@ -970,14 +999,14 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  		pr_debug("map '%s' (legacy): at sec_idx %d, offset %zu.\n",  			 map_name, map->sec_idx, map->sec_offset);  		if (sym.st_value + map_def_sz > data->d_size) { -			pr_warning("corrupted maps section in %s: last map \"%s\" too small\n", -				   obj->path, map_name); +			pr_warn("corrupted maps section in %s: last map \"%s\" too small\n", +				obj->path, map_name);  			return -EINVAL;  		}  		map->name = strdup(map_name);  		if (!map->name) { -			pr_warning("failed to alloc map name\n"); +			pr_warn("failed to alloc map name\n");  			return -ENOMEM;  		}  		pr_debug("map %d is \"%s\"\n", i, map->name); @@ -998,13 +1027,12 @@ static int bpf_object__init_user_maps(struct bpf_object *obj, bool strict)  			 * incompatible.  			 */  			char *b; +  			for (b = ((char *)def) + sizeof(struct bpf_map_def);  			     b < ((char *)def) + map_def_sz; b++) {  				if (*b != 0) { -					pr_warning("maps section in %s: \"%s\" " -						   "has unrecognized, non-zero " -						   "options\n", -						   obj->path, map_name); +					pr_warn("maps section in %s: \"%s\" has unrecognized, non-zero options\n", +						obj->path, map_name);  					if (strict)  						return -EINVAL;  				} @@ -1041,27 +1069,28 @@ skip_mods_and_typedefs(const struct btf *btf, __u32 id, __u32 *res_id)   */  static bool get_map_field_int(const char *map_name, const struct btf *btf,  			      const struct btf_type *def, -			      const struct btf_member *m, __u32 *res) { +			      const struct btf_member *m, __u32 *res) +{  	const struct btf_type *t = skip_mods_and_typedefs(btf, m->type, NULL);  	const char *name = btf__name_by_offset(btf, m->name_off);  	const struct btf_array *arr_info;  	const struct btf_type *arr_t;  	if (!btf_is_ptr(t)) { -		pr_warning("map '%s': attr '%s': expected PTR, got %u.\n", -			   map_name, name, btf_kind(t)); +		pr_warn("map '%s': attr '%s': expected PTR, got %u.\n", +			map_name, name, btf_kind(t));  		return false;  	}  	arr_t = btf__type_by_id(btf, t->type);  	if (!arr_t) { -		pr_warning("map '%s': attr '%s': type [%u] not found.\n", -			   map_name, name, t->type); +		pr_warn("map '%s': attr '%s': type [%u] not found.\n", +			map_name, name, t->type);  		return false;  	}  	if (!btf_is_array(arr_t)) { -		pr_warning("map '%s': attr '%s': expected ARRAY, got %u.\n", -			   map_name, name, btf_kind(arr_t)); +		pr_warn("map '%s': attr '%s': expected ARRAY, got %u.\n", +			map_name, name, btf_kind(arr_t));  		return false;  	}  	arr_info = btf_array(arr_t); @@ -1069,10 +1098,32 @@ static bool get_map_field_int(const char *map_name, const struct btf *btf,  	return true;  } +static int build_map_pin_path(struct bpf_map *map, const char *path) +{ +	char buf[PATH_MAX]; +	int err, len; + +	if (!path) +		path = "/sys/fs/bpf"; + +	len = snprintf(buf, PATH_MAX, "%s/%s", path, bpf_map__name(map)); +	if (len < 0) +		return -EINVAL; +	else if (len >= PATH_MAX) +		return -ENAMETOOLONG; + +	err = bpf_map__set_pin_path(map, buf); +	if (err) +		return err; + +	return 0; +} +  static int bpf_object__init_user_btf_map(struct bpf_object *obj,  					 const struct btf_type *sec,  					 int var_idx, int sec_idx, -					 const Elf_Data *data, bool strict) +					 const Elf_Data *data, bool strict, +					 const char *pin_root_path)  {  	const struct btf_type *var, *def, *t;  	const struct btf_var_secinfo *vi; @@ -1089,33 +1140,33 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  	vlen = btf_vlen(var);  	if (map_name == NULL || map_name[0] == '\0') { -		pr_warning("map #%d: empty name.\n", var_idx); +		pr_warn("map #%d: empty name.\n", var_idx);  		return -EINVAL;  	}  	if ((__u64)vi->offset + vi->size > data->d_size) { -		pr_warning("map '%s' BTF data is corrupted.\n", map_name); +		pr_warn("map '%s' BTF data is corrupted.\n", map_name);  		return -EINVAL;  	}  	if (!btf_is_var(var)) { -		pr_warning("map '%s': unexpected var kind %u.\n", -			   map_name, btf_kind(var)); +		pr_warn("map '%s': unexpected var kind %u.\n", +			map_name, btf_kind(var));  		return -EINVAL;  	}  	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&  	    var_extra->linkage != BTF_VAR_STATIC) { -		pr_warning("map '%s': unsupported var linkage %u.\n", -			   map_name, var_extra->linkage); +		pr_warn("map '%s': unsupported var linkage %u.\n", +			map_name, var_extra->linkage);  		return -EOPNOTSUPP;  	}  	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);  	if (!btf_is_struct(def)) { -		pr_warning("map '%s': unexpected def kind %u.\n", -			   map_name, btf_kind(var)); +		pr_warn("map '%s': unexpected def kind %u.\n", +			map_name, btf_kind(var));  		return -EINVAL;  	}  	if (def->size > vi->size) { -		pr_warning("map '%s': invalid def size.\n", map_name); +		pr_warn("map '%s': invalid def size.\n", map_name);  		return -EINVAL;  	} @@ -1124,7 +1175,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  		return PTR_ERR(map);  	map->name = strdup(map_name);  	if (!map->name) { -		pr_warning("map '%s': failed to alloc map name.\n", map_name); +		pr_warn("map '%s': failed to alloc map name.\n", map_name);  		return -ENOMEM;  	}  	map->libbpf_type = LIBBPF_MAP_UNSPEC; @@ -1140,8 +1191,7 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  		const char *name = btf__name_by_offset(obj->btf, m->name_off);  		if (!name) { -			pr_warning("map '%s': invalid field #%d.\n", -				   map_name, i); +			pr_warn("map '%s': invalid field #%d.\n", map_name, i);  			return -EINVAL;  		}  		if (strcmp(name, "type") == 0) { @@ -1171,8 +1221,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  			pr_debug("map '%s': found key_size = %u.\n",  				 map_name, sz);  			if (map->def.key_size && map->def.key_size != sz) { -				pr_warning("map '%s': conflicting key size %u != %u.\n", -					   map_name, map->def.key_size, sz); +				pr_warn("map '%s': conflicting key size %u != %u.\n", +					map_name, map->def.key_size, sz);  				return -EINVAL;  			}  			map->def.key_size = sz; @@ -1181,26 +1231,26 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  			t = btf__type_by_id(obj->btf, m->type);  			if (!t) { -				pr_warning("map '%s': key type [%d] not found.\n", -					   map_name, m->type); +				pr_warn("map '%s': key type [%d] not found.\n", +					map_name, m->type);  				return -EINVAL;  			}  			if (!btf_is_ptr(t)) { -				pr_warning("map '%s': key spec is not PTR: %u.\n", -					   map_name, btf_kind(t)); +				pr_warn("map '%s': key spec is not PTR: %u.\n", +					map_name, btf_kind(t));  				return -EINVAL;  			}  			sz = btf__resolve_size(obj->btf, t->type);  			if (sz < 0) { -				pr_warning("map '%s': can't determine key size for type [%u]: %lld.\n", -					   map_name, t->type, sz); +				pr_warn("map '%s': can't determine key size for type [%u]: %lld.\n", +					map_name, t->type, sz);  				return sz;  			}  			pr_debug("map '%s': found key [%u], sz = %lld.\n",  				 map_name, t->type, sz);  			if (map->def.key_size && map->def.key_size != sz) { -				pr_warning("map '%s': conflicting key size %u != %lld.\n", -					   map_name, map->def.key_size, sz); +				pr_warn("map '%s': conflicting key size %u != %lld.\n", +					map_name, map->def.key_size, sz);  				return -EINVAL;  			}  			map->def.key_size = sz; @@ -1214,8 +1264,8 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  			pr_debug("map '%s': found value_size = %u.\n",  				 map_name, sz);  			if (map->def.value_size && map->def.value_size != sz) { -				pr_warning("map '%s': conflicting value size %u != %u.\n", -					   map_name, map->def.value_size, sz); +				pr_warn("map '%s': conflicting value size %u != %u.\n", +					map_name, map->def.value_size, sz);  				return -EINVAL;  			}  			map->def.value_size = sz; @@ -1224,34 +1274,58 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  			t = btf__type_by_id(obj->btf, m->type);  			if (!t) { -				pr_warning("map '%s': value type [%d] not found.\n", -					   map_name, m->type); +				pr_warn("map '%s': value type [%d] not found.\n", +					map_name, m->type);  				return -EINVAL;  			}  			if (!btf_is_ptr(t)) { -				pr_warning("map '%s': value spec is not PTR: %u.\n", -					   map_name, btf_kind(t)); +				pr_warn("map '%s': value spec is not PTR: %u.\n", +					map_name, btf_kind(t));  				return -EINVAL;  			}  			sz = btf__resolve_size(obj->btf, t->type);  			if (sz < 0) { -				pr_warning("map '%s': can't determine value size for type [%u]: %lld.\n", -					   map_name, t->type, sz); +				pr_warn("map '%s': can't determine value size for type [%u]: %lld.\n", +					map_name, t->type, sz);  				return sz;  			}  			pr_debug("map '%s': found value [%u], sz = %lld.\n",  				 map_name, t->type, sz);  			if (map->def.value_size && map->def.value_size != sz) { -				pr_warning("map '%s': conflicting value size %u != %lld.\n", -					   map_name, map->def.value_size, sz); +				pr_warn("map '%s': conflicting value size %u != %lld.\n", +					map_name, map->def.value_size, sz);  				return -EINVAL;  			}  			map->def.value_size = sz;  			map->btf_value_type_id = t->type; +		} else if (strcmp(name, "pinning") == 0) { +			__u32 val; +			int err; + +			if (!get_map_field_int(map_name, obj->btf, def, m, +					       &val)) +				return -EINVAL; +			pr_debug("map '%s': found pinning = %u.\n", +				 map_name, val); + +			if (val != LIBBPF_PIN_NONE && +			    val != LIBBPF_PIN_BY_NAME) { +				pr_warn("map '%s': invalid pinning value %u.\n", +					map_name, val); +				return -EINVAL; +			} +			if (val == LIBBPF_PIN_BY_NAME) { +				err = build_map_pin_path(map, pin_root_path); +				if (err) { +					pr_warn("map '%s': couldn't build pin path.\n", +						map_name); +					return err; +				} +			}  		} else {  			if (strict) { -				pr_warning("map '%s': unknown field '%s'.\n", -					   map_name, name); +				pr_warn("map '%s': unknown field '%s'.\n", +					map_name, name);  				return -ENOTSUP;  			}  			pr_debug("map '%s': ignoring unknown field '%s'.\n", @@ -1260,14 +1334,15 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,  	}  	if (map->def.type == BPF_MAP_TYPE_UNSPEC) { -		pr_warning("map '%s': map type isn't specified.\n", map_name); +		pr_warn("map '%s': map type isn't specified.\n", map_name);  		return -EINVAL;  	}  	return 0;  } -static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict) +static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict, +					  const char *pin_root_path)  {  	const struct btf_type *sec = NULL;  	int nr_types, i, vlen, err; @@ -1283,8 +1358,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)  	if (scn)  		data = elf_getdata(scn, NULL);  	if (!scn || !data) { -		pr_warning("failed to get Elf_Data from map section %d (%s)\n", -			   obj->efile.maps_shndx, MAPS_ELF_SEC); +		pr_warn("failed to get Elf_Data from map section %d (%s)\n", +			obj->efile.maps_shndx, MAPS_ELF_SEC);  		return -EINVAL;  	} @@ -1301,7 +1376,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)  	}  	if (!sec) { -		pr_warning("DATASEC '%s' not found.\n", MAPS_ELF_SEC); +		pr_warn("DATASEC '%s' not found.\n", MAPS_ELF_SEC);  		return -ENOENT;  	} @@ -1309,7 +1384,8 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)  	for (i = 0; i < vlen; i++) {  		err = bpf_object__init_user_btf_map(obj, sec, i,  						    obj->efile.btf_maps_shndx, -						    data, strict); +						    data, strict, +						    pin_root_path);  		if (err)  			return err;  	} @@ -1317,16 +1393,17 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict)  	return 0;  } -static int bpf_object__init_maps(struct bpf_object *obj, int flags) +static int bpf_object__init_maps(struct bpf_object *obj, bool relaxed_maps, +				 const char *pin_root_path)  { -	bool strict = !(flags & MAPS_RELAX_COMPAT); +	bool strict = !relaxed_maps;  	int err;  	err = bpf_object__init_user_maps(obj, strict);  	if (err)  		return err; -	err = bpf_object__init_user_btf_maps(obj, strict); +	err = bpf_object__init_user_btf_maps(obj, strict, pin_root_path);  	if (err)  		return err; @@ -1445,14 +1522,13 @@ static int bpf_object__init_btf(struct bpf_object *obj,  	if (btf_data) {  		obj->btf = btf__new(btf_data->d_buf, btf_data->d_size);  		if (IS_ERR(obj->btf)) { -			pr_warning("Error loading ELF section %s: %d.\n", -				   BTF_ELF_SEC, err); +			pr_warn("Error loading ELF section %s: %d.\n", +				BTF_ELF_SEC, err);  			goto out;  		}  		err = btf__finalize_data(obj, obj->btf);  		if (err) { -			pr_warning("Error finalizing %s: %d.\n", -				   BTF_ELF_SEC, err); +			pr_warn("Error finalizing %s: %d.\n", BTF_ELF_SEC, err);  			goto out;  		}  	} @@ -1465,8 +1541,8 @@ static int bpf_object__init_btf(struct bpf_object *obj,  		obj->btf_ext = btf_ext__new(btf_ext_data->d_buf,  					    btf_ext_data->d_size);  		if (IS_ERR(obj->btf_ext)) { -			pr_warning("Error loading ELF section %s: %ld. Ignored and continue.\n", -				   BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext)); +			pr_warn("Error loading ELF section %s: %ld. Ignored and continue.\n", +				BTF_EXT_ELF_SEC, PTR_ERR(obj->btf_ext));  			obj->btf_ext = NULL;  			goto out;  		} @@ -1482,7 +1558,7 @@ out:  		obj->btf = NULL;  	}  	if (btf_required && !obj->btf) { -		pr_warning("BTF is required, but is missing or corrupted.\n"); +		pr_warn("BTF is required, but is missing or corrupted.\n");  		return err == 0 ? -ENOENT : err;  	}  	return 0; @@ -1500,8 +1576,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)  	err = btf__load(obj->btf);  	if (err) { -		pr_warning("Error loading %s into kernel: %d.\n", -			   BTF_ELF_SEC, err); +		pr_warn("Error loading %s into kernel: %d.\n", +			BTF_ELF_SEC, err);  		btf__free(obj->btf);  		obj->btf = NULL;  		/* btf_ext can't exist without btf, so free it as well */ @@ -1516,7 +1592,8 @@ static int bpf_object__sanitize_and_load_btf(struct bpf_object *obj)  	return 0;  } -static int bpf_object__elf_collect(struct bpf_object *obj, int flags) +static int bpf_object__elf_collect(struct bpf_object *obj, bool relaxed_maps, +				   const char *pin_root_path)  {  	Elf *elf = obj->efile.elf;  	GElf_Ehdr *ep = &obj->efile.ehdr; @@ -1527,7 +1604,7 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  	/* Elf is corrupted/truncated, avoid calling elf_strptr. */  	if (!elf_rawdata(elf_getscn(elf, ep->e_shstrndx), NULL)) { -		pr_warning("failed to get e_shstrndx from %s\n", obj->path); +		pr_warn("failed to get e_shstrndx from %s\n", obj->path);  		return -LIBBPF_ERRNO__FORMAT;  	} @@ -1538,22 +1615,22 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  		idx++;  		if (gelf_getshdr(scn, &sh) != &sh) { -			pr_warning("failed to get section(%d) header from %s\n", -				   idx, obj->path); +			pr_warn("failed to get section(%d) header from %s\n", +				idx, obj->path);  			return -LIBBPF_ERRNO__FORMAT;  		}  		name = elf_strptr(elf, ep->e_shstrndx, sh.sh_name);  		if (!name) { -			pr_warning("failed to get section(%d) name from %s\n", -				   idx, obj->path); +			pr_warn("failed to get section(%d) name from %s\n", +				idx, obj->path);  			return -LIBBPF_ERRNO__FORMAT;  		}  		data = elf_getdata(scn, 0);  		if (!data) { -			pr_warning("failed to get section(%d) data from %s(%s)\n", -				   idx, name, obj->path); +			pr_warn("failed to get section(%d) data from %s(%s)\n", +				idx, name, obj->path);  			return -LIBBPF_ERRNO__FORMAT;  		}  		pr_debug("section(%d) %s, size %ld, link %d, flags %lx, type=%d\n", @@ -1583,8 +1660,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  			btf_ext_data = data;  		} else if (sh.sh_type == SHT_SYMTAB) {  			if (obj->efile.symbols) { -				pr_warning("bpf: multiple SYMTAB in %s\n", -					   obj->path); +				pr_warn("bpf: multiple SYMTAB in %s\n", +					obj->path);  				return -LIBBPF_ERRNO__FORMAT;  			}  			obj->efile.symbols = data; @@ -1594,14 +1671,16 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  				if (strcmp(name, ".text") == 0)  					obj->efile.text_shndx = idx;  				err = bpf_object__add_program(obj, data->d_buf, -							      data->d_size, name, idx); +							      data->d_size, +							      name, idx);  				if (err) {  					char errmsg[STRERR_BUFSIZE]; -					char *cp = libbpf_strerror_r(-err, errmsg, -								     sizeof(errmsg)); +					char *cp; -					pr_warning("failed to alloc program %s (%s): %s", -						   name, obj->path, cp); +					cp = libbpf_strerror_r(-err, errmsg, +							       sizeof(errmsg)); +					pr_warn("failed to alloc program %s (%s): %s", +						name, obj->path, cp);  					return err;  				}  			} else if (strcmp(name, ".data") == 0) { @@ -1614,8 +1693,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  				pr_debug("skip section(%d) %s\n", idx, name);  			}  		} else if (sh.sh_type == SHT_REL) { -			int nr_reloc = obj->efile.nr_reloc; -			void *reloc = obj->efile.reloc; +			int nr_sects = obj->efile.nr_reloc_sects; +			void *sects = obj->efile.reloc_sects;  			int sec = sh.sh_info; /* points to other section */  			/* Only do relo for section with exec instructions */ @@ -1625,18 +1704,18 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  				continue;  			} -			reloc = reallocarray(reloc, nr_reloc + 1, -					     sizeof(*obj->efile.reloc)); -			if (!reloc) { -				pr_warning("realloc failed\n"); +			sects = reallocarray(sects, nr_sects + 1, +					     sizeof(*obj->efile.reloc_sects)); +			if (!sects) { +				pr_warn("reloc_sects realloc failed\n");  				return -ENOMEM;  			} -			obj->efile.reloc = reloc; -			obj->efile.nr_reloc++; +			obj->efile.reloc_sects = sects; +			obj->efile.nr_reloc_sects++; -			obj->efile.reloc[nr_reloc].shdr = sh; -			obj->efile.reloc[nr_reloc].data = data; +			obj->efile.reloc_sects[nr_sects].shdr = sh; +			obj->efile.reloc_sects[nr_sects].data = data;  		} else if (sh.sh_type == SHT_NOBITS && strcmp(name, ".bss") == 0) {  			obj->efile.bss = data;  			obj->efile.bss_shndx = idx; @@ -1645,13 +1724,13 @@ static int bpf_object__elf_collect(struct bpf_object *obj, int flags)  		}  	} -	if (!obj->efile.strtabidx || obj->efile.strtabidx >= idx) { -		pr_warning("Corrupted ELF file: index of strtab invalid\n"); +	if (!obj->efile.strtabidx || obj->efile.strtabidx > idx) { +		pr_warn("Corrupted ELF file: index of strtab invalid\n");  		return -LIBBPF_ERRNO__FORMAT;  	}  	err = bpf_object__init_btf(obj, btf_data, btf_ext_data);  	if (!err) -		err = bpf_object__init_maps(obj, flags); +		err = bpf_object__init_maps(obj, relaxed_maps, pin_root_path);  	if (!err)  		err = bpf_object__sanitize_and_load_btf(obj);  	if (!err) @@ -1701,14 +1780,6 @@ static bool bpf_object__shndx_is_maps(const struct bpf_object *obj,  	       shndx == obj->efile.btf_maps_shndx;  } -static bool bpf_object__relo_in_known_section(const struct bpf_object *obj, -					      int shndx) -{ -	return shndx == obj->efile.text_shndx || -	       bpf_object__shndx_is_maps(obj, shndx) || -	       bpf_object__shndx_is_data(obj, shndx); -} -  static enum libbpf_map_type  bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)  { @@ -1722,130 +1793,164 @@ bpf_object__section_to_libbpf_map_type(const struct bpf_object *obj, int shndx)  		return LIBBPF_MAP_UNSPEC;  } +static int bpf_program__record_reloc(struct bpf_program *prog, +				     struct reloc_desc *reloc_desc, +				     __u32 insn_idx, const char *name, +				     const GElf_Sym *sym, const GElf_Rel *rel) +{ +	struct bpf_insn *insn = &prog->insns[insn_idx]; +	size_t map_idx, nr_maps = prog->obj->nr_maps; +	struct bpf_object *obj = prog->obj; +	__u32 shdr_idx = sym->st_shndx; +	enum libbpf_map_type type; +	struct bpf_map *map; + +	/* sub-program call relocation */ +	if (insn->code == (BPF_JMP | BPF_CALL)) { +		if (insn->src_reg != BPF_PSEUDO_CALL) { +			pr_warn("incorrect bpf_call opcode\n"); +			return -LIBBPF_ERRNO__RELOC; +		} +		/* text_shndx can be 0, if no default "main" program exists */ +		if (!shdr_idx || shdr_idx != obj->efile.text_shndx) { +			pr_warn("bad call relo against section %u\n", shdr_idx); +			return -LIBBPF_ERRNO__RELOC; +		} +		if (sym->st_value % 8) { +			pr_warn("bad call relo offset: %llu\n", (__u64)sym->st_value); +			return -LIBBPF_ERRNO__RELOC; +		} +		reloc_desc->type = RELO_CALL; +		reloc_desc->insn_idx = insn_idx; +		reloc_desc->sym_off = sym->st_value; +		obj->has_pseudo_calls = true; +		return 0; +	} + +	if (insn->code != (BPF_LD | BPF_IMM | BPF_DW)) { +		pr_warn("invalid relo for insns[%d].code 0x%x\n", +			insn_idx, insn->code); +		return -LIBBPF_ERRNO__RELOC; +	} +	if (!shdr_idx || shdr_idx >= SHN_LORESERVE) { +		pr_warn("invalid relo for \'%s\' in special section 0x%x; forgot to initialize global var?..\n", +			name, shdr_idx); +		return -LIBBPF_ERRNO__RELOC; +	} + +	type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); + +	/* generic map reference relocation */ +	if (type == LIBBPF_MAP_UNSPEC) { +		if (!bpf_object__shndx_is_maps(obj, shdr_idx)) { +			pr_warn("bad map relo against section %u\n", +				shdr_idx); +			return -LIBBPF_ERRNO__RELOC; +		} +		for (map_idx = 0; map_idx < nr_maps; map_idx++) { +			map = &obj->maps[map_idx]; +			if (map->libbpf_type != type || +			    map->sec_idx != sym->st_shndx || +			    map->sec_offset != sym->st_value) +				continue; +			pr_debug("found map %zd (%s, sec %d, off %zu) for insn %u\n", +				 map_idx, map->name, map->sec_idx, +				 map->sec_offset, insn_idx); +			break; +		} +		if (map_idx >= nr_maps) { +			pr_warn("map relo failed to find map for sec %u, off %llu\n", +				shdr_idx, (__u64)sym->st_value); +			return -LIBBPF_ERRNO__RELOC; +		} +		reloc_desc->type = RELO_LD64; +		reloc_desc->insn_idx = insn_idx; +		reloc_desc->map_idx = map_idx; +		reloc_desc->sym_off = 0; /* sym->st_value determines map_idx */ +		return 0; +	} + +	/* global data map relocation */ +	if (!bpf_object__shndx_is_data(obj, shdr_idx)) { +		pr_warn("bad data relo against section %u\n", shdr_idx); +		return -LIBBPF_ERRNO__RELOC; +	} +	if (!obj->caps.global_data) { +		pr_warn("relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", +			name, insn_idx); +		return -LIBBPF_ERRNO__RELOC; +	} +	for (map_idx = 0; map_idx < nr_maps; map_idx++) { +		map = &obj->maps[map_idx]; +		if (map->libbpf_type != type) +			continue; +		pr_debug("found data map %zd (%s, sec %d, off %zu) for insn %u\n", +			 map_idx, map->name, map->sec_idx, map->sec_offset, +			 insn_idx); +		break; +	} +	if (map_idx >= nr_maps) { +		pr_warn("data relo failed to find map for sec %u\n", +			shdr_idx); +		return -LIBBPF_ERRNO__RELOC; +	} + +	reloc_desc->type = RELO_DATA; +	reloc_desc->insn_idx = insn_idx; +	reloc_desc->map_idx = map_idx; +	reloc_desc->sym_off = sym->st_value; +	return 0; +} +  static int  bpf_program__collect_reloc(struct bpf_program *prog, GElf_Shdr *shdr,  			   Elf_Data *data, struct bpf_object *obj)  {  	Elf_Data *symbols = obj->efile.symbols; -	struct bpf_map *maps = obj->maps; -	size_t nr_maps = obj->nr_maps; -	int i, nrels; +	int err, i, nrels;  	pr_debug("collecting relocating info for: '%s'\n", prog->section_name);  	nrels = shdr->sh_size / shdr->sh_entsize;  	prog->reloc_desc = malloc(sizeof(*prog->reloc_desc) * nrels);  	if (!prog->reloc_desc) { -		pr_warning("failed to alloc memory in relocation\n"); +		pr_warn("failed to alloc memory in relocation\n");  		return -ENOMEM;  	}  	prog->nr_reloc = nrels;  	for (i = 0; i < nrels; i++) { -		struct bpf_insn *insns = prog->insns; -		enum libbpf_map_type type; -		unsigned int insn_idx; -		unsigned int shdr_idx;  		const char *name; -		size_t map_idx; +		__u32 insn_idx;  		GElf_Sym sym;  		GElf_Rel rel;  		if (!gelf_getrel(data, i, &rel)) { -			pr_warning("relocation: failed to get %d reloc\n", i); +			pr_warn("relocation: failed to get %d reloc\n", i);  			return -LIBBPF_ERRNO__FORMAT;  		} -  		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) { -			pr_warning("relocation: symbol %"PRIx64" not found\n", -				   GELF_R_SYM(rel.r_info)); +			pr_warn("relocation: symbol %"PRIx64" not found\n", +				GELF_R_SYM(rel.r_info));  			return -LIBBPF_ERRNO__FORMAT;  		} +		if (rel.r_offset % sizeof(struct bpf_insn)) +			return -LIBBPF_ERRNO__FORMAT; +		insn_idx = rel.r_offset / sizeof(struct bpf_insn);  		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,  				  sym.st_name) ? : "<?>"; -		pr_debug("relo for %lld value %lld name %d (\'%s\')\n", -			 (long long) (rel.r_info >> 32), -			 (long long) sym.st_value, sym.st_name, name); +		pr_debug("relo for shdr %u, symb %llu, value %llu, type %d, bind %d, name %d (\'%s\'), insn %u\n", +			 (__u32)sym.st_shndx, (__u64)GELF_R_SYM(rel.r_info), +			 (__u64)sym.st_value, GELF_ST_TYPE(sym.st_info), +			 GELF_ST_BIND(sym.st_info), sym.st_name, name, +			 insn_idx); -		shdr_idx = sym.st_shndx; -		insn_idx = rel.r_offset / sizeof(struct bpf_insn); -		pr_debug("relocation: insn_idx=%u, shdr_idx=%u\n", -			 insn_idx, shdr_idx); - -		if (shdr_idx >= SHN_LORESERVE) { -			pr_warning("relocation: not yet supported relo for non-static global \'%s\' variable in special section (0x%x) found in insns[%d].code 0x%x\n", -				   name, shdr_idx, insn_idx, -				   insns[insn_idx].code); -			return -LIBBPF_ERRNO__RELOC; -		} -		if (!bpf_object__relo_in_known_section(obj, shdr_idx)) { -			pr_warning("Program '%s' contains unrecognized relo data pointing to section %u\n", -				   prog->section_name, shdr_idx); -			return -LIBBPF_ERRNO__RELOC; -		} - -		if (insns[insn_idx].code == (BPF_JMP | BPF_CALL)) { -			if (insns[insn_idx].src_reg != BPF_PSEUDO_CALL) { -				pr_warning("incorrect bpf_call opcode\n"); -				return -LIBBPF_ERRNO__RELOC; -			} -			prog->reloc_desc[i].type = RELO_CALL; -			prog->reloc_desc[i].insn_idx = insn_idx; -			prog->reloc_desc[i].text_off = sym.st_value; -			obj->has_pseudo_calls = true; -			continue; -		} - -		if (insns[insn_idx].code != (BPF_LD | BPF_IMM | BPF_DW)) { -			pr_warning("bpf: relocation: invalid relo for insns[%d].code 0x%x\n", -				   insn_idx, insns[insn_idx].code); -			return -LIBBPF_ERRNO__RELOC; -		} - -		if (bpf_object__shndx_is_maps(obj, shdr_idx) || -		    bpf_object__shndx_is_data(obj, shdr_idx)) { -			type = bpf_object__section_to_libbpf_map_type(obj, shdr_idx); -			if (type != LIBBPF_MAP_UNSPEC) { -				if (GELF_ST_BIND(sym.st_info) == STB_GLOBAL) { -					pr_warning("bpf: relocation: not yet supported relo for non-static global \'%s\' variable found in insns[%d].code 0x%x\n", -						   name, insn_idx, insns[insn_idx].code); -					return -LIBBPF_ERRNO__RELOC; -				} -				if (!obj->caps.global_data) { -					pr_warning("bpf: relocation: kernel does not support global \'%s\' variable access in insns[%d]\n", -						   name, insn_idx); -					return -LIBBPF_ERRNO__RELOC; -				} -			} - -			for (map_idx = 0; map_idx < nr_maps; map_idx++) { -				if (maps[map_idx].libbpf_type != type) -					continue; -				if (type != LIBBPF_MAP_UNSPEC || -				    (maps[map_idx].sec_idx == sym.st_shndx && -				     maps[map_idx].sec_offset == sym.st_value)) { -					pr_debug("relocation: found map %zd (%s, sec_idx %d, offset %zu) for insn %u\n", -						 map_idx, maps[map_idx].name, -						 maps[map_idx].sec_idx, -						 maps[map_idx].sec_offset, -						 insn_idx); -					break; -				} -			} - -			if (map_idx >= nr_maps) { -				pr_warning("bpf relocation: map_idx %d larger than %d\n", -					   (int)map_idx, (int)nr_maps - 1); -				return -LIBBPF_ERRNO__RELOC; -			} - -			prog->reloc_desc[i].type = type != LIBBPF_MAP_UNSPEC ? -						   RELO_DATA : RELO_LD64; -			prog->reloc_desc[i].insn_idx = insn_idx; -			prog->reloc_desc[i].map_idx = map_idx; -		} +		err = bpf_program__record_reloc(prog, &prog->reloc_desc[i], +						insn_idx, name, &sym, &rel); +		if (err) +			return err;  	}  	return 0;  } @@ -1897,16 +2002,22 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)  		return -errno;  	new_fd = open("/", O_RDONLY | O_CLOEXEC); -	if (new_fd < 0) +	if (new_fd < 0) { +		err = -errno;  		goto err_free_new_name; +	}  	new_fd = dup3(fd, new_fd, O_CLOEXEC); -	if (new_fd < 0) +	if (new_fd < 0) { +		err = -errno;  		goto err_close_new_fd; +	}  	err = zclose(map->fd); -	if (err) +	if (err) { +		err = -errno;  		goto err_close_new_fd; +	}  	free(map->name);  	map->fd = new_fd; @@ -1918,6 +2029,7 @@ int bpf_map__reuse_fd(struct bpf_map *map, int fd)  	map->def.map_flags = info.map_flags;  	map->btf_key_type_id = info.btf_key_type_id;  	map->btf_value_type_id = info.btf_value_type_id; +	map->reused = true;  	return 0; @@ -1925,7 +2037,7 @@ err_close_new_fd:  	close(new_fd);  err_free_new_name:  	free(new_name); -	return -errno; +	return err;  }  int bpf_map__resize(struct bpf_map *map, __u32 max_entries) @@ -1964,8 +2076,8 @@ bpf_object__probe_name(struct bpf_object *obj)  	ret = bpf_load_program_xattr(&attr, NULL, 0);  	if (ret < 0) {  		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -		pr_warning("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", -			   __func__, cp, errno); +		pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n", +			__func__, cp, errno);  		return -errno;  	}  	close(ret); @@ -2005,8 +2117,8 @@ bpf_object__probe_global_data(struct bpf_object *obj)  	map = bpf_create_map_xattr(&map_attr);  	if (map < 0) {  		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -		pr_warning("Error in %s():%s(%d). Couldn't create simple array map.\n", -			   __func__, cp, errno); +		pr_warn("Error in %s():%s(%d). Couldn't create simple array map.\n", +			__func__, cp, errno);  		return -errno;  	} @@ -2030,7 +2142,7 @@ bpf_object__probe_global_data(struct bpf_object *obj)  static int bpf_object__probe_btf_func(struct bpf_object *obj)  { -	const char strs[] = "\0int\0x\0a"; +	static const char strs[] = "\0int\0x\0a";  	/* void x(int a) {} */  	__u32 types[] = {  		/* int */ @@ -2056,7 +2168,7 @@ static int bpf_object__probe_btf_func(struct bpf_object *obj)  static int bpf_object__probe_btf_datasec(struct bpf_object *obj)  { -	const char strs[] = "\0x\0.data"; +	static const char strs[] = "\0x\0.data";  	/* static int a; */  	__u32 types[] = {  		/* int */ @@ -2081,6 +2193,27 @@ static int bpf_object__probe_btf_datasec(struct bpf_object *obj)  	return 0;  } +static int bpf_object__probe_array_mmap(struct bpf_object *obj) +{ +	struct bpf_create_map_attr attr = { +		.map_type = BPF_MAP_TYPE_ARRAY, +		.map_flags = BPF_F_MMAPABLE, +		.key_size = sizeof(int), +		.value_size = sizeof(int), +		.max_entries = 1, +	}; +	int fd; + +	fd = bpf_create_map_xattr(&attr); +	if (fd >= 0) { +		obj->caps.array_mmap = 1; +		close(fd); +		return 1; +	} + +	return 0; +} +  static int  bpf_object__probe_caps(struct bpf_object *obj)  { @@ -2089,6 +2222,7 @@ bpf_object__probe_caps(struct bpf_object *obj)  		bpf_object__probe_global_data,  		bpf_object__probe_btf_func,  		bpf_object__probe_btf_datasec, +		bpf_object__probe_array_mmap,  	};  	int i, ret; @@ -2101,6 +2235,66 @@ bpf_object__probe_caps(struct bpf_object *obj)  	return 0;  } +static bool map_is_reuse_compat(const struct bpf_map *map, int map_fd) +{ +	struct bpf_map_info map_info = {}; +	char msg[STRERR_BUFSIZE]; +	__u32 map_info_len; + +	map_info_len = sizeof(map_info); + +	if (bpf_obj_get_info_by_fd(map_fd, &map_info, &map_info_len)) { +		pr_warn("failed to get map info for map FD %d: %s\n", +			map_fd, libbpf_strerror_r(errno, msg, sizeof(msg))); +		return false; +	} + +	return (map_info.type == map->def.type && +		map_info.key_size == map->def.key_size && +		map_info.value_size == map->def.value_size && +		map_info.max_entries == map->def.max_entries && +		map_info.map_flags == map->def.map_flags); +} + +static int +bpf_object__reuse_map(struct bpf_map *map) +{ +	char *cp, errmsg[STRERR_BUFSIZE]; +	int err, pin_fd; + +	pin_fd = bpf_obj_get(map->pin_path); +	if (pin_fd < 0) { +		err = -errno; +		if (err == -ENOENT) { +			pr_debug("found no pinned map to reuse at '%s'\n", +				 map->pin_path); +			return 0; +		} + +		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); +		pr_warn("couldn't retrieve pinned map '%s': %s\n", +			map->pin_path, cp); +		return err; +	} + +	if (!map_is_reuse_compat(map, pin_fd)) { +		pr_warn("couldn't reuse pinned map at '%s': parameter mismatch\n", +			map->pin_path); +		close(pin_fd); +		return -EINVAL; +	} + +	err = bpf_map__reuse_fd(map, pin_fd); +	if (err) { +		close(pin_fd); +		return err; +	} +	map->pinned = true; +	pr_debug("reused pinned map at '%s'\n", map->pin_path); + +	return 0; +} +  static int  bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)  { @@ -2121,8 +2315,8 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)  		err = bpf_map_freeze(map->fd);  		if (err) {  			cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -			pr_warning("Error freezing map(%s) as read-only: %s\n", -				   map->name, cp); +			pr_warn("Error freezing map(%s) as read-only: %s\n", +				map->name, cp);  			err = 0;  		}  	} @@ -2143,6 +2337,15 @@ bpf_object__create_maps(struct bpf_object *obj)  		char *cp, errmsg[STRERR_BUFSIZE];  		int *pfd = &map->fd; +		if (map->pin_path) { +			err = bpf_object__reuse_map(map); +			if (err) { +				pr_warn("error reusing pinned map %s\n", +					map->name); +				return err; +			} +		} +  		if (map->fd >= 0) {  			pr_debug("skip map create (preset) %s: fd=%d\n",  				 map->name, map->fd); @@ -2161,8 +2364,8 @@ bpf_object__create_maps(struct bpf_object *obj)  			if (!nr_cpus)  				nr_cpus = libbpf_num_possible_cpus();  			if (nr_cpus < 0) { -				pr_warning("failed to determine number of system CPUs: %d\n", -					   nr_cpus); +				pr_warn("failed to determine number of system CPUs: %d\n", +					nr_cpus);  				err = nr_cpus;  				goto err_out;  			} @@ -2190,8 +2393,8 @@ bpf_object__create_maps(struct bpf_object *obj)  				 create_attr.btf_value_type_id)) {  			err = -errno;  			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); -			pr_warning("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", -				   map->name, cp, err); +			pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n", +				map->name, cp, err);  			create_attr.btf_fd = 0;  			create_attr.btf_key_type_id = 0;  			create_attr.btf_value_type_id = 0; @@ -2206,8 +2409,8 @@ bpf_object__create_maps(struct bpf_object *obj)  			err = -errno;  err_out:  			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg)); -			pr_warning("failed to create map (name: '%s'): %s(%d)\n", -				   map->name, cp, err); +			pr_warn("failed to create map (name: '%s'): %s(%d)\n", +				map->name, cp, err);  			for (j = 0; j < i; j++)  				zclose(obj->maps[j].fd);  			return err; @@ -2221,6 +2424,15 @@ err_out:  			}  		} +		if (map->pin_path && !map->pinned) { +			err = bpf_map__pin(map, NULL); +			if (err) { +				pr_warn("failed to auto-pin map name '%s' at '%s'\n", +					map->name, map->pin_path); +				return err; +			} +		} +  		pr_debug("created map %s: fd=%d\n", map->name, *pfd);  	} @@ -2232,8 +2444,8 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,  			void *btf_prog_info, const char *info_name)  {  	if (err != -ENOENT) { -		pr_warning("Error in loading %s for sec %s.\n", -			   info_name, prog->section_name); +		pr_warn("Error in loading %s for sec %s.\n", +			info_name, prog->section_name);  		return err;  	} @@ -2244,14 +2456,14 @@ check_btf_ext_reloc_err(struct bpf_program *prog, int err,  		 * Some info has already been found but has problem  		 * in the last btf_ext reloc. Must have to error out.  		 */ -		pr_warning("Error in relocating %s for sec %s.\n", -			   info_name, prog->section_name); +		pr_warn("Error in relocating %s for sec %s.\n", +			info_name, prog->section_name);  		return err;  	}  	/* Have problem loading the very first info. Ignore the rest. */ -	pr_warning("Cannot find %s for main program sec %s. Ignore all %s.\n", -		   info_name, prog->section_name, info_name); +	pr_warn("Cannot find %s for main program sec %s. Ignore all %s.\n", +		info_name, prog->section_name, info_name);  	return 0;  } @@ -2315,8 +2527,8 @@ struct bpf_core_spec {  	int raw_spec[BPF_CORE_SPEC_MAX_LEN];  	/* raw spec length */  	int raw_len; -	/* field byte offset represented by spec */ -	__u32 offset; +	/* field bit offset represented by spec */ +	__u32 bit_offset;  };  static bool str_is_empty(const char *s) @@ -2325,10 +2537,10 @@ static bool str_is_empty(const char *s)  }  /* - * Turn bpf_offset_reloc into a low- and high-level spec representation, + * Turn bpf_field_reloc into a low- and high-level spec representation,   * validating correctness along the way, as well as calculating resulting - * field offset (in bytes), specified by accessor string. Low-level spec - * captures every single level of nestedness, including traversing anonymous + * field bit offset, specified by accessor string. Low-level spec captures + * every single level of nestedness, including traversing anonymous   * struct/union members. High-level one only captures semantically meaningful   * "turning points": named fields and array indicies.   * E.g., for this case: @@ -2400,7 +2612,7 @@ static int bpf_core_spec_parse(const struct btf *btf,  	sz = btf__resolve_size(btf, id);  	if (sz < 0)  		return sz; -	spec->offset = access_idx * sz; +	spec->bit_offset = access_idx * sz * 8;  	for (i = 1; i < spec->raw_len; i++) {  		t = skip_mods_and_typedefs(btf, id, &id); @@ -2411,17 +2623,13 @@ static int bpf_core_spec_parse(const struct btf *btf,  		if (btf_is_composite(t)) {  			const struct btf_member *m; -			__u32 offset; +			__u32 bit_offset;  			if (access_idx >= btf_vlen(t))  				return -EINVAL; -			if (btf_member_bitfield_size(t, access_idx)) -				return -EINVAL; -			offset = btf_member_bit_offset(t, access_idx); -			if (offset % 8) -				return -EINVAL; -			spec->offset += offset / 8; +			bit_offset = btf_member_bit_offset(t, access_idx); +			spec->bit_offset += bit_offset;  			m = btf_members(t) + access_idx;  			if (m->name_off) { @@ -2450,10 +2658,10 @@ static int bpf_core_spec_parse(const struct btf *btf,  			sz = btf__resolve_size(btf, id);  			if (sz < 0)  				return sz; -			spec->offset += access_idx * sz; +			spec->bit_offset += access_idx * sz * 8;  		} else { -			pr_warning("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", -				   type_id, spec_str, i, id, btf_kind(t)); +			pr_warn("relo for [%u] %s (at idx %d) captures type [%d] of unexpected kind %d\n", +				type_id, spec_str, i, id, btf_kind(t));  			return -EINVAL;  		}  	} @@ -2551,12 +2759,14 @@ err_out:  }  /* Check two types for compatibility, skipping const/volatile/restrict and - * typedefs, to ensure we are relocating offset to the compatible entities: + * typedefs, to ensure we are relocating compatible entities:   *   - any two STRUCTs/UNIONs are compatible and can be mixed; - *   - any two FWDs are compatible; + *   - any two FWDs are compatible, if their names match (modulo flavor suffix);   *   - any two PTRs are always compatible; + *   - for ENUMs, names should be the same (ignoring flavor suffix) or at + *     least one of enums should be anonymous;   *   - for ENUMs, check sizes, names are ignored; - *   - for INT, size and bitness should match, signedness is ignored; + *   - for INT, size and signedness are ignored;   *   - for ARRAY, dimensionality is ignored, element types are checked for   *     compatibility recursively;   *   - everything else shouldn't be ever a target of relocation. @@ -2582,23 +2792,36 @@ recur:  		return 0;  	switch (btf_kind(local_type)) { -	case BTF_KIND_FWD:  	case BTF_KIND_PTR:  		return 1; -	case BTF_KIND_ENUM: -		return local_type->size == targ_type->size; +	case BTF_KIND_FWD: +	case BTF_KIND_ENUM: { +		const char *local_name, *targ_name; +		size_t local_len, targ_len; + +		local_name = btf__name_by_offset(local_btf, +						 local_type->name_off); +		targ_name = btf__name_by_offset(targ_btf, targ_type->name_off); +		local_len = bpf_core_essential_name_len(local_name); +		targ_len = bpf_core_essential_name_len(targ_name); +		/* one of them is anonymous or both w/ same flavor-less names */ +		return local_len == 0 || targ_len == 0 || +		       (local_len == targ_len && +			strncmp(local_name, targ_name, local_len) == 0); +	}  	case BTF_KIND_INT: +		/* just reject deprecated bitfield-like integers; all other +		 * integers are by default compatible between each other +		 */  		return btf_int_offset(local_type) == 0 && -		       btf_int_offset(targ_type) == 0 && -		       local_type->size == targ_type->size && -		       btf_int_bits(local_type) == btf_int_bits(targ_type); +		       btf_int_offset(targ_type) == 0;  	case BTF_KIND_ARRAY:  		local_id = btf_array(local_type)->type;  		targ_id = btf_array(targ_type)->type;  		goto recur;  	default: -		pr_warning("unexpected kind %d relocated, local [%d], target [%d]\n", -			   btf_kind(local_type), local_id, targ_id); +		pr_warn("unexpected kind %d relocated, local [%d], target [%d]\n", +			btf_kind(local_type), local_id, targ_id);  		return 0;  	}  } @@ -2607,7 +2830,7 @@ recur:   * Given single high-level named field accessor in local type, find   * corresponding high-level accessor for a target type. Along the way,   * maintain low-level spec for target as well. Also keep updating target - * offset. + * bit offset.   *   * Searching is performed through recursive exhaustive enumeration of all   * fields of a struct/union. If there are any anonymous (embedded) @@ -2646,21 +2869,16 @@ static int bpf_core_match_member(const struct btf *local_btf,  	n = btf_vlen(targ_type);  	m = btf_members(targ_type);  	for (i = 0; i < n; i++, m++) { -		__u32 offset; +		__u32 bit_offset; -		/* bitfield relocations not supported */ -		if (btf_member_bitfield_size(targ_type, i)) -			continue; -		offset = btf_member_bit_offset(targ_type, i); -		if (offset % 8) -			continue; +		bit_offset = btf_member_bit_offset(targ_type, i);  		/* too deep struct/union/array nesting */  		if (spec->raw_len == BPF_CORE_SPEC_MAX_LEN)  			return -E2BIG;  		/* speculate this member will be the good one */ -		spec->offset += offset / 8; +		spec->bit_offset += bit_offset;  		spec->raw_spec[spec->raw_len++] = i;  		targ_name = btf__name_by_offset(targ_btf, m->name_off); @@ -2689,7 +2907,7 @@ static int bpf_core_match_member(const struct btf *local_btf,  			return found;  		}  		/* member turned out not to be what we looked for */ -		spec->offset -= offset / 8; +		spec->bit_offset -= bit_offset;  		spec->raw_len--;  	} @@ -2698,7 +2916,7 @@ static int bpf_core_match_member(const struct btf *local_btf,  /*   * Try to match local spec to a target type and, if successful, produce full - * target spec (high-level, low-level + offset). + * target spec (high-level, low-level + bit offset).   */  static int bpf_core_spec_match(struct bpf_core_spec *local_spec,  			       const struct btf *targ_btf, __u32 targ_id, @@ -2761,35 +2979,165 @@ static int bpf_core_spec_match(struct bpf_core_spec *local_spec,  			sz = btf__resolve_size(targ_btf, targ_id);  			if (sz < 0)  				return sz; -			targ_spec->offset += local_acc->idx * sz; +			targ_spec->bit_offset += local_acc->idx * sz * 8;  		}  	}  	return 1;  } +static int bpf_core_calc_field_relo(const struct bpf_program *prog, +				    const struct bpf_field_reloc *relo, +				    const struct bpf_core_spec *spec, +				    __u32 *val, bool *validate) +{ +	const struct bpf_core_accessor *acc = &spec->spec[spec->len - 1]; +	const struct btf_type *t = btf__type_by_id(spec->btf, acc->type_id); +	__u32 byte_off, byte_sz, bit_off, bit_sz; +	const struct btf_member *m; +	const struct btf_type *mt; +	bool bitfield; +	__s64 sz; + +	/* a[n] accessor needs special handling */ +	if (!acc->name) { +		if (relo->kind == BPF_FIELD_BYTE_OFFSET) { +			*val = spec->bit_offset / 8; +		} else if (relo->kind == BPF_FIELD_BYTE_SIZE) { +			sz = btf__resolve_size(spec->btf, acc->type_id); +			if (sz < 0) +				return -EINVAL; +			*val = sz; +		} else { +			pr_warn("prog '%s': relo %d at insn #%d can't be applied to array access\n", +				bpf_program__title(prog, false), +				relo->kind, relo->insn_off / 8); +			return -EINVAL; +		} +		if (validate) +			*validate = true; +		return 0; +	} + +	m = btf_members(t) + acc->idx; +	mt = skip_mods_and_typedefs(spec->btf, m->type, NULL); +	bit_off = spec->bit_offset; +	bit_sz = btf_member_bitfield_size(t, acc->idx); + +	bitfield = bit_sz > 0; +	if (bitfield) { +		byte_sz = mt->size; +		byte_off = bit_off / 8 / byte_sz * byte_sz; +		/* figure out smallest int size necessary for bitfield load */ +		while (bit_off + bit_sz - byte_off * 8 > byte_sz * 8) { +			if (byte_sz >= 8) { +				/* bitfield can't be read with 64-bit read */ +				pr_warn("prog '%s': relo %d at insn #%d can't be satisfied for bitfield\n", +					bpf_program__title(prog, false), +					relo->kind, relo->insn_off / 8); +				return -E2BIG; +			} +			byte_sz *= 2; +			byte_off = bit_off / 8 / byte_sz * byte_sz; +		} +	} else { +		sz = btf__resolve_size(spec->btf, m->type); +		if (sz < 0) +			return -EINVAL; +		byte_sz = sz; +		byte_off = spec->bit_offset / 8; +		bit_sz = byte_sz * 8; +	} + +	/* for bitfields, all the relocatable aspects are ambiguous and we +	 * might disagree with compiler, so turn off validation of expected +	 * value, except for signedness +	 */ +	if (validate) +		*validate = !bitfield; + +	switch (relo->kind) { +	case BPF_FIELD_BYTE_OFFSET: +		*val = byte_off; +		break; +	case BPF_FIELD_BYTE_SIZE: +		*val = byte_sz; +		break; +	case BPF_FIELD_SIGNED: +		/* enums will be assumed unsigned */ +		*val = btf_is_enum(mt) || +		       (btf_int_encoding(mt) & BTF_INT_SIGNED); +		if (validate) +			*validate = true; /* signedness is never ambiguous */ +		break; +	case BPF_FIELD_LSHIFT_U64: +#if __BYTE_ORDER == __LITTLE_ENDIAN +		*val = 64 - (bit_off + bit_sz - byte_off  * 8); +#else +		*val = (8 - byte_sz) * 8 + (bit_off - byte_off * 8); +#endif +		break; +	case BPF_FIELD_RSHIFT_U64: +		*val = 64 - bit_sz; +		if (validate) +			*validate = true; /* right shift is never ambiguous */ +		break; +	case BPF_FIELD_EXISTS: +	default: +		pr_warn("prog '%s': unknown relo %d at insn #%d\n", +			bpf_program__title(prog, false), +			relo->kind, relo->insn_off / 8); +		return -EINVAL; +	} + +	return 0; +} +  /*   * Patch relocatable BPF instruction. - * Expected insn->imm value is provided for validation, as well as the new - * relocated value. + * + * Patched value is determined by relocation kind and target specification. + * For field existence relocation target spec will be NULL if field is not + * found. + * Expected insn->imm value is determined using relocation kind and local + * spec, and is checked before patching instruction. If actual insn->imm value + * is wrong, bail out with error.   *   * Currently three kinds of BPF instructions are supported:   * 1. rX = <imm> (assignment with immediate operand);   * 2. rX += <imm> (arithmetic operations with immediate operand); - * 3. *(rX) = <imm> (indirect memory assignment with immediate operand). - * - * If actual insn->imm value is wrong, bail out.   */ -static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off, -			       __u32 orig_off, __u32 new_off) +static int bpf_core_reloc_insn(struct bpf_program *prog, +			       const struct bpf_field_reloc *relo, +			       const struct bpf_core_spec *local_spec, +			       const struct bpf_core_spec *targ_spec)  { +	bool failed = false, validate = true; +	__u32 orig_val, new_val;  	struct bpf_insn *insn; -	int insn_idx; +	int insn_idx, err;  	__u8 class; -	if (insn_off % sizeof(struct bpf_insn)) +	if (relo->insn_off % sizeof(struct bpf_insn))  		return -EINVAL; -	insn_idx = insn_off / sizeof(struct bpf_insn); +	insn_idx = relo->insn_off / sizeof(struct bpf_insn); + +	if (relo->kind == BPF_FIELD_EXISTS) { +		orig_val = 1; /* can't generate EXISTS relo w/o local field */ +		new_val = targ_spec ? 1 : 0; +	} else if (!targ_spec) { +		failed = true; +		new_val = (__u32)-1; +	} else { +		err = bpf_core_calc_field_relo(prog, relo, local_spec, +					       &orig_val, &validate); +		if (err) +			return err; +		err = bpf_core_calc_field_relo(prog, relo, targ_spec, +					       &new_val, NULL); +		if (err) +			return err; +	}  	insn = &prog->insns[insn_idx];  	class = BPF_CLASS(insn->code); @@ -2797,19 +3145,25 @@ static int bpf_core_reloc_insn(struct bpf_program *prog, int insn_off,  	if (class == BPF_ALU || class == BPF_ALU64) {  		if (BPF_SRC(insn->code) != BPF_K)  			return -EINVAL; -		if (insn->imm != orig_off) +		if (!failed && validate && insn->imm != orig_val) { +			pr_warn("prog '%s': unexpected insn #%d value: got %u, exp %u -> %u\n", +				bpf_program__title(prog, false), insn_idx, +				insn->imm, orig_val, new_val);  			return -EINVAL; -		insn->imm = new_off; -		pr_debug("prog '%s': patched insn #%d (ALU/ALU64) imm %d -> %d\n", -			 bpf_program__title(prog, false), -			 insn_idx, orig_off, new_off); +		} +		orig_val = insn->imm; +		insn->imm = new_val; +		pr_debug("prog '%s': patched insn #%d (ALU/ALU64)%s imm %u -> %u\n", +			 bpf_program__title(prog, false), insn_idx, +			 failed ? " w/ failed reloc" : "", orig_val, new_val);  	} else { -		pr_warning("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", -			   bpf_program__title(prog, false), -			   insn_idx, insn->code, insn->src_reg, insn->dst_reg, -			   insn->off, insn->imm); +		pr_warn("prog '%s': trying to relocate unrecognized insn #%d, code:%x, src:%x, dst:%x, off:%x, imm:%x\n", +			bpf_program__title(prog, false), +			insn_idx, insn->code, insn->src_reg, insn->dst_reg, +			insn->off, insn->imm);  		return -EINVAL;  	} +  	return 0;  } @@ -2895,7 +3249,7 @@ static struct btf *bpf_core_find_kernel_btf(void)  		return btf;  	} -	pr_warning("failed to find valid kernel BTF\n"); +	pr_warn("failed to find valid kernel BTF\n");  	return ERR_PTR(-ESRCH);  } @@ -2919,7 +3273,8 @@ static void bpf_core_dump_spec(int level, const struct bpf_core_spec *spec)  		libbpf_print(level, "%d%s", spec->raw_spec[i],  			     i == spec->raw_len - 1 ? " => " : ":"); -	libbpf_print(level, "%u @ &x", spec->offset); +	libbpf_print(level, "%u.%u @ &x", +		     spec->bit_offset / 8, spec->bit_offset % 8);  	for (i = 0; i < spec->len; i++) {  		if (spec->spec[i].name) @@ -2976,7 +3331,7 @@ static void *u32_as_hash_key(__u32 x)   *    types should be compatible (see bpf_core_fields_are_compat for details).   * 3. It is supported and expected that there might be multiple flavors   *    matching the spec. As long as all the specs resolve to the same set of - *    offsets across all candidates, there is not error. If there is any + *    offsets across all candidates, there is no error. If there is any   *    ambiguity, CO-RE relocation will fail. This is necessary to accomodate   *    imprefection of BTF deduplication, which can cause slight duplication of   *    the same BTF type, if some directly or indirectly referenced (by @@ -2991,12 +3346,12 @@ static void *u32_as_hash_key(__u32 x)   *    CPU-wise compared to prebuilding a map from all local type names to   *    a list of candidate type names. It's also sped up by caching resolved   *    list of matching candidates per each local "root" type ID, that has at - *    least one bpf_offset_reloc associated with it. This list is shared + *    least one bpf_field_reloc associated with it. This list is shared   *    between multiple relocations for the same type ID and is updated as some   *    of the candidates are pruned due to structural incompatibility.   */ -static int bpf_core_reloc_offset(struct bpf_program *prog, -				 const struct bpf_offset_reloc *relo, +static int bpf_core_reloc_field(struct bpf_program *prog, +				 const struct bpf_field_reloc *relo,  				 int relo_idx,  				 const struct btf *local_btf,  				 const struct btf *targ_btf, @@ -3027,22 +3382,23 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,  	err = bpf_core_spec_parse(local_btf, local_id, spec_str, &local_spec);  	if (err) { -		pr_warning("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", -			   prog_name, relo_idx, local_id, local_name, spec_str, -			   err); +		pr_warn("prog '%s': relo #%d: parsing [%d] %s + %s failed: %d\n", +			prog_name, relo_idx, local_id, local_name, spec_str, +			err);  		return -EINVAL;  	} -	pr_debug("prog '%s': relo #%d: spec is ", prog_name, relo_idx); +	pr_debug("prog '%s': relo #%d: kind %d, spec is ", prog_name, relo_idx, +		 relo->kind);  	bpf_core_dump_spec(LIBBPF_DEBUG, &local_spec);  	libbpf_print(LIBBPF_DEBUG, "\n");  	if (!hashmap__find(cand_cache, type_key, (void **)&cand_ids)) {  		cand_ids = bpf_core_find_cands(local_btf, local_id, targ_btf);  		if (IS_ERR(cand_ids)) { -			pr_warning("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", -				   prog_name, relo_idx, local_id, local_name, -				   PTR_ERR(cand_ids)); +			pr_warn("prog '%s': relo #%d: target candidate search failed for [%d] %s: %ld", +				prog_name, relo_idx, local_id, local_name, +				PTR_ERR(cand_ids));  			return PTR_ERR(cand_ids);  		}  		err = hashmap__set(cand_cache, type_key, cand_ids, NULL, NULL); @@ -3064,8 +3420,8 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,  		bpf_core_dump_spec(LIBBPF_DEBUG, &cand_spec);  		libbpf_print(LIBBPF_DEBUG, ": %d\n", err);  		if (err < 0) { -			pr_warning("prog '%s': relo #%d: matching error: %d\n", -				   prog_name, relo_idx, err); +			pr_warn("prog '%s': relo #%d: matching error: %d\n", +				prog_name, relo_idx, err);  			return err;  		}  		if (err == 0) @@ -3073,31 +3429,42 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,  		if (j == 0) {  			targ_spec = cand_spec; -		} else if (cand_spec.offset != targ_spec.offset) { +		} else if (cand_spec.bit_offset != targ_spec.bit_offset) {  			/* if there are many candidates, they should all -			 * resolve to the same offset +			 * resolve to the same bit offset  			 */ -			pr_warning("prog '%s': relo #%d: offset ambiguity: %u != %u\n", -				   prog_name, relo_idx, cand_spec.offset, -				   targ_spec.offset); +			pr_warn("prog '%s': relo #%d: offset ambiguity: %u != %u\n", +				prog_name, relo_idx, cand_spec.bit_offset, +				targ_spec.bit_offset);  			return -EINVAL;  		}  		cand_ids->data[j++] = cand_spec.spec[0].type_id;  	} -	cand_ids->len = j; -	if (cand_ids->len == 0) { -		pr_warning("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", -			   prog_name, relo_idx, local_id, local_name, spec_str); +	/* +	 * For BPF_FIELD_EXISTS relo or when relaxed CO-RE reloc mode is +	 * requested, it's expected that we might not find any candidates. +	 * In this case, if field wasn't found in any candidate, the list of +	 * candidates shouldn't change at all, we'll just handle relocating +	 * appropriately, depending on relo's kind. +	 */ +	if (j > 0) +		cand_ids->len = j; + +	if (j == 0 && !prog->obj->relaxed_core_relocs && +	    relo->kind != BPF_FIELD_EXISTS) { +		pr_warn("prog '%s': relo #%d: no matching targets found for [%d] %s + %s\n", +			prog_name, relo_idx, local_id, local_name, spec_str);  		return -ESRCH;  	} -	err = bpf_core_reloc_insn(prog, relo->insn_off, -				  local_spec.offset, targ_spec.offset); +	/* bpf_core_reloc_insn should know how to handle missing targ_spec */ +	err = bpf_core_reloc_insn(prog, relo, &local_spec, +				  j ? &targ_spec : NULL);  	if (err) { -		pr_warning("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", -			   prog_name, relo_idx, relo->insn_off, err); +		pr_warn("prog '%s': relo #%d: failed to patch insn at offset %d: %d\n", +			prog_name, relo_idx, relo->insn_off, err);  		return -EINVAL;  	} @@ -3105,10 +3472,10 @@ static int bpf_core_reloc_offset(struct bpf_program *prog,  }  static int -bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path) +bpf_core_reloc_fields(struct bpf_object *obj, const char *targ_btf_path)  {  	const struct btf_ext_info_sec *sec; -	const struct bpf_offset_reloc *rec; +	const struct bpf_field_reloc *rec;  	const struct btf_ext_info *seg;  	struct hashmap_entry *entry;  	struct hashmap *cand_cache = NULL; @@ -3122,8 +3489,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)  	else  		targ_btf = bpf_core_find_kernel_btf();  	if (IS_ERR(targ_btf)) { -		pr_warning("failed to get target BTF: %ld\n", -			   PTR_ERR(targ_btf)); +		pr_warn("failed to get target BTF: %ld\n", PTR_ERR(targ_btf));  		return PTR_ERR(targ_btf);  	} @@ -3133,7 +3499,7 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)  		goto out;  	} -	seg = &obj->btf_ext->offset_reloc_info; +	seg = &obj->btf_ext->field_reloc_info;  	for_each_btf_ext_sec(seg, sec) {  		sec_name = btf__name_by_offset(obj->btf, sec->sec_name_off);  		if (str_is_empty(sec_name)) { @@ -3142,8 +3508,8 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)  		}  		prog = bpf_object__find_program_by_title(obj, sec_name);  		if (!prog) { -			pr_warning("failed to find program '%s' for CO-RE offset relocation\n", -				   sec_name); +			pr_warn("failed to find program '%s' for CO-RE offset relocation\n", +				sec_name);  			err = -EINVAL;  			goto out;  		} @@ -3152,11 +3518,11 @@ bpf_core_reloc_offsets(struct bpf_object *obj, const char *targ_btf_path)  			 sec_name, sec->num_info);  		for_each_btf_ext_rec(seg, sec, i, rec) { -			err = bpf_core_reloc_offset(prog, rec, i, obj->btf, -						    targ_btf, cand_cache); +			err = bpf_core_reloc_field(prog, rec, i, obj->btf, +						   targ_btf, cand_cache);  			if (err) { -				pr_warning("prog '%s': relo #%d: failed to relocate: %d\n", -					   sec_name, i, err); +				pr_warn("prog '%s': relo #%d: failed to relocate: %d\n", +					sec_name, i, err);  				goto out;  			}  		} @@ -3178,8 +3544,8 @@ bpf_object__relocate_core(struct bpf_object *obj, const char *targ_btf_path)  {  	int err = 0; -	if (obj->btf_ext->offset_reloc_info.len) -		err = bpf_core_reloc_offsets(obj, targ_btf_path); +	if (obj->btf_ext->field_reloc_info.len) +		err = bpf_core_reloc_fields(obj, targ_btf_path);  	return err;  } @@ -3197,23 +3563,24 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,  		return -LIBBPF_ERRNO__RELOC;  	if (prog->idx == obj->efile.text_shndx) { -		pr_warning("relo in .text insn %d into off %d\n", -			   relo->insn_idx, relo->text_off); +		pr_warn("relo in .text insn %d into off %d (insn #%d)\n", +			relo->insn_idx, relo->sym_off, relo->sym_off / 8);  		return -LIBBPF_ERRNO__RELOC;  	}  	if (prog->main_prog_cnt == 0) {  		text = bpf_object__find_prog_by_idx(obj, obj->efile.text_shndx);  		if (!text) { -			pr_warning("no .text section found yet relo into text exist\n"); +			pr_warn("no .text section found yet relo into text exist\n");  			return -LIBBPF_ERRNO__RELOC;  		}  		new_cnt = prog->insns_cnt + text->insns_cnt;  		new_insn = reallocarray(prog->insns, new_cnt, sizeof(*insn));  		if (!new_insn) { -			pr_warning("oom in prog realloc\n"); +			pr_warn("oom in prog realloc\n");  			return -ENOMEM;  		} +		prog->insns = new_insn;  		if (obj->btf_ext) {  			err = bpf_program_reloc_btf_ext(prog, obj, @@ -3225,7 +3592,6 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,  		memcpy(new_insn + prog->insns_cnt, text->insns,  		       text->insns_cnt * sizeof(*insn)); -		prog->insns = new_insn;  		prog->main_prog_cnt = prog->insns_cnt;  		prog->insns_cnt = new_cnt;  		pr_debug("added %zd insn from %s to prog %s\n", @@ -3233,7 +3599,7 @@ bpf_program__reloc_text(struct bpf_program *prog, struct bpf_object *obj,  			 prog->section_name);  	}  	insn = &prog->insns[relo->insn_idx]; -	insn->imm += prog->main_prog_cnt - relo->insn_idx; +	insn->imm += relo->sym_off / 8 + prog->main_prog_cnt - relo->insn_idx;  	return 0;  } @@ -3256,31 +3622,26 @@ bpf_program__relocate(struct bpf_program *prog, struct bpf_object *obj)  		return 0;  	for (i = 0; i < prog->nr_reloc; i++) { -		if (prog->reloc_desc[i].type == RELO_LD64 || -		    prog->reloc_desc[i].type == RELO_DATA) { -			bool relo_data = prog->reloc_desc[i].type == RELO_DATA; -			struct bpf_insn *insns = prog->insns; -			int insn_idx, map_idx; - -			insn_idx = prog->reloc_desc[i].insn_idx; -			map_idx = prog->reloc_desc[i].map_idx; - -			if (insn_idx + 1 >= (int)prog->insns_cnt) { -				pr_warning("relocation out of range: '%s'\n", -					   prog->section_name); +		struct reloc_desc *relo = &prog->reloc_desc[i]; + +		if (relo->type == RELO_LD64 || relo->type == RELO_DATA) { +			struct bpf_insn *insn = &prog->insns[relo->insn_idx]; + +			if (relo->insn_idx + 1 >= (int)prog->insns_cnt) { +				pr_warn("relocation out of range: '%s'\n", +					prog->section_name);  				return -LIBBPF_ERRNO__RELOC;  			} -			if (!relo_data) { -				insns[insn_idx].src_reg = BPF_PSEUDO_MAP_FD; +			if (relo->type != RELO_DATA) { +				insn[0].src_reg = BPF_PSEUDO_MAP_FD;  			} else { -				insns[insn_idx].src_reg = BPF_PSEUDO_MAP_VALUE; -				insns[insn_idx + 1].imm = insns[insn_idx].imm; +				insn[0].src_reg = BPF_PSEUDO_MAP_VALUE; +				insn[1].imm = insn[0].imm + relo->sym_off;  			} -			insns[insn_idx].imm = obj->maps[map_idx].fd; -		} else if (prog->reloc_desc[i].type == RELO_CALL) { -			err = bpf_program__reloc_text(prog, obj, -						      &prog->reloc_desc[i]); +			insn[0].imm = obj->maps[relo->map_idx].fd; +		} else if (relo->type == RELO_CALL) { +			err = bpf_program__reloc_text(prog, obj, relo);  			if (err)  				return err;  		} @@ -3301,8 +3662,8 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  	if (obj->btf_ext) {  		err = bpf_object__relocate_core(obj, targ_btf_path);  		if (err) { -			pr_warning("failed to perform CO-RE relocations: %d\n", -				   err); +			pr_warn("failed to perform CO-RE relocations: %d\n", +				err);  			return err;  		}  	} @@ -3311,8 +3672,7 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)  		err = bpf_program__relocate(prog, obj);  		if (err) { -			pr_warning("failed to relocate '%s'\n", -				   prog->section_name); +			pr_warn("failed to relocate '%s'\n", prog->section_name);  			return err;  		}  	} @@ -3324,24 +3684,24 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)  	int i, err;  	if (!obj_elf_valid(obj)) { -		pr_warning("Internal error: elf object is closed\n"); +		pr_warn("Internal error: elf object is closed\n");  		return -LIBBPF_ERRNO__INTERNAL;  	} -	for (i = 0; i < obj->efile.nr_reloc; i++) { -		GElf_Shdr *shdr = &obj->efile.reloc[i].shdr; -		Elf_Data *data = obj->efile.reloc[i].data; +	for (i = 0; i < obj->efile.nr_reloc_sects; i++) { +		GElf_Shdr *shdr = &obj->efile.reloc_sects[i].shdr; +		Elf_Data *data = obj->efile.reloc_sects[i].data;  		int idx = shdr->sh_info;  		struct bpf_program *prog;  		if (shdr->sh_type != SHT_REL) { -			pr_warning("internal error at %d\n", __LINE__); +			pr_warn("internal error at %d\n", __LINE__);  			return -LIBBPF_ERRNO__INTERNAL;  		}  		prog = bpf_object__find_prog_by_idx(obj, idx);  		if (!prog) { -			pr_warning("relocation failed: no section(%d)\n", idx); +			pr_warn("relocation failed: no section(%d)\n", idx);  			return -LIBBPF_ERRNO__RELOC;  		} @@ -3373,8 +3733,13 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  	load_attr.insns = insns;  	load_attr.insns_cnt = insns_cnt;  	load_attr.license = license; -	load_attr.kern_version = kern_version; -	load_attr.prog_ifindex = prog->prog_ifindex; +	if (prog->type == BPF_PROG_TYPE_TRACING) { +		load_attr.attach_prog_fd = prog->attach_prog_fd; +		load_attr.attach_btf_id = prog->attach_btf_id; +	} else { +		load_attr.kern_version = kern_version; +		load_attr.prog_ifindex = prog->prog_ifindex; +	}  	/* if .BTF.ext was loaded, kernel supports associated BTF for prog */  	if (prog->obj->btf_ext)  		btf_fd = bpf_object__btf_fd(prog->obj); @@ -3393,7 +3758,7 @@ load_program(struct bpf_program *prog, struct bpf_insn *insns, int insns_cnt,  retry_load:  	log_buf = malloc(log_buf_size);  	if (!log_buf) -		pr_warning("Alloc log buffer for bpf loader error, continue without log\n"); +		pr_warn("Alloc log buffer for bpf loader error, continue without log\n");  	ret = bpf_load_program_xattr(&load_attr, log_buf, log_buf_size); @@ -3410,36 +3775,31 @@ retry_load:  		free(log_buf);  		goto retry_load;  	} -	ret = -LIBBPF_ERRNO__LOAD; +	ret = -errno;  	cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -	pr_warning("load bpf program failed: %s\n", cp); +	pr_warn("load bpf program failed: %s\n", cp);  	if (log_buf && log_buf[0] != '\0') {  		ret = -LIBBPF_ERRNO__VERIFY; -		pr_warning("-- BEGIN DUMP LOG ---\n"); -		pr_warning("\n%s\n", log_buf); -		pr_warning("-- END LOG --\n"); +		pr_warn("-- BEGIN DUMP LOG ---\n"); +		pr_warn("\n%s\n", log_buf); +		pr_warn("-- END LOG --\n");  	} else if (load_attr.insns_cnt >= BPF_MAXINSNS) { -		pr_warning("Program too large (%zu insns), at most %d insns\n", -			   load_attr.insns_cnt, BPF_MAXINSNS); +		pr_warn("Program too large (%zu insns), at most %d insns\n", +			load_attr.insns_cnt, BPF_MAXINSNS);  		ret = -LIBBPF_ERRNO__PROG2BIG; -	} else { +	} else if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) {  		/* Wrong program type? */ -		if (load_attr.prog_type != BPF_PROG_TYPE_KPROBE) { -			int fd; - -			load_attr.prog_type = BPF_PROG_TYPE_KPROBE; -			load_attr.expected_attach_type = 0; -			fd = bpf_load_program_xattr(&load_attr, NULL, 0); -			if (fd >= 0) { -				close(fd); -				ret = -LIBBPF_ERRNO__PROGTYPE; -				goto out; -			} -		} +		int fd; -		if (log_buf) -			ret = -LIBBPF_ERRNO__KVER; +		load_attr.prog_type = BPF_PROG_TYPE_KPROBE; +		load_attr.expected_attach_type = 0; +		fd = bpf_load_program_xattr(&load_attr, NULL, 0); +		if (fd >= 0) { +			close(fd); +			ret = -LIBBPF_ERRNO__PROGTYPE; +			goto out; +		}  	}  out: @@ -3455,14 +3815,14 @@ bpf_program__load(struct bpf_program *prog,  	if (prog->instances.nr < 0 || !prog->instances.fds) {  		if (prog->preprocessor) { -			pr_warning("Internal error: can't load program '%s'\n", -				   prog->section_name); +			pr_warn("Internal error: can't load program '%s'\n", +				prog->section_name);  			return -LIBBPF_ERRNO__INTERNAL;  		}  		prog->instances.fds = malloc(sizeof(int));  		if (!prog->instances.fds) { -			pr_warning("Not enough memory for BPF fds\n"); +			pr_warn("Not enough memory for BPF fds\n");  			return -ENOMEM;  		}  		prog->instances.nr = 1; @@ -3471,8 +3831,8 @@ bpf_program__load(struct bpf_program *prog,  	if (!prog->preprocessor) {  		if (prog->instances.nr != 1) { -			pr_warning("Program '%s' is inconsistent: nr(%d) != 1\n", -				   prog->section_name, prog->instances.nr); +			pr_warn("Program '%s' is inconsistent: nr(%d) != 1\n", +				prog->section_name, prog->instances.nr);  		}  		err = load_program(prog, prog->insns, prog->insns_cnt,  				   license, kern_version, &fd); @@ -3489,8 +3849,8 @@ bpf_program__load(struct bpf_program *prog,  		err = preprocessor(prog, i, prog->insns,  				   prog->insns_cnt, &result);  		if (err) { -			pr_warning("Preprocessing the %dth instance of program '%s' failed\n", -				   i, prog->section_name); +			pr_warn("Preprocessing the %dth instance of program '%s' failed\n", +				i, prog->section_name);  			goto out;  		} @@ -3508,8 +3868,8 @@ bpf_program__load(struct bpf_program *prog,  				   license, kern_version, &fd);  		if (err) { -			pr_warning("Loading the %dth instance of program '%s' failed\n", -					i, prog->section_name); +			pr_warn("Loading the %dth instance of program '%s' failed\n", +				i, prog->section_name);  			goto out;  		} @@ -3519,8 +3879,7 @@ bpf_program__load(struct bpf_program *prog,  	}  out:  	if (err) -		pr_warning("failed to load program '%s'\n", -			   prog->section_name); +		pr_warn("failed to load program '%s'\n", prog->section_name);  	zfree(&prog->insns);  	prog->insns_cnt = 0;  	return err; @@ -3551,93 +3910,104 @@ bpf_object__load_progs(struct bpf_object *obj, int log_level)  	return 0;  } -static bool bpf_prog_type__needs_kver(enum bpf_prog_type type) -{ -	switch (type) { -	case BPF_PROG_TYPE_SOCKET_FILTER: -	case BPF_PROG_TYPE_SCHED_CLS: -	case BPF_PROG_TYPE_SCHED_ACT: -	case BPF_PROG_TYPE_XDP: -	case BPF_PROG_TYPE_CGROUP_SKB: -	case BPF_PROG_TYPE_CGROUP_SOCK: -	case BPF_PROG_TYPE_LWT_IN: -	case BPF_PROG_TYPE_LWT_OUT: -	case BPF_PROG_TYPE_LWT_XMIT: -	case BPF_PROG_TYPE_LWT_SEG6LOCAL: -	case BPF_PROG_TYPE_SOCK_OPS: -	case BPF_PROG_TYPE_SK_SKB: -	case BPF_PROG_TYPE_CGROUP_DEVICE: -	case BPF_PROG_TYPE_SK_MSG: -	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: -	case BPF_PROG_TYPE_LIRC_MODE2: -	case BPF_PROG_TYPE_SK_REUSEPORT: -	case BPF_PROG_TYPE_FLOW_DISSECTOR: -	case BPF_PROG_TYPE_UNSPEC: -	case BPF_PROG_TYPE_TRACEPOINT: -	case BPF_PROG_TYPE_RAW_TRACEPOINT: -	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE: -	case BPF_PROG_TYPE_PERF_EVENT: -	case BPF_PROG_TYPE_CGROUP_SYSCTL: -	case BPF_PROG_TYPE_CGROUP_SOCKOPT: -		return false; -	case BPF_PROG_TYPE_KPROBE: -	default: -		return true; -	} -} - -static int bpf_object__validate(struct bpf_object *obj, bool needs_kver) -{ -	if (needs_kver && obj->kern_version == 0) { -		pr_warning("%s doesn't provide kernel version\n", -			   obj->path); -		return -LIBBPF_ERRNO__KVERSION; -	} -	return 0; -} - +static int libbpf_find_attach_btf_id(const char *name, +				     enum bpf_attach_type attach_type, +				     __u32 attach_prog_fd);  static struct bpf_object * -__bpf_object__open(const char *path, void *obj_buf, size_t obj_buf_sz, -		   bool needs_kver, int flags) +__bpf_object__open(const char *path, const void *obj_buf, size_t obj_buf_sz, +		   struct bpf_object_open_opts *opts)  { +	const char *pin_root_path; +	struct bpf_program *prog;  	struct bpf_object *obj; +	const char *obj_name; +	char tmp_name[64]; +	bool relaxed_maps; +	__u32 attach_prog_fd;  	int err;  	if (elf_version(EV_CURRENT) == EV_NONE) { -		pr_warning("failed to init libelf for %s\n", path); +		pr_warn("failed to init libelf for %s\n", +			path ? : "(mem buf)");  		return ERR_PTR(-LIBBPF_ERRNO__LIBELF);  	} -	obj = bpf_object__new(path, obj_buf, obj_buf_sz); +	if (!OPTS_VALID(opts, bpf_object_open_opts)) +		return ERR_PTR(-EINVAL); + +	obj_name = OPTS_GET(opts, object_name, NULL); +	if (obj_buf) { +		if (!obj_name) { +			snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", +				 (unsigned long)obj_buf, +				 (unsigned long)obj_buf_sz); +			obj_name = tmp_name; +		} +		path = obj_name; +		pr_debug("loading object '%s' from buffer\n", obj_name); +	} + +	obj = bpf_object__new(path, obj_buf, obj_buf_sz, obj_name);  	if (IS_ERR(obj))  		return obj; +	obj->relaxed_core_relocs = OPTS_GET(opts, relaxed_core_relocs, false); +	relaxed_maps = OPTS_GET(opts, relaxed_maps, false); +	pin_root_path = OPTS_GET(opts, pin_root_path, NULL); +	attach_prog_fd = OPTS_GET(opts, attach_prog_fd, 0); +  	CHECK_ERR(bpf_object__elf_init(obj), err, out);  	CHECK_ERR(bpf_object__check_endianness(obj), err, out);  	CHECK_ERR(bpf_object__probe_caps(obj), err, out); -	CHECK_ERR(bpf_object__elf_collect(obj, flags), err, out); +	CHECK_ERR(bpf_object__elf_collect(obj, relaxed_maps, pin_root_path), +		  err, out);  	CHECK_ERR(bpf_object__collect_reloc(obj), err, out); -	CHECK_ERR(bpf_object__validate(obj, needs_kver), err, out); -  	bpf_object__elf_finish(obj); + +	bpf_object__for_each_program(prog, obj) { +		enum bpf_prog_type prog_type; +		enum bpf_attach_type attach_type; + +		err = libbpf_prog_type_by_name(prog->section_name, &prog_type, +					       &attach_type); +		if (err == -ESRCH) +			/* couldn't guess, but user might manually specify */ +			continue; +		if (err) +			goto out; + +		bpf_program__set_type(prog, prog_type); +		bpf_program__set_expected_attach_type(prog, attach_type); +		if (prog_type == BPF_PROG_TYPE_TRACING) { +			err = libbpf_find_attach_btf_id(prog->section_name, +							attach_type, +							attach_prog_fd); +			if (err <= 0) +				goto out; +			prog->attach_btf_id = err; +			prog->attach_prog_fd = attach_prog_fd; +		} +	} +  	return obj;  out:  	bpf_object__close(obj);  	return ERR_PTR(err);  } -struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, -					    int flags) +static struct bpf_object * +__bpf_object__open_xattr(struct bpf_object_open_attr *attr, int flags)  { +	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, +		.relaxed_maps = flags & MAPS_RELAX_COMPAT, +	); +  	/* param validation */  	if (!attr->file)  		return NULL;  	pr_debug("loading %s\n", attr->file); - -	return __bpf_object__open(attr->file, NULL, 0, -				  bpf_prog_type__needs_kver(attr->prog_type), -				  flags); +	return __bpf_object__open(attr->file, NULL, 0, &opts);  }  struct bpf_object *bpf_object__open_xattr(struct bpf_object_open_attr *attr) @@ -3655,25 +4025,42 @@ struct bpf_object *bpf_object__open(const char *path)  	return bpf_object__open_xattr(&attr);  } -struct bpf_object *bpf_object__open_buffer(void *obj_buf, -					   size_t obj_buf_sz, -					   const char *name) +struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts)  { -	char tmp_name[64]; +	if (!path) +		return ERR_PTR(-EINVAL); -	/* param validation */ -	if (!obj_buf || obj_buf_sz <= 0) -		return NULL; +	pr_debug("loading %s\n", path); -	if (!name) { -		snprintf(tmp_name, sizeof(tmp_name), "%lx-%lx", -			 (unsigned long)obj_buf, -			 (unsigned long)obj_buf_sz); -		name = tmp_name; -	} -	pr_debug("loading object '%s' from buffer\n", name); +	return __bpf_object__open(path, NULL, 0, opts); +} + +struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, +		     struct bpf_object_open_opts *opts) +{ +	if (!obj_buf || obj_buf_sz == 0) +		return ERR_PTR(-EINVAL); + +	return __bpf_object__open(NULL, obj_buf, obj_buf_sz, opts); +} + +struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, +			const char *name) +{ +	DECLARE_LIBBPF_OPTS(bpf_object_open_opts, opts, +		.object_name = name, +		/* wrong default, but backwards-compatible */ +		.relaxed_maps = true, +	); -	return __bpf_object__open(name, obj_buf, obj_buf_sz, true, true); +	/* returning NULL is wrong, but backwards-compatible */ +	if (!obj_buf || obj_buf_sz == 0) +		return NULL; + +	return bpf_object__open_mem(obj_buf, obj_buf_sz, &opts);  }  int bpf_object__unload(struct bpf_object *obj) @@ -3695,7 +4082,7 @@ int bpf_object__unload(struct bpf_object *obj)  int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  {  	struct bpf_object *obj; -	int err; +	int err, i;  	if (!attr)  		return -EINVAL; @@ -3704,7 +4091,7 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  		return -EINVAL;  	if (obj->loaded) { -		pr_warning("object should not be loaded twice\n"); +		pr_warn("object should not be loaded twice\n");  		return -EINVAL;  	} @@ -3716,8 +4103,13 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)  	return 0;  out: +	/* unpin any maps that were auto-pinned during load */ +	for (i = 0; i < obj->nr_maps; i++) +		if (obj->maps[i].pinned && !obj->maps[i].reused) +			bpf_map__unpin(&obj->maps[i], NULL); +  	bpf_object__unload(obj); -	pr_warning("failed to load object '%s'\n", obj->path); +	pr_warn("failed to load object '%s'\n", obj->path);  	return err;  } @@ -3730,6 +4122,28 @@ int bpf_object__load(struct bpf_object *obj)  	return bpf_object__load_xattr(&attr);  } +static int make_parent_dir(const char *path) +{ +	char *cp, errmsg[STRERR_BUFSIZE]; +	char *dname, *dir; +	int err = 0; + +	dname = strdup(path); +	if (dname == NULL) +		return -ENOMEM; + +	dir = dirname(dname); +	if (mkdir(dir, 0700) && errno != EEXIST) +		err = -errno; + +	free(dname); +	if (err) { +		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); +		pr_warn("failed to mkdir %s: %s\n", path, cp); +	} +	return err; +} +  static int check_path(const char *path)  {  	char *cp, errmsg[STRERR_BUFSIZE]; @@ -3747,13 +4161,13 @@ static int check_path(const char *path)  	dir = dirname(dname);  	if (statfs(dir, &st_fs)) {  		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -		pr_warning("failed to statfs %s: %s\n", dir, cp); +		pr_warn("failed to statfs %s: %s\n", dir, cp);  		err = -errno;  	}  	free(dname);  	if (!err && st_fs.f_type != BPF_FS_MAGIC) { -		pr_warning("specified path %s is not on BPF FS\n", path); +		pr_warn("specified path %s is not on BPF FS\n", path);  		err = -EINVAL;  	} @@ -3766,24 +4180,28 @@ int bpf_program__pin_instance(struct bpf_program *prog, const char *path,  	char *cp, errmsg[STRERR_BUFSIZE];  	int err; +	err = make_parent_dir(path); +	if (err) +		return err; +  	err = check_path(path);  	if (err)  		return err;  	if (prog == NULL) { -		pr_warning("invalid program pointer\n"); +		pr_warn("invalid program pointer\n");  		return -EINVAL;  	}  	if (instance < 0 || instance >= prog->instances.nr) { -		pr_warning("invalid prog instance %d of prog %s (max %d)\n", -			   instance, prog->section_name, prog->instances.nr); +		pr_warn("invalid prog instance %d of prog %s (max %d)\n", +			instance, prog->section_name, prog->instances.nr);  		return -EINVAL;  	}  	if (bpf_obj_pin(prog->instances.fds[instance], path)) {  		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -		pr_warning("failed to pin program: %s\n", cp); +		pr_warn("failed to pin program: %s\n", cp);  		return -errno;  	}  	pr_debug("pinned program '%s'\n", path); @@ -3801,13 +4219,13 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,  		return err;  	if (prog == NULL) { -		pr_warning("invalid program pointer\n"); +		pr_warn("invalid program pointer\n");  		return -EINVAL;  	}  	if (instance < 0 || instance >= prog->instances.nr) { -		pr_warning("invalid prog instance %d of prog %s (max %d)\n", -			   instance, prog->section_name, prog->instances.nr); +		pr_warn("invalid prog instance %d of prog %s (max %d)\n", +			instance, prog->section_name, prog->instances.nr);  		return -EINVAL;  	} @@ -3819,36 +4237,25 @@ int bpf_program__unpin_instance(struct bpf_program *prog, const char *path,  	return 0;  } -static int make_dir(const char *path) -{ -	char *cp, errmsg[STRERR_BUFSIZE]; -	int err = 0; - -	if (mkdir(path, 0700) && errno != EEXIST) -		err = -errno; - -	if (err) { -		cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); -		pr_warning("failed to mkdir %s: %s\n", path, cp); -	} -	return err; -} -  int bpf_program__pin(struct bpf_program *prog, const char *path)  {  	int i, err; +	err = make_parent_dir(path); +	if (err) +		return err; +  	err = check_path(path);  	if (err)  		return err;  	if (prog == NULL) { -		pr_warning("invalid program pointer\n"); +		pr_warn("invalid program pointer\n");  		return -EINVAL;  	}  	if (prog->instances.nr <= 0) { -		pr_warning("no instances of prog %s to pin\n", +		pr_warn("no instances of prog %s to pin\n",  			   prog->section_name);  		return -EINVAL;  	} @@ -3858,10 +4265,6 @@ int bpf_program__pin(struct bpf_program *prog, const char *path)  		return bpf_program__pin_instance(prog, path, 0);  	} -	err = make_dir(path); -	if (err) -		return err; -  	for (i = 0; i < prog->instances.nr; i++) {  		char buf[PATH_MAX];  		int len; @@ -3910,12 +4313,12 @@ int bpf_program__unpin(struct bpf_program *prog, const char *path)  		return err;  	if (prog == NULL) { -		pr_warning("invalid program pointer\n"); +		pr_warn("invalid program pointer\n");  		return -EINVAL;  	}  	if (prog->instances.nr <= 0) { -		pr_warning("no instances of prog %s to pin\n", +		pr_warn("no instances of prog %s to pin\n",  			   prog->section_name);  		return -EINVAL;  	} @@ -3952,47 +4355,123 @@ int bpf_map__pin(struct bpf_map *map, const char *path)  	char *cp, errmsg[STRERR_BUFSIZE];  	int err; -	err = check_path(path); -	if (err) -		return err; -  	if (map == NULL) { -		pr_warning("invalid map pointer\n"); +		pr_warn("invalid map pointer\n");  		return -EINVAL;  	} -	if (bpf_obj_pin(map->fd, path)) { -		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg)); -		pr_warning("failed to pin map: %s\n", cp); -		return -errno; +	if (map->pin_path) { +		if (path && strcmp(path, map->pin_path)) { +			pr_warn("map '%s' already has pin path '%s' different from '%s'\n", +				bpf_map__name(map), map->pin_path, path); +			return -EINVAL; +		} else if (map->pinned) { +			pr_debug("map '%s' already pinned at '%s'; not re-pinning\n", +				 bpf_map__name(map), map->pin_path); +			return 0; +		} +	} else { +		if (!path) { +			pr_warn("missing a path to pin map '%s' at\n", +				bpf_map__name(map)); +			return -EINVAL; +		} else if (map->pinned) { +			pr_warn("map '%s' already pinned\n", bpf_map__name(map)); +			return -EEXIST; +		} + +		map->pin_path = strdup(path); +		if (!map->pin_path) { +			err = -errno; +			goto out_err; +		}  	} -	pr_debug("pinned map '%s'\n", path); +	err = make_parent_dir(map->pin_path); +	if (err) +		return err; + +	err = check_path(map->pin_path); +	if (err) +		return err; + +	if (bpf_obj_pin(map->fd, map->pin_path)) { +		err = -errno; +		goto out_err; +	} + +	map->pinned = true; +	pr_debug("pinned map '%s'\n", map->pin_path);  	return 0; + +out_err: +	cp = libbpf_strerror_r(-err, errmsg, sizeof(errmsg)); +	pr_warn("failed to pin map: %s\n", cp); +	return err;  }  int bpf_map__unpin(struct bpf_map *map, const char *path)  {  	int err; -	err = check_path(path); -	if (err) -		return err; -  	if (map == NULL) { -		pr_warning("invalid map pointer\n"); +		pr_warn("invalid map pointer\n"); +		return -EINVAL; +	} + +	if (map->pin_path) { +		if (path && strcmp(path, map->pin_path)) { +			pr_warn("map '%s' already has pin path '%s' different from '%s'\n", +				bpf_map__name(map), map->pin_path, path); +			return -EINVAL; +		} +		path = map->pin_path; +	} else if (!path) { +		pr_warn("no path to unpin map '%s' from\n", +			bpf_map__name(map));  		return -EINVAL;  	} +	err = check_path(path); +	if (err) +		return err; +  	err = unlink(path);  	if (err != 0)  		return -errno; -	pr_debug("unpinned map '%s'\n", path); +	map->pinned = false; +	pr_debug("unpinned map '%s' from '%s'\n", bpf_map__name(map), path); + +	return 0; +} + +int bpf_map__set_pin_path(struct bpf_map *map, const char *path) +{ +	char *new = NULL; + +	if (path) { +		new = strdup(path); +		if (!new) +			return -errno; +	} + +	free(map->pin_path); +	map->pin_path = new;  	return 0;  } +const char *bpf_map__get_pin_path(const struct bpf_map *map) +{ +	return map->pin_path; +} + +bool bpf_map__is_pinned(const struct bpf_map *map) +{ +	return map->pinned; +} +  int bpf_object__pin_maps(struct bpf_object *obj, const char *path)  {  	struct bpf_map *map; @@ -4002,29 +4481,32 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)  		return -ENOENT;  	if (!obj->loaded) { -		pr_warning("object not yet loaded; load it first\n"); +		pr_warn("object not yet loaded; load it first\n");  		return -ENOENT;  	} -	err = make_dir(path); -	if (err) -		return err; -  	bpf_object__for_each_map(map, obj) { +		char *pin_path = NULL;  		char buf[PATH_MAX]; -		int len; -		len = snprintf(buf, PATH_MAX, "%s/%s", path, -			       bpf_map__name(map)); -		if (len < 0) { -			err = -EINVAL; -			goto err_unpin_maps; -		} else if (len >= PATH_MAX) { -			err = -ENAMETOOLONG; -			goto err_unpin_maps; +		if (path) { +			int len; + +			len = snprintf(buf, PATH_MAX, "%s/%s", path, +				       bpf_map__name(map)); +			if (len < 0) { +				err = -EINVAL; +				goto err_unpin_maps; +			} else if (len >= PATH_MAX) { +				err = -ENAMETOOLONG; +				goto err_unpin_maps; +			} +			pin_path = buf; +		} else if (!map->pin_path) { +			continue;  		} -		err = bpf_map__pin(map, buf); +		err = bpf_map__pin(map, pin_path);  		if (err)  			goto err_unpin_maps;  	} @@ -4033,17 +4515,10 @@ int bpf_object__pin_maps(struct bpf_object *obj, const char *path)  err_unpin_maps:  	while ((map = bpf_map__prev(map, obj))) { -		char buf[PATH_MAX]; -		int len; - -		len = snprintf(buf, PATH_MAX, "%s/%s", path, -			       bpf_map__name(map)); -		if (len < 0) -			continue; -		else if (len >= PATH_MAX) +		if (!map->pin_path)  			continue; -		bpf_map__unpin(map, buf); +		bpf_map__unpin(map, NULL);  	}  	return err; @@ -4058,17 +4533,24 @@ int bpf_object__unpin_maps(struct bpf_object *obj, const char *path)  		return -ENOENT;  	bpf_object__for_each_map(map, obj) { +		char *pin_path = NULL;  		char buf[PATH_MAX]; -		int len; -		len = snprintf(buf, PATH_MAX, "%s/%s", path, -			       bpf_map__name(map)); -		if (len < 0) -			return -EINVAL; -		else if (len >= PATH_MAX) -			return -ENAMETOOLONG; +		if (path) { +			int len; + +			len = snprintf(buf, PATH_MAX, "%s/%s", path, +				       bpf_map__name(map)); +			if (len < 0) +				return -EINVAL; +			else if (len >= PATH_MAX) +				return -ENAMETOOLONG; +			pin_path = buf; +		} else if (!map->pin_path) { +			continue; +		} -		err = bpf_map__unpin(map, buf); +		err = bpf_map__unpin(map, pin_path);  		if (err)  			return err;  	} @@ -4085,14 +4567,10 @@ int bpf_object__pin_programs(struct bpf_object *obj, const char *path)  		return -ENOENT;  	if (!obj->loaded) { -		pr_warning("object not yet loaded; load it first\n"); +		pr_warn("object not yet loaded; load it first\n");  		return -ENOENT;  	} -	err = make_dir(path); -	if (err) -		return err; -  	bpf_object__for_each_program(prog, obj) {  		char buf[PATH_MAX];  		int len; @@ -4193,6 +4671,7 @@ void bpf_object__close(struct bpf_object *obj)  	for (i = 0; i < obj->nr_maps; i++) {  		zfree(&obj->maps[i].name); +		zfree(&obj->maps[i].pin_path);  		if (obj->maps[i].clear_priv)  			obj->maps[i].clear_priv(&obj->maps[i],  						obj->maps[i].priv); @@ -4236,7 +4715,7 @@ bpf_object__next(struct bpf_object *prev)  const char *bpf_object__name(const struct bpf_object *obj)  { -	return obj ? obj->path : ERR_PTR(-EINVAL); +	return obj ? obj->name : ERR_PTR(-EINVAL);  }  unsigned int bpf_object__kversion(const struct bpf_object *obj) @@ -4286,7 +4765,7 @@ __bpf_program__iter(const struct bpf_program *p, const struct bpf_object *obj,  			&obj->programs[nr_programs - 1];  	if (p->obj != obj) { -		pr_warning("error: program handler doesn't match object\n"); +		pr_warn("error: program handler doesn't match object\n");  		return NULL;  	} @@ -4349,7 +4828,7 @@ const char *bpf_program__title(const struct bpf_program *prog, bool needs_copy)  	if (needs_copy) {  		title = strdup(title);  		if (!title) { -			pr_warning("failed to strdup program title\n"); +			pr_warn("failed to strdup program title\n");  			return ERR_PTR(-ENOMEM);  		}  	} @@ -4362,6 +4841,11 @@ int bpf_program__fd(const struct bpf_program *prog)  	return bpf_program__nth_fd(prog, 0);  } +size_t bpf_program__size(const struct bpf_program *prog) +{ +	return prog->insns_cnt * sizeof(struct bpf_insn); +} +  int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,  			  bpf_program_prep_t prep)  { @@ -4371,13 +4855,13 @@ int bpf_program__set_prep(struct bpf_program *prog, int nr_instances,  		return -EINVAL;  	if (prog->instances.nr > 0 || prog->instances.fds) { -		pr_warning("Can't set pre-processor after loading\n"); +		pr_warn("Can't set pre-processor after loading\n");  		return -EINVAL;  	}  	instances_fds = malloc(sizeof(int) * nr_instances);  	if (!instances_fds) { -		pr_warning("alloc memory failed for fds\n"); +		pr_warn("alloc memory failed for fds\n");  		return -ENOMEM;  	} @@ -4398,21 +4882,26 @@ int bpf_program__nth_fd(const struct bpf_program *prog, int n)  		return -EINVAL;  	if (n >= prog->instances.nr || n < 0) { -		pr_warning("Can't get the %dth fd from program %s: only %d instances\n", -			   n, prog->section_name, prog->instances.nr); +		pr_warn("Can't get the %dth fd from program %s: only %d instances\n", +			n, prog->section_name, prog->instances.nr);  		return -EINVAL;  	}  	fd = prog->instances.fds[n];  	if (fd < 0) { -		pr_warning("%dth instance of program '%s' is invalid\n", -			   n, prog->section_name); +		pr_warn("%dth instance of program '%s' is invalid\n", +			n, prog->section_name);  		return -ENOENT;  	}  	return fd;  } +enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog) +{ +	return prog->type; +} +  void bpf_program__set_type(struct bpf_program *prog, enum bpf_prog_type type)  {  	prog->type = type; @@ -4446,6 +4935,13 @@ BPF_PROG_TYPE_FNS(tracepoint, BPF_PROG_TYPE_TRACEPOINT);  BPF_PROG_TYPE_FNS(raw_tracepoint, BPF_PROG_TYPE_RAW_TRACEPOINT);  BPF_PROG_TYPE_FNS(xdp, BPF_PROG_TYPE_XDP);  BPF_PROG_TYPE_FNS(perf_event, BPF_PROG_TYPE_PERF_EVENT); +BPF_PROG_TYPE_FNS(tracing, BPF_PROG_TYPE_TRACING); + +enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog) +{ +	return prog->expected_attach_type; +}  void bpf_program__set_expected_attach_type(struct bpf_program *prog,  					   enum bpf_attach_type type) @@ -4453,19 +4949,23 @@ void bpf_program__set_expected_attach_type(struct bpf_program *prog,  	prog->expected_attach_type = type;  } -#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, atype) \ -	{ string, sizeof(string) - 1, ptype, eatype, is_attachable, atype } +#define BPF_PROG_SEC_IMPL(string, ptype, eatype, is_attachable, btf, atype) \ +	{ string, sizeof(string) - 1, ptype, eatype, is_attachable, btf, atype }  /* Programs that can NOT be attached. */ -#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0) +#define BPF_PROG_SEC(string, ptype) BPF_PROG_SEC_IMPL(string, ptype, 0, 0, 0, 0)  /* Programs that can be attached. */  #define BPF_APROG_SEC(string, ptype, atype) \ -	BPF_PROG_SEC_IMPL(string, ptype, 0, 1, atype) +	BPF_PROG_SEC_IMPL(string, ptype, 0, 1, 0, atype)  /* Programs that must specify expected attach type at load time. */  #define BPF_EAPROG_SEC(string, ptype, eatype) \ -	BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, eatype) +	BPF_PROG_SEC_IMPL(string, ptype, eatype, 1, 0, eatype) + +/* Programs that use BTF to identify attach point */ +#define BPF_PROG_BTF(string, ptype, eatype) \ +	BPF_PROG_SEC_IMPL(string, ptype, eatype, 0, 1, 0)  /* Programs that can be attached but attach type can't be identified by section   * name. Kept for backward compatibility. @@ -4477,16 +4977,27 @@ static const struct {  	size_t len;  	enum bpf_prog_type prog_type;  	enum bpf_attach_type expected_attach_type; -	int is_attachable; +	bool is_attachable; +	bool is_attach_btf;  	enum bpf_attach_type attach_type;  } section_names[] = {  	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),  	BPF_PROG_SEC("kprobe/",			BPF_PROG_TYPE_KPROBE), +	BPF_PROG_SEC("uprobe/",			BPF_PROG_TYPE_KPROBE),  	BPF_PROG_SEC("kretprobe/",		BPF_PROG_TYPE_KPROBE), +	BPF_PROG_SEC("uretprobe/",		BPF_PROG_TYPE_KPROBE),  	BPF_PROG_SEC("classifier",		BPF_PROG_TYPE_SCHED_CLS),  	BPF_PROG_SEC("action",			BPF_PROG_TYPE_SCHED_ACT),  	BPF_PROG_SEC("tracepoint/",		BPF_PROG_TYPE_TRACEPOINT), +	BPF_PROG_SEC("tp/",			BPF_PROG_TYPE_TRACEPOINT),  	BPF_PROG_SEC("raw_tracepoint/",		BPF_PROG_TYPE_RAW_TRACEPOINT), +	BPF_PROG_SEC("raw_tp/",			BPF_PROG_TYPE_RAW_TRACEPOINT), +	BPF_PROG_BTF("tp_btf/",			BPF_PROG_TYPE_TRACING, +						BPF_TRACE_RAW_TP), +	BPF_PROG_BTF("fentry/",			BPF_PROG_TYPE_TRACING, +						BPF_TRACE_FENTRY), +	BPF_PROG_BTF("fexit/",			BPF_PROG_TYPE_TRACING, +						BPF_TRACE_FEXIT),  	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),  	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),  	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN), @@ -4593,14 +5104,105 @@ int libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,  		*expected_attach_type = section_names[i].expected_attach_type;  		return 0;  	} -	pr_warning("failed to guess program type based on ELF section name '%s'\n", name); +	pr_warn("failed to guess program type from ELF section '%s'\n", name);  	type_names = libbpf_get_type_names(false);  	if (type_names != NULL) {  		pr_info("supported section(type) names are:%s\n", type_names);  		free(type_names);  	} -	return -EINVAL; +	return -ESRCH; +} + +#define BTF_PREFIX "btf_trace_" +int libbpf_find_vmlinux_btf_id(const char *name, +			       enum bpf_attach_type attach_type) +{ +	struct btf *btf = bpf_core_find_kernel_btf(); +	char raw_tp_btf[128] = BTF_PREFIX; +	char *dst = raw_tp_btf + sizeof(BTF_PREFIX) - 1; +	const char *btf_name; +	int err = -EINVAL; +	__u32 kind; + +	if (IS_ERR(btf)) { +		pr_warn("vmlinux BTF is not found\n"); +		return -EINVAL; +	} + +	if (attach_type == BPF_TRACE_RAW_TP) { +		/* prepend "btf_trace_" prefix per kernel convention */ +		strncat(dst, name, sizeof(raw_tp_btf) - sizeof(BTF_PREFIX)); +		btf_name = raw_tp_btf; +		kind = BTF_KIND_TYPEDEF; +	} else { +		btf_name = name; +		kind = BTF_KIND_FUNC; +	} +	err = btf__find_by_name_kind(btf, btf_name, kind); +	btf__free(btf); +	return err; +} + +static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd) +{ +	struct bpf_prog_info_linear *info_linear; +	struct bpf_prog_info *info; +	struct btf *btf = NULL; +	int err = -EINVAL; + +	info_linear = bpf_program__get_prog_info_linear(attach_prog_fd, 0); +	if (IS_ERR_OR_NULL(info_linear)) { +		pr_warn("failed get_prog_info_linear for FD %d\n", +			attach_prog_fd); +		return -EINVAL; +	} +	info = &info_linear->info; +	if (!info->btf_id) { +		pr_warn("The target program doesn't have BTF\n"); +		goto out; +	} +	if (btf__get_from_id(info->btf_id, &btf)) { +		pr_warn("Failed to get BTF of the program\n"); +		goto out; +	} +	err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC); +	btf__free(btf); +	if (err <= 0) { +		pr_warn("%s is not found in prog's BTF\n", name); +		goto out; +	} +out: +	free(info_linear); +	return err; +} + +static int libbpf_find_attach_btf_id(const char *name, +				     enum bpf_attach_type attach_type, +				     __u32 attach_prog_fd) +{ +	int i, err; + +	if (!name) +		return -EINVAL; + +	for (i = 0; i < ARRAY_SIZE(section_names); i++) { +		if (!section_names[i].is_attach_btf) +			continue; +		if (strncmp(name, section_names[i].sec, section_names[i].len)) +			continue; +		if (attach_prog_fd) +			err = libbpf_find_prog_btf_id(name + section_names[i].len, +						      attach_prog_fd); +		else +			err = libbpf_find_vmlinux_btf_id(name + section_names[i].len, +							 attach_type); +		if (err <= 0) +			pr_warn("%s is not found in vmlinux BTF\n", name); +		return err; +	} +	pr_warn("failed to identify btf_id based on ELF section name '%s'\n", name); +	return -ESRCH;  }  int libbpf_attach_type_by_name(const char *name, @@ -4620,7 +5222,7 @@ int libbpf_attach_type_by_name(const char *name,  		*attach_type = section_names[i].attach_type;  		return 0;  	} -	pr_warning("failed to guess attach type based on ELF section name '%s'\n", name); +	pr_warn("failed to guess attach type based on ELF section name '%s'\n", name);  	type_names = libbpf_get_type_names(true);  	if (type_names != NULL) {  		pr_info("attachable section(type) names are:%s\n", type_names); @@ -4630,15 +5232,6 @@ int libbpf_attach_type_by_name(const char *name,  	return -EINVAL;  } -static int -bpf_program__identify_section(struct bpf_program *prog, -			      enum bpf_prog_type *prog_type, -			      enum bpf_attach_type *expected_attach_type) -{ -	return libbpf_prog_type_by_name(prog->section_name, prog_type, -					expected_attach_type); -} -  int bpf_map__fd(const struct bpf_map *map)  {  	return map ? map->fd : -EINVAL; @@ -4703,11 +5296,11 @@ void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex)  int bpf_map__set_inner_map_fd(struct bpf_map *map, int fd)  {  	if (!bpf_map_type__is_map_in_map(map->def.type)) { -		pr_warning("error: unsupported map type\n"); +		pr_warn("error: unsupported map type\n");  		return -EINVAL;  	}  	if (map->inner_map_fd != -1) { -		pr_warning("error: inner_map_fd already specified\n"); +		pr_warn("error: inner_map_fd already specified\n");  		return -EINVAL;  	}  	map->inner_map_fd = fd; @@ -4727,8 +5320,8 @@ __bpf_map__iter(const struct bpf_map *m, const struct bpf_object *obj, int i)  	e = obj->maps + obj->nr_maps;  	if ((m < s) || (m >= e)) { -		pr_warning("error in %s: map handler doesn't belong to object\n", -			   __func__); +		pr_warn("error in %s: map handler doesn't belong to object\n", +			 __func__);  		return NULL;  	} @@ -4806,8 +5399,6 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  {  	struct bpf_object_open_attr open_attr = {};  	struct bpf_program *prog, *first_prog = NULL; -	enum bpf_attach_type expected_attach_type; -	enum bpf_prog_type prog_type;  	struct bpf_object *obj;  	struct bpf_map *map;  	int err; @@ -4825,26 +5416,27 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  		return -ENOENT;  	bpf_object__for_each_program(prog, obj) { +		enum bpf_attach_type attach_type = attr->expected_attach_type;  		/* -		 * If type is not specified, try to guess it based on -		 * section name. +		 * to preserve backwards compatibility, bpf_prog_load treats +		 * attr->prog_type, if specified, as an override to whatever +		 * bpf_object__open guessed  		 */ -		prog_type = attr->prog_type; -		prog->prog_ifindex = attr->ifindex; -		expected_attach_type = attr->expected_attach_type; -		if (prog_type == BPF_PROG_TYPE_UNSPEC) { -			err = bpf_program__identify_section(prog, &prog_type, -							    &expected_attach_type); -			if (err < 0) { -				bpf_object__close(obj); -				return -EINVAL; -			} +		if (attr->prog_type != BPF_PROG_TYPE_UNSPEC) { +			bpf_program__set_type(prog, attr->prog_type); +			bpf_program__set_expected_attach_type(prog, +							      attach_type); +		} +		if (bpf_program__get_type(prog) == BPF_PROG_TYPE_UNSPEC) { +			/* +			 * we haven't guessed from section name and user +			 * didn't provide a fallback type, too bad... +			 */ +			bpf_object__close(obj); +			return -EINVAL;  		} -		bpf_program__set_type(prog, prog_type); -		bpf_program__set_expected_attach_type(prog, -						      expected_attach_type); - +		prog->prog_ifindex = attr->ifindex;  		prog->log_level = attr->log_level;  		prog->prog_flags = attr->prog_flags;  		if (!first_prog) @@ -4857,7 +5449,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  	}  	if (!first_prog) { -		pr_warning("object file doesn't contain bpf program\n"); +		pr_warn("object file doesn't contain bpf program\n");  		bpf_object__close(obj);  		return -ENOENT;  	} @@ -4916,14 +5508,14 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,  	int prog_fd, err;  	if (pfd < 0) { -		pr_warning("program '%s': invalid perf event FD %d\n", -			   bpf_program__title(prog, false), pfd); +		pr_warn("program '%s': invalid perf event FD %d\n", +			bpf_program__title(prog, false), pfd);  		return ERR_PTR(-EINVAL);  	}  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warning("program '%s': can't attach BPF program w/o FD (did you load it?)\n", -			   bpf_program__title(prog, false)); +		pr_warn("program '%s': can't attach BPF program w/o FD (did you load it?)\n", +			bpf_program__title(prog, false));  		return ERR_PTR(-EINVAL);  	} @@ -4936,16 +5528,16 @@ struct bpf_link *bpf_program__attach_perf_event(struct bpf_program *prog,  	if (ioctl(pfd, PERF_EVENT_IOC_SET_BPF, prog_fd) < 0) {  		err = -errno;  		free(link); -		pr_warning("program '%s': failed to attach to pfd %d: %s\n", -			   bpf_program__title(prog, false), pfd, +		pr_warn("program '%s': failed to attach to pfd %d: %s\n", +			bpf_program__title(prog, false), pfd,  			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return ERR_PTR(err);  	}  	if (ioctl(pfd, PERF_EVENT_IOC_ENABLE, 0) < 0) {  		err = -errno;  		free(link); -		pr_warning("program '%s': failed to enable pfd %d: %s\n", -			   bpf_program__title(prog, false), pfd, +		pr_warn("program '%s': failed to enable pfd %d: %s\n", +			bpf_program__title(prog, false), pfd,  			   libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return ERR_PTR(err);  	} @@ -5020,9 +5612,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,  	type = uprobe ? determine_uprobe_perf_type()  		      : determine_kprobe_perf_type();  	if (type < 0) { -		pr_warning("failed to determine %s perf type: %s\n", -			   uprobe ? "uprobe" : "kprobe", -			   libbpf_strerror_r(type, errmsg, sizeof(errmsg))); +		pr_warn("failed to determine %s perf type: %s\n", +			uprobe ? "uprobe" : "kprobe", +			libbpf_strerror_r(type, errmsg, sizeof(errmsg)));  		return type;  	}  	if (retprobe) { @@ -5030,10 +5622,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,  				 : determine_kprobe_retprobe_bit();  		if (bit < 0) { -			pr_warning("failed to determine %s retprobe bit: %s\n", -				   uprobe ? "uprobe" : "kprobe", -				   libbpf_strerror_r(bit, errmsg, -						     sizeof(errmsg))); +			pr_warn("failed to determine %s retprobe bit: %s\n", +				uprobe ? "uprobe" : "kprobe", +				libbpf_strerror_r(bit, errmsg, sizeof(errmsg)));  			return bit;  		}  		attr.config |= 1 << bit; @@ -5050,9 +5641,9 @@ static int perf_event_open_probe(bool uprobe, bool retprobe, const char *name,  		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);  	if (pfd < 0) {  		err = -errno; -		pr_warning("%s perf_event_open() failed: %s\n", -			   uprobe ? "uprobe" : "kprobe", -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("%s perf_event_open() failed: %s\n", +			uprobe ? "uprobe" : "kprobe", +			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return err;  	}  	return pfd; @@ -5069,20 +5660,20 @@ struct bpf_link *bpf_program__attach_kprobe(struct bpf_program *prog,  	pfd = perf_event_open_probe(false /* uprobe */, retprobe, func_name,  				    0 /* offset */, -1 /* pid */);  	if (pfd < 0) { -		pr_warning("program '%s': failed to create %s '%s' perf event: %s\n", -			   bpf_program__title(prog, false), -			   retprobe ? "kretprobe" : "kprobe", func_name, -			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to create %s '%s' perf event: %s\n", +			bpf_program__title(prog, false), +			retprobe ? "kretprobe" : "kprobe", func_name, +			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	}  	link = bpf_program__attach_perf_event(prog, pfd);  	if (IS_ERR(link)) {  		close(pfd);  		err = PTR_ERR(link); -		pr_warning("program '%s': failed to attach to %s '%s': %s\n", -			   bpf_program__title(prog, false), -			   retprobe ? "kretprobe" : "kprobe", func_name, -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to attach to %s '%s': %s\n", +			bpf_program__title(prog, false), +			retprobe ? "kretprobe" : "kprobe", func_name, +			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return link;  	}  	return link; @@ -5100,22 +5691,22 @@ struct bpf_link *bpf_program__attach_uprobe(struct bpf_program *prog,  	pfd = perf_event_open_probe(true /* uprobe */, retprobe,  				    binary_path, func_offset, pid);  	if (pfd < 0) { -		pr_warning("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", -			   bpf_program__title(prog, false), -			   retprobe ? "uretprobe" : "uprobe", -			   binary_path, func_offset, -			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to create %s '%s:0x%zx' perf event: %s\n", +			bpf_program__title(prog, false), +			retprobe ? "uretprobe" : "uprobe", +			binary_path, func_offset, +			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	}  	link = bpf_program__attach_perf_event(prog, pfd);  	if (IS_ERR(link)) {  		close(pfd);  		err = PTR_ERR(link); -		pr_warning("program '%s': failed to attach to %s '%s:0x%zx': %s\n", -			   bpf_program__title(prog, false), -			   retprobe ? "uretprobe" : "uprobe", -			   binary_path, func_offset, -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to attach to %s '%s:0x%zx': %s\n", +			bpf_program__title(prog, false), +			retprobe ? "uretprobe" : "uprobe", +			binary_path, func_offset, +			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return link;  	}  	return link; @@ -5149,9 +5740,9 @@ static int perf_event_open_tracepoint(const char *tp_category,  	tp_id = determine_tracepoint_id(tp_category, tp_name);  	if (tp_id < 0) { -		pr_warning("failed to determine tracepoint '%s/%s' perf event ID: %s\n", -			   tp_category, tp_name, -			   libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg))); +		pr_warn("failed to determine tracepoint '%s/%s' perf event ID: %s\n", +			tp_category, tp_name, +			libbpf_strerror_r(tp_id, errmsg, sizeof(errmsg)));  		return tp_id;  	} @@ -5163,9 +5754,9 @@ static int perf_event_open_tracepoint(const char *tp_category,  		      -1 /* group_fd */, PERF_FLAG_FD_CLOEXEC);  	if (pfd < 0) {  		err = -errno; -		pr_warning("tracepoint '%s/%s' perf_event_open() failed: %s\n", -			   tp_category, tp_name, -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("tracepoint '%s/%s' perf_event_open() failed: %s\n", +			tp_category, tp_name, +			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return err;  	}  	return pfd; @@ -5181,20 +5772,20 @@ struct bpf_link *bpf_program__attach_tracepoint(struct bpf_program *prog,  	pfd = perf_event_open_tracepoint(tp_category, tp_name);  	if (pfd < 0) { -		pr_warning("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", -			   bpf_program__title(prog, false), -			   tp_category, tp_name, -			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to create tracepoint '%s/%s' perf event: %s\n", +			bpf_program__title(prog, false), +			tp_category, tp_name, +			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	}  	link = bpf_program__attach_perf_event(prog, pfd);  	if (IS_ERR(link)) {  		close(pfd);  		err = PTR_ERR(link); -		pr_warning("program '%s': failed to attach to tracepoint '%s/%s': %s\n", -			   bpf_program__title(prog, false), -			   tp_category, tp_name, -			   libbpf_strerror_r(err, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to attach to tracepoint '%s/%s': %s\n", +			bpf_program__title(prog, false), +			tp_category, tp_name, +			libbpf_strerror_r(err, errmsg, sizeof(errmsg)));  		return link;  	}  	return link; @@ -5216,8 +5807,8 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  	prog_fd = bpf_program__fd(prog);  	if (prog_fd < 0) { -		pr_warning("program '%s': can't attach before loaded\n", -			   bpf_program__title(prog, false)); +		pr_warn("program '%s': can't attach before loaded\n", +			bpf_program__title(prog, false));  		return ERR_PTR(-EINVAL);  	} @@ -5230,9 +5821,40 @@ struct bpf_link *bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  	if (pfd < 0) {  		pfd = -errno;  		free(link); -		pr_warning("program '%s': failed to attach to raw tracepoint '%s': %s\n", -			   bpf_program__title(prog, false), tp_name, -			   libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		pr_warn("program '%s': failed to attach to raw tracepoint '%s': %s\n", +			bpf_program__title(prog, false), tp_name, +			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg))); +		return ERR_PTR(pfd); +	} +	link->fd = pfd; +	return (struct bpf_link *)link; +} + +struct bpf_link *bpf_program__attach_trace(struct bpf_program *prog) +{ +	char errmsg[STRERR_BUFSIZE]; +	struct bpf_link_fd *link; +	int prog_fd, pfd; + +	prog_fd = bpf_program__fd(prog); +	if (prog_fd < 0) { +		pr_warn("program '%s': can't attach before loaded\n", +			bpf_program__title(prog, false)); +		return ERR_PTR(-EINVAL); +	} + +	link = malloc(sizeof(*link)); +	if (!link) +		return ERR_PTR(-ENOMEM); +	link->link.destroy = &bpf_link__destroy_fd; + +	pfd = bpf_raw_tracepoint_open(NULL, prog_fd); +	if (pfd < 0) { +		pfd = -errno; +		free(link); +		pr_warn("program '%s': failed to attach to trace: %s\n", +			bpf_program__title(prog, false), +			libbpf_strerror_r(pfd, errmsg, sizeof(errmsg)));  		return ERR_PTR(pfd);  	}  	link->fd = pfd; @@ -5334,7 +5956,7 @@ static void perf_buffer__free_cpu_buf(struct perf_buffer *pb,  		return;  	if (cpu_buf->base &&  	    munmap(cpu_buf->base, pb->mmap_size + pb->page_size)) -		pr_warning("failed to munmap cpu_buf #%d\n", cpu_buf->cpu); +		pr_warn("failed to munmap cpu_buf #%d\n", cpu_buf->cpu);  	if (cpu_buf->fd >= 0) {  		ioctl(cpu_buf->fd, PERF_EVENT_IOC_DISABLE, 0);  		close(cpu_buf->fd); @@ -5384,8 +6006,8 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,  			      -1, PERF_FLAG_FD_CLOEXEC);  	if (cpu_buf->fd < 0) {  		err = -errno; -		pr_warning("failed to open perf buffer event on cpu #%d: %s\n", -			   cpu, libbpf_strerror_r(err, msg, sizeof(msg))); +		pr_warn("failed to open perf buffer event on cpu #%d: %s\n", +			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));  		goto error;  	} @@ -5395,15 +6017,15 @@ perf_buffer__open_cpu_buf(struct perf_buffer *pb, struct perf_event_attr *attr,  	if (cpu_buf->base == MAP_FAILED) {  		cpu_buf->base = NULL;  		err = -errno; -		pr_warning("failed to mmap perf buffer on cpu #%d: %s\n", -			   cpu, libbpf_strerror_r(err, msg, sizeof(msg))); +		pr_warn("failed to mmap perf buffer on cpu #%d: %s\n", +			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));  		goto error;  	}  	if (ioctl(cpu_buf->fd, PERF_EVENT_IOC_ENABLE, 0) < 0) {  		err = -errno; -		pr_warning("failed to enable perf buffer event on cpu #%d: %s\n", -			   cpu, libbpf_strerror_r(err, msg, sizeof(msg))); +		pr_warn("failed to enable perf buffer event on cpu #%d: %s\n", +			cpu, libbpf_strerror_r(err, msg, sizeof(msg)));  		goto error;  	} @@ -5463,8 +6085,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  	int err, i;  	if (page_cnt & (page_cnt - 1)) { -		pr_warning("page count should be power of two, but is %zu\n", -			   page_cnt); +		pr_warn("page count should be power of two, but is %zu\n", +			page_cnt);  		return ERR_PTR(-EINVAL);  	} @@ -5472,14 +6094,14 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  	err = bpf_obj_get_info_by_fd(map_fd, &map, &map_info_len);  	if (err) {  		err = -errno; -		pr_warning("failed to get map info for map FD %d: %s\n", -			   map_fd, libbpf_strerror_r(err, msg, sizeof(msg))); +		pr_warn("failed to get map info for map FD %d: %s\n", +			map_fd, libbpf_strerror_r(err, msg, sizeof(msg)));  		return ERR_PTR(err);  	}  	if (map.type != BPF_MAP_TYPE_PERF_EVENT_ARRAY) { -		pr_warning("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", -			   map.name); +		pr_warn("map '%s' should be BPF_MAP_TYPE_PERF_EVENT_ARRAY\n", +			map.name);  		return ERR_PTR(-EINVAL);  	} @@ -5499,8 +6121,8 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  	pb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);  	if (pb->epoll_fd < 0) {  		err = -errno; -		pr_warning("failed to create epoll instance: %s\n", -			   libbpf_strerror_r(err, msg, sizeof(msg))); +		pr_warn("failed to create epoll instance: %s\n", +			libbpf_strerror_r(err, msg, sizeof(msg)));  		goto error;  	} @@ -5519,13 +6141,13 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  	pb->events = calloc(pb->cpu_cnt, sizeof(*pb->events));  	if (!pb->events) {  		err = -ENOMEM; -		pr_warning("failed to allocate events: out of memory\n"); +		pr_warn("failed to allocate events: out of memory\n");  		goto error;  	}  	pb->cpu_bufs = calloc(pb->cpu_cnt, sizeof(*pb->cpu_bufs));  	if (!pb->cpu_bufs) {  		err = -ENOMEM; -		pr_warning("failed to allocate buffers: out of memory\n"); +		pr_warn("failed to allocate buffers: out of memory\n");  		goto error;  	} @@ -5548,9 +6170,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  					  &cpu_buf->fd, 0);  		if (err) {  			err = -errno; -			pr_warning("failed to set cpu #%d, key %d -> perf FD %d: %s\n", -				   cpu, map_key, cpu_buf->fd, -				   libbpf_strerror_r(err, msg, sizeof(msg))); +			pr_warn("failed to set cpu #%d, key %d -> perf FD %d: %s\n", +				cpu, map_key, cpu_buf->fd, +				libbpf_strerror_r(err, msg, sizeof(msg)));  			goto error;  		} @@ -5559,9 +6181,9 @@ static struct perf_buffer *__perf_buffer__new(int map_fd, size_t page_cnt,  		if (epoll_ctl(pb->epoll_fd, EPOLL_CTL_ADD, cpu_buf->fd,  			      &pb->events[i]) < 0) {  			err = -errno; -			pr_warning("failed to epoll_ctl cpu #%d perf FD %d: %s\n", -				   cpu, cpu_buf->fd, -				   libbpf_strerror_r(err, msg, sizeof(msg))); +			pr_warn("failed to epoll_ctl cpu #%d perf FD %d: %s\n", +				cpu, cpu_buf->fd, +				libbpf_strerror_r(err, msg, sizeof(msg)));  			goto error;  		}  	} @@ -5614,7 +6236,7 @@ perf_buffer__process_record(struct perf_event_header *e, void *ctx)  		break;  	}  	default: -		pr_warning("unknown perf sample type %d\n", e->type); +		pr_warn("unknown perf sample type %d\n", e->type);  		return LIBBPF_PERF_EVENT_ERROR;  	}  	return LIBBPF_PERF_EVENT_CONT; @@ -5644,7 +6266,7 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)  		err = perf_buffer__process_records(pb, cpu_buf);  		if (err) { -			pr_warning("error while processing records: %d\n", err); +			pr_warn("error while processing records: %d\n", err);  			return err;  		}  	} @@ -5708,7 +6330,8 @@ static struct bpf_prog_info_array_desc bpf_prog_info_array_desc[] = {  }; -static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offset) +static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, +					   int offset)  {  	__u32 *array = (__u32 *)info; @@ -5717,7 +6340,8 @@ static __u32 bpf_prog_info_read_offset_u32(struct bpf_prog_info *info, int offse  	return -(int)offset;  } -static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, int offset) +static __u64 bpf_prog_info_read_offset_u64(struct bpf_prog_info *info, +					   int offset)  {  	__u64 *array = (__u64 *)info; @@ -5841,13 +6465,13 @@ bpf_program__get_prog_info_linear(int fd, __u64 arrays)  		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,  						   desc->count_offset);  		if (v1 != v2) -			pr_warning("%s: mismatch in element count\n", __func__); +			pr_warn("%s: mismatch in element count\n", __func__);  		v1 = bpf_prog_info_read_offset_u32(&info, desc->size_offset);  		v2 = bpf_prog_info_read_offset_u32(&info_linear->info,  						   desc->size_offset);  		if (v1 != v2) -			pr_warning("%s: mismatch in rec size\n", __func__); +			pr_warn("%s: mismatch in rec size\n", __func__);  	}  	/* step 7: update info_len and data_len */ @@ -5915,20 +6539,19 @@ int libbpf_num_possible_cpus(void)  	fd = open(fcpu, O_RDONLY);  	if (fd < 0) {  		error = errno; -		pr_warning("Failed to open file %s: %s\n", -			   fcpu, strerror(error)); +		pr_warn("Failed to open file %s: %s\n", fcpu, strerror(error));  		return -error;  	}  	len = read(fd, buf, sizeof(buf));  	close(fd);  	if (len <= 0) {  		error = len ? errno : EINVAL; -		pr_warning("Failed to read # of possible cpus from %s: %s\n", -			   fcpu, strerror(error)); +		pr_warn("Failed to read # of possible cpus from %s: %s\n", +			fcpu, strerror(error));  		return -error;  	}  	if (len == sizeof(buf)) { -		pr_warning("File %s size overflow\n", fcpu); +		pr_warn("File %s size overflow\n", fcpu);  		return -EOVERFLOW;  	}  	buf[len] = '\0'; @@ -5939,8 +6562,8 @@ int libbpf_num_possible_cpus(void)  			buf[ir] = '\0';  			n = sscanf(&buf[il], "%u-%u", &start, &end);  			if (n <= 0) { -				pr_warning("Failed to get # CPUs from %s\n", -					   &buf[il]); +				pr_warn("Failed to get # CPUs from %s\n", +					&buf[il]);  				return -EINVAL;  			} else if (n == 1) {  				end = start; @@ -5950,7 +6573,7 @@ int libbpf_num_possible_cpus(void)  		}  	}  	if (tmp_cpus <= 0) { -		pr_warning("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu); +		pr_warn("Invalid #CPUs %d from %s\n", tmp_cpus, fcpu);  		return -EINVAL;  	} diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h index e8f70977d137..0dbf4bfba0c4 100644 --- a/tools/lib/bpf/libbpf.h +++ b/tools/lib/bpf/libbpf.h @@ -67,18 +67,80 @@ struct bpf_object_open_attr {  	enum bpf_prog_type prog_type;  }; +/* Helper macro to declare and initialize libbpf options struct + * + * This dance with uninitialized declaration, followed by memset to zero, + * followed by assignment using compound literal syntax is done to preserve + * ability to use a nice struct field initialization syntax and **hopefully** + * have all the padding bytes initialized to zero. It's not guaranteed though, + * when copying literal, that compiler won't copy garbage in literal's padding + * bytes, but that's the best way I've found and it seems to work in practice. + * + * Macro declares opts struct of given type and name, zero-initializes, + * including any extra padding, it with memset() and then assigns initial + * values provided by users in struct initializer-syntax as varargs. + */ +#define DECLARE_LIBBPF_OPTS(TYPE, NAME, ...)				    \ +	struct TYPE NAME = ({ 						    \ +		memset(&NAME, 0, sizeof(struct TYPE));			    \ +		(struct TYPE) {						    \ +			.sz = sizeof(struct TYPE),			    \ +			__VA_ARGS__					    \ +		};							    \ +	}) + +struct bpf_object_open_opts { +	/* size of this struct, for forward/backward compatiblity */ +	size_t sz; +	/* object name override, if provided: +	 * - for object open from file, this will override setting object +	 *   name from file path's base name; +	 * - for object open from memory buffer, this will specify an object +	 *   name and will override default "<addr>-<buf-size>" name; +	 */ +	const char *object_name; +	/* parse map definitions non-strictly, allowing extra attributes/data */ +	bool relaxed_maps; +	/* process CO-RE relocations non-strictly, allowing them to fail */ +	bool relaxed_core_relocs; +	/* maps that set the 'pinning' attribute in their definition will have +	 * their pin_path attribute set to a file in this directory, and be +	 * auto-pinned to that path on load; defaults to "/sys/fs/bpf". +	 */ +	const char *pin_root_path; +	__u32 attach_prog_fd; +}; +#define bpf_object_open_opts__last_field attach_prog_fd +  LIBBPF_API struct bpf_object *bpf_object__open(const char *path);  LIBBPF_API struct bpf_object * +bpf_object__open_file(const char *path, struct bpf_object_open_opts *opts); +LIBBPF_API struct bpf_object * +bpf_object__open_mem(const void *obj_buf, size_t obj_buf_sz, +		     struct bpf_object_open_opts *opts); + +/* deprecated bpf_object__open variants */ +LIBBPF_API struct bpf_object * +bpf_object__open_buffer(const void *obj_buf, size_t obj_buf_sz, +			const char *name); +LIBBPF_API struct bpf_object *  bpf_object__open_xattr(struct bpf_object_open_attr *attr); -struct bpf_object *__bpf_object__open_xattr(struct bpf_object_open_attr *attr, -					    int flags); -LIBBPF_API struct bpf_object *bpf_object__open_buffer(void *obj_buf, -						      size_t obj_buf_sz, -						      const char *name); +  int bpf_object__section_size(const struct bpf_object *obj, const char *name,  			     __u32 *size);  int bpf_object__variable_offset(const struct bpf_object *obj, const char *name,  				__u32 *off); + +enum libbpf_pin_type { +	LIBBPF_PIN_NONE, +	/* PIN_BY_NAME: pin maps by name (in /sys/fs/bpf by default) */ +	LIBBPF_PIN_BY_NAME, +}; + +/* pin_maps and unpin_maps can both be called with a NULL path, in which case + * they will use the pin_path attribute of each map (and ignore all maps that + * don't have a pin_path set). + */  LIBBPF_API int bpf_object__pin_maps(struct bpf_object *obj, const char *path);  LIBBPF_API int bpf_object__unpin_maps(struct bpf_object *obj,  				      const char *path); @@ -127,6 +189,8 @@ libbpf_prog_type_by_name(const char *name, enum bpf_prog_type *prog_type,  			 enum bpf_attach_type *expected_attach_type);  LIBBPF_API int libbpf_attach_type_by_name(const char *name,  					  enum bpf_attach_type *attach_type); +LIBBPF_API int libbpf_find_vmlinux_btf_id(const char *name, +					  enum bpf_attach_type attach_type);  /* Accessors of bpf_program */  struct bpf_program; @@ -153,6 +217,9 @@ LIBBPF_API void bpf_program__set_ifindex(struct bpf_program *prog,  LIBBPF_API const char *bpf_program__title(const struct bpf_program *prog,  					  bool needs_copy); +/* returns program size in bytes */ +LIBBPF_API size_t bpf_program__size(const struct bpf_program *prog); +  LIBBPF_API int bpf_program__load(struct bpf_program *prog, char *license,  				 __u32 kern_version);  LIBBPF_API int bpf_program__fd(const struct bpf_program *prog); @@ -187,6 +254,8 @@ LIBBPF_API struct bpf_link *  bpf_program__attach_raw_tracepoint(struct bpf_program *prog,  				   const char *tp_name); +LIBBPF_API struct bpf_link * +bpf_program__attach_trace(struct bpf_program *prog);  struct bpf_insn;  /* @@ -262,8 +331,14 @@ LIBBPF_API int bpf_program__set_sched_cls(struct bpf_program *prog);  LIBBPF_API int bpf_program__set_sched_act(struct bpf_program *prog);  LIBBPF_API int bpf_program__set_xdp(struct bpf_program *prog);  LIBBPF_API int bpf_program__set_perf_event(struct bpf_program *prog); +LIBBPF_API int bpf_program__set_tracing(struct bpf_program *prog); + +LIBBPF_API enum bpf_prog_type bpf_program__get_type(struct bpf_program *prog);  LIBBPF_API void bpf_program__set_type(struct bpf_program *prog,  				      enum bpf_prog_type type); + +LIBBPF_API enum bpf_attach_type +bpf_program__get_expected_attach_type(struct bpf_program *prog);  LIBBPF_API void  bpf_program__set_expected_attach_type(struct bpf_program *prog,  				      enum bpf_attach_type type); @@ -276,6 +351,7 @@ LIBBPF_API bool bpf_program__is_sched_cls(const struct bpf_program *prog);  LIBBPF_API bool bpf_program__is_sched_act(const struct bpf_program *prog);  LIBBPF_API bool bpf_program__is_xdp(const struct bpf_program *prog);  LIBBPF_API bool bpf_program__is_perf_event(const struct bpf_program *prog); +LIBBPF_API bool bpf_program__is_tracing(const struct bpf_program *prog);  /*   * No need for __attribute__((packed)), all members of 'bpf_map_def' @@ -335,6 +411,9 @@ LIBBPF_API int bpf_map__resize(struct bpf_map *map, __u32 max_entries);  LIBBPF_API bool bpf_map__is_offload_neutral(const struct bpf_map *map);  LIBBPF_API bool bpf_map__is_internal(const struct bpf_map *map);  LIBBPF_API void bpf_map__set_ifindex(struct bpf_map *map, __u32 ifindex); +LIBBPF_API int bpf_map__set_pin_path(struct bpf_map *map, const char *path); +LIBBPF_API const char *bpf_map__get_pin_path(const struct bpf_map *map); +LIBBPF_API bool bpf_map__is_pinned(const struct bpf_map *map);  LIBBPF_API int bpf_map__pin(struct bpf_map *map, const char *path);  LIBBPF_API int bpf_map__unpin(struct bpf_map *map, const char *path); @@ -356,8 +435,18 @@ LIBBPF_API int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,  LIBBPF_API int bpf_prog_load(const char *file, enum bpf_prog_type type,  			     struct bpf_object **pobj, int *prog_fd); +struct xdp_link_info { +	__u32 prog_id; +	__u32 drv_prog_id; +	__u32 hw_prog_id; +	__u32 skb_prog_id; +	__u8 attach_mode; +}; +  LIBBPF_API int bpf_set_link_xdp_fd(int ifindex, int fd, __u32 flags);  LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags); +LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, +				     size_t info_size, __u32 flags);  struct perf_buffer; diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map index d04c7cb623ed..8ddc2c40e482 100644 --- a/tools/lib/bpf/libbpf.map +++ b/tools/lib/bpf/libbpf.map @@ -190,3 +190,21 @@ LIBBPF_0.0.5 {  	global:  		bpf_btf_get_next_id;  } LIBBPF_0.0.4; + +LIBBPF_0.0.6 { +	global: +		bpf_get_link_xdp_info; +		bpf_map__get_pin_path; +		bpf_map__is_pinned; +		bpf_map__set_pin_path; +		bpf_object__open_file; +		bpf_object__open_mem; +		bpf_program__attach_trace; +		bpf_program__get_expected_attach_type; +		bpf_program__get_type; +		bpf_program__is_tracing; +		bpf_program__set_tracing; +		bpf_program__size; +		btf__find_by_name_kind; +		libbpf_find_vmlinux_btf_id; +} LIBBPF_0.0.5; diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h index 98216a69c32f..97ac17a64a58 100644 --- a/tools/lib/bpf/libbpf_internal.h +++ b/tools/lib/bpf/libbpf_internal.h @@ -59,10 +59,42 @@ do {				\  	libbpf_print(level, "libbpf: " fmt, ##__VA_ARGS__);	\  } while (0) -#define pr_warning(fmt, ...)	__pr(LIBBPF_WARN, fmt, ##__VA_ARGS__) +#define pr_warn(fmt, ...)	__pr(LIBBPF_WARN, fmt, ##__VA_ARGS__)  #define pr_info(fmt, ...)	__pr(LIBBPF_INFO, fmt, ##__VA_ARGS__)  #define pr_debug(fmt, ...)	__pr(LIBBPF_DEBUG, fmt, ##__VA_ARGS__) +static inline bool libbpf_validate_opts(const char *opts, +					size_t opts_sz, size_t user_sz, +					const char *type_name) +{ +	if (user_sz < sizeof(size_t)) { +		pr_warn("%s size (%zu) is too small\n", type_name, user_sz); +		return false; +	} +	if (user_sz > opts_sz) { +		size_t i; + +		for (i = opts_sz; i < user_sz; i++) { +			if (opts[i]) { +				pr_warn("%s has non-zero extra bytes", +					type_name); +				return false; +			} +		} +	} +	return true; +} + +#define OPTS_VALID(opts, type)						      \ +	(!(opts) || libbpf_validate_opts((const char *)opts,		      \ +					 offsetofend(struct type,	      \ +						     type##__last_field),     \ +					 (opts)->sz, #type)) +#define OPTS_HAS(opts, field) \ +	((opts) && opts->sz >= offsetofend(typeof(*(opts)), field)) +#define OPTS_GET(opts, field, fallback_value) \ +	(OPTS_HAS(opts, field) ? (opts)->field : fallback_value) +  int libbpf__load_raw_btf(const char *raw_types, size_t types_len,  			 const char *str_sec, size_t str_len); @@ -94,7 +126,7 @@ struct btf_ext {  	};  	struct btf_ext_info func_info;  	struct btf_ext_info line_info; -	struct btf_ext_info offset_reloc_info; +	struct btf_ext_info field_reloc_info;  	__u32 data_size;  }; @@ -119,13 +151,27 @@ struct bpf_line_info_min {  	__u32	line_col;  }; -/* The minimum bpf_offset_reloc checked by the loader +/* bpf_field_info_kind encodes which aspect of captured field has to be + * adjusted by relocations. Currently supported values are: + *   - BPF_FIELD_BYTE_OFFSET: field offset (in bytes); + *   - BPF_FIELD_EXISTS: field existence (1, if field exists; 0, otherwise); + */ +enum bpf_field_info_kind { +	BPF_FIELD_BYTE_OFFSET = 0,	/* field byte offset */ +	BPF_FIELD_BYTE_SIZE = 1, +	BPF_FIELD_EXISTS = 2,		/* field existence in target kernel */ +	BPF_FIELD_SIGNED = 3, +	BPF_FIELD_LSHIFT_U64 = 4, +	BPF_FIELD_RSHIFT_U64 = 5, +}; + +/* The minimum bpf_field_reloc checked by the loader   * - * Offset relocation captures the following data: + * Field relocation captures the following data:   * - insn_off - instruction offset (in bytes) within a BPF program that needs - *   its insn->imm field to be relocated with actual offset; + *   its insn->imm field to be relocated with actual field info;   * - type_id - BTF type ID of the "root" (containing) entity of a relocatable - *   offset; + *   field;   * - access_str_off - offset into corresponding .BTF string section. String   *   itself encodes an accessed field using a sequence of field and array   *   indicies, separated by colon (:). It's conceptually very close to LLVM's @@ -156,15 +202,16 @@ struct bpf_line_info_min {   * bpf_probe_read(&dst, sizeof(dst),   *		  __builtin_preserve_access_index(&src->a.b.c));   * - * In this case Clang will emit offset relocation recording necessary data to + * In this case Clang will emit field relocation recording necessary data to   * be able to find offset of embedded `a.b.c` field within `src` struct.   *   *   [0] https://llvm.org/docs/LangRef.html#getelementptr-instruction   */ -struct bpf_offset_reloc { +struct bpf_field_reloc {  	__u32   insn_off;  	__u32   type_id;  	__u32   access_str_off; +	enum bpf_field_info_kind kind;  };  #endif /* __LIBBPF_LIBBPF_INTERNAL_H */ diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c index 4b0b0364f5fc..a9eb8b322671 100644 --- a/tools/lib/bpf/libbpf_probes.c +++ b/tools/lib/bpf/libbpf_probes.c @@ -102,6 +102,7 @@ probe_load(enum bpf_prog_type prog_type, const struct bpf_insn *insns,  	case BPF_PROG_TYPE_FLOW_DISSECTOR:  	case BPF_PROG_TYPE_CGROUP_SYSCTL:  	case BPF_PROG_TYPE_CGROUP_SOCKOPT: +	case BPF_PROG_TYPE_TRACING:  	default:  		break;  	} diff --git a/tools/lib/bpf/netlink.c b/tools/lib/bpf/netlink.c index ce3ec81b71c0..5065c1aa1061 100644 --- a/tools/lib/bpf/netlink.c +++ b/tools/lib/bpf/netlink.c @@ -12,6 +12,7 @@  #include "bpf.h"  #include "libbpf.h" +#include "libbpf_internal.h"  #include "nlattr.h"  #ifndef SOL_NETLINK @@ -24,7 +25,7 @@ typedef int (*__dump_nlmsg_t)(struct nlmsghdr *nlmsg, libbpf_dump_nlmsg_t,  struct xdp_id_md {  	int ifindex;  	__u32 flags; -	__u32 id; +	struct xdp_link_info info;  };  int libbpf_netlink_open(__u32 *nl_pid) @@ -43,7 +44,7 @@ int libbpf_netlink_open(__u32 *nl_pid)  	if (setsockopt(sock, SOL_NETLINK, NETLINK_EXT_ACK,  		       &one, sizeof(one)) < 0) { -		fprintf(stderr, "Netlink error reporting not supported\n"); +		pr_warn("Netlink error reporting not supported\n");  	}  	if (bind(sock, (struct sockaddr *)&sa, sizeof(sa)) < 0) { @@ -202,26 +203,11 @@ static int __dump_link_nlmsg(struct nlmsghdr *nlh,  	return dump_link_nlmsg(cookie, ifi, tb);  } -static unsigned char get_xdp_id_attr(unsigned char mode, __u32 flags) -{ -	if (mode != XDP_ATTACHED_MULTI) -		return IFLA_XDP_PROG_ID; -	if (flags & XDP_FLAGS_DRV_MODE) -		return IFLA_XDP_DRV_PROG_ID; -	if (flags & XDP_FLAGS_HW_MODE) -		return IFLA_XDP_HW_PROG_ID; -	if (flags & XDP_FLAGS_SKB_MODE) -		return IFLA_XDP_SKB_PROG_ID; - -	return IFLA_XDP_UNSPEC; -} - -static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb) +static int get_xdp_info(void *cookie, void *msg, struct nlattr **tb)  {  	struct nlattr *xdp_tb[IFLA_XDP_MAX + 1];  	struct xdp_id_md *xdp_id = cookie;  	struct ifinfomsg *ifinfo = msg; -	unsigned char mode, xdp_attr;  	int ret;  	if (xdp_id->ifindex && xdp_id->ifindex != ifinfo->ifi_index) @@ -237,27 +223,40 @@ static int get_xdp_id(void *cookie, void *msg, struct nlattr **tb)  	if (!xdp_tb[IFLA_XDP_ATTACHED])  		return 0; -	mode = libbpf_nla_getattr_u8(xdp_tb[IFLA_XDP_ATTACHED]); -	if (mode == XDP_ATTACHED_NONE) -		return 0; +	xdp_id->info.attach_mode = libbpf_nla_getattr_u8( +		xdp_tb[IFLA_XDP_ATTACHED]); -	xdp_attr = get_xdp_id_attr(mode, xdp_id->flags); -	if (!xdp_attr || !xdp_tb[xdp_attr]) +	if (xdp_id->info.attach_mode == XDP_ATTACHED_NONE)  		return 0; -	xdp_id->id = libbpf_nla_getattr_u32(xdp_tb[xdp_attr]); +	if (xdp_tb[IFLA_XDP_PROG_ID]) +		xdp_id->info.prog_id = libbpf_nla_getattr_u32( +			xdp_tb[IFLA_XDP_PROG_ID]); + +	if (xdp_tb[IFLA_XDP_SKB_PROG_ID]) +		xdp_id->info.skb_prog_id = libbpf_nla_getattr_u32( +			xdp_tb[IFLA_XDP_SKB_PROG_ID]); + +	if (xdp_tb[IFLA_XDP_DRV_PROG_ID]) +		xdp_id->info.drv_prog_id = libbpf_nla_getattr_u32( +			xdp_tb[IFLA_XDP_DRV_PROG_ID]); + +	if (xdp_tb[IFLA_XDP_HW_PROG_ID]) +		xdp_id->info.hw_prog_id = libbpf_nla_getattr_u32( +			xdp_tb[IFLA_XDP_HW_PROG_ID]);  	return 0;  } -int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info, +			  size_t info_size, __u32 flags)  {  	struct xdp_id_md xdp_id = {};  	int sock, ret;  	__u32 nl_pid;  	__u32 mask; -	if (flags & ~XDP_FLAGS_MASK) +	if (flags & ~XDP_FLAGS_MASK || !info_size)  		return -EINVAL;  	/* Check whether the single {HW,DRV,SKB} mode is set */ @@ -273,14 +272,44 @@ int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags)  	xdp_id.ifindex = ifindex;  	xdp_id.flags = flags; -	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_id, &xdp_id); -	if (!ret) -		*prog_id = xdp_id.id; +	ret = libbpf_nl_get_link(sock, nl_pid, get_xdp_info, &xdp_id); +	if (!ret) { +		size_t sz = min(info_size, sizeof(xdp_id.info)); + +		memcpy(info, &xdp_id.info, sz); +		memset((void *) info + sz, 0, info_size - sz); +	}  	close(sock);  	return ret;  } +static __u32 get_xdp_id(struct xdp_link_info *info, __u32 flags) +{ +	if (info->attach_mode != XDP_ATTACHED_MULTI) +		return info->prog_id; +	if (flags & XDP_FLAGS_DRV_MODE) +		return info->drv_prog_id; +	if (flags & XDP_FLAGS_HW_MODE) +		return info->hw_prog_id; +	if (flags & XDP_FLAGS_SKB_MODE) +		return info->skb_prog_id; + +	return 0; +} + +int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags) +{ +	struct xdp_link_info info; +	int ret; + +	ret = bpf_get_link_xdp_info(ifindex, &info, sizeof(info), flags); +	if (!ret) +		*prog_id = get_xdp_id(&info, flags); + +	return ret; +} +  int libbpf_nl_get_link(int sock, unsigned int nl_pid,  		       libbpf_dump_nlmsg_t dump_link_nlmsg, void *cookie)  { diff --git a/tools/lib/bpf/nlattr.c b/tools/lib/bpf/nlattr.c index 1e69c0c8d413..8db44bbfc66d 100644 --- a/tools/lib/bpf/nlattr.c +++ b/tools/lib/bpf/nlattr.c @@ -8,6 +8,7 @@  #include <errno.h>  #include "nlattr.h" +#include "libbpf_internal.h"  #include <linux/rtnetlink.h>  #include <string.h>  #include <stdio.h> @@ -121,8 +122,8 @@ int libbpf_nla_parse(struct nlattr *tb[], int maxtype, struct nlattr *head,  		}  		if (tb[type]) -			fprintf(stderr, "Attribute of type %#x found multiple times in message, " -				  "previous attribute is being ignored.\n", type); +			pr_warn("Attribute of type %#x found multiple times in message, " +				"previous attribute is being ignored.\n", type);  		tb[type] = nla;  	} @@ -181,15 +182,14 @@ int libbpf_nla_dump_errormsg(struct nlmsghdr *nlh)  	if (libbpf_nla_parse(tb, NLMSGERR_ATTR_MAX, attr, alen,  			     extack_policy) != 0) { -		fprintf(stderr, -			"Failed to parse extended error attributes\n"); +		pr_warn("Failed to parse extended error attributes\n");  		return 0;  	}  	if (tb[NLMSGERR_ATTR_MSG])  		errmsg = (char *) libbpf_nla_data(tb[NLMSGERR_ATTR_MSG]); -	fprintf(stderr, "Kernel error message: %s\n", errmsg); +	pr_warn("Kernel error message: %s\n", errmsg);  	return 0;  } diff --git a/tools/lib/bpf/test_libbpf.cpp b/tools/lib/bpf/test_libbpf.cpp deleted file mode 100644 index fc134873bb6d..000000000000 --- a/tools/lib/bpf/test_libbpf.cpp +++ /dev/null @@ -1,18 +0,0 @@ -/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */ -#include "libbpf.h" -#include "bpf.h" -#include "btf.h" - -/* do nothing, just make sure we can link successfully */ - -int main(int argc, char *argv[]) -{ -    /* libbpf.h */ -    libbpf_set_print(NULL); - -    /* bpf.h */ -    bpf_prog_get_fd_by_id(0); - -    /* btf.h */ -    btf__new(NULL, 0); -} diff --git a/tools/lib/bpf/xsk.c b/tools/lib/bpf/xsk.c index a902838f9fcc..8e0ffa800a71 100644 --- a/tools/lib/bpf/xsk.c +++ b/tools/lib/bpf/xsk.c @@ -73,6 +73,21 @@ struct xsk_nl_info {  	int fd;  }; +/* Up until and including Linux 5.3 */ +struct xdp_ring_offset_v1 { +	__u64 producer; +	__u64 consumer; +	__u64 desc; +}; + +/* Up until and including Linux 5.3 */ +struct xdp_mmap_offsets_v1 { +	struct xdp_ring_offset_v1 rx; +	struct xdp_ring_offset_v1 tx; +	struct xdp_ring_offset_v1 fr; +	struct xdp_ring_offset_v1 cr; +}; +  int xsk_umem__fd(const struct xsk_umem *umem)  {  	return umem ? umem->fd : -EINVAL; @@ -133,6 +148,58 @@ static int xsk_set_xdp_socket_config(struct xsk_socket_config *cfg,  	return 0;  } +static void xsk_mmap_offsets_v1(struct xdp_mmap_offsets *off) +{ +	struct xdp_mmap_offsets_v1 off_v1; + +	/* getsockopt on a kernel <= 5.3 has no flags fields. +	 * Copy over the offsets to the correct places in the >=5.4 format +	 * and put the flags where they would have been on that kernel. +	 */ +	memcpy(&off_v1, off, sizeof(off_v1)); + +	off->rx.producer = off_v1.rx.producer; +	off->rx.consumer = off_v1.rx.consumer; +	off->rx.desc = off_v1.rx.desc; +	off->rx.flags = off_v1.rx.consumer + sizeof(__u32); + +	off->tx.producer = off_v1.tx.producer; +	off->tx.consumer = off_v1.tx.consumer; +	off->tx.desc = off_v1.tx.desc; +	off->tx.flags = off_v1.tx.consumer + sizeof(__u32); + +	off->fr.producer = off_v1.fr.producer; +	off->fr.consumer = off_v1.fr.consumer; +	off->fr.desc = off_v1.fr.desc; +	off->fr.flags = off_v1.fr.consumer + sizeof(__u32); + +	off->cr.producer = off_v1.cr.producer; +	off->cr.consumer = off_v1.cr.consumer; +	off->cr.desc = off_v1.cr.desc; +	off->cr.flags = off_v1.cr.consumer + sizeof(__u32); +} + +static int xsk_get_mmap_offsets(int fd, struct xdp_mmap_offsets *off) +{ +	socklen_t optlen; +	int err; + +	optlen = sizeof(*off); +	err = getsockopt(fd, SOL_XDP, XDP_MMAP_OFFSETS, off, &optlen); +	if (err) +		return err; + +	if (optlen == sizeof(*off)) +		return 0; + +	if (optlen == sizeof(struct xdp_mmap_offsets_v1)) { +		xsk_mmap_offsets_v1(off); +		return 0; +	} + +	return -EINVAL; +} +  int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  			    __u64 size, struct xsk_ring_prod *fill,  			    struct xsk_ring_cons *comp, @@ -141,7 +208,6 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  	struct xdp_mmap_offsets off;  	struct xdp_umem_reg mr;  	struct xsk_umem *umem; -	socklen_t optlen;  	void *map;  	int err; @@ -163,6 +229,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  	umem->umem_area = umem_area;  	xsk_set_umem_config(&umem->config, usr_config); +	memset(&mr, 0, sizeof(mr));  	mr.addr = (uintptr_t)umem_area;  	mr.len = size;  	mr.chunk_size = umem->config.frame_size; @@ -189,8 +256,7 @@ int xsk_umem__create_v0_0_4(struct xsk_umem **umem_ptr, void *umem_area,  		goto out_socket;  	} -	optlen = sizeof(off); -	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); +	err = xsk_get_mmap_offsets(umem->fd, &off);  	if (err) {  		err = -errno;  		goto out_socket; @@ -273,33 +339,55 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  	/* This is the C-program:  	 * SEC("xdp_sock") int xdp_sock_prog(struct xdp_md *ctx)  	 * { -	 *     int index = ctx->rx_queue_index; +	 *     int ret, index = ctx->rx_queue_index;  	 *  	 *     // A set entry here means that the correspnding queue_id  	 *     // has an active AF_XDP socket bound to it. +	 *     ret = bpf_redirect_map(&xsks_map, index, XDP_PASS); +	 *     if (ret > 0) +	 *         return ret; +	 * +	 *     // Fallback for pre-5.3 kernels, not supporting default +	 *     // action in the flags parameter.  	 *     if (bpf_map_lookup_elem(&xsks_map, &index))  	 *         return bpf_redirect_map(&xsks_map, index, 0); -	 *  	 *     return XDP_PASS;  	 * }  	 */  	struct bpf_insn prog[] = { -		/* r1 = *(u32 *)(r1 + 16) */ -		BPF_LDX_MEM(BPF_W, BPF_REG_1, BPF_REG_1, 16), -		/* *(u32 *)(r10 - 4) = r1 */ -		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_1, -4), +		/* r2 = *(u32 *)(r1 + 16) */ +		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, 16), +		/* *(u32 *)(r10 - 4) = r2 */ +		BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -4), +		/* r1 = xskmap[] */ +		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), +		/* r3 = XDP_PASS */ +		BPF_MOV64_IMM(BPF_REG_3, 2), +		/* call bpf_redirect_map */ +		BPF_EMIT_CALL(BPF_FUNC_redirect_map), +		/* if w0 != 0 goto pc+13 */ +		BPF_JMP32_IMM(BPF_JSGT, BPF_REG_0, 0, 13), +		/* r2 = r10 */  		BPF_MOV64_REG(BPF_REG_2, BPF_REG_10), +		/* r2 += -4 */  		BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4), +		/* r1 = xskmap[] */  		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), +		/* call bpf_map_lookup_elem */  		BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem), +		/* r1 = r0 */  		BPF_MOV64_REG(BPF_REG_1, BPF_REG_0), -		BPF_MOV32_IMM(BPF_REG_0, 2), -		/* if r1 == 0 goto +5 */ +		/* r0 = XDP_PASS */ +		BPF_MOV64_IMM(BPF_REG_0, 2), +		/* if r1 == 0 goto pc+5 */  		BPF_JMP_IMM(BPF_JEQ, BPF_REG_1, 0, 5),  		/* r2 = *(u32 *)(r10 - 4) */ -		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd),  		BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_10, -4), -		BPF_MOV32_IMM(BPF_REG_3, 0), +		/* r1 = xskmap[] */ +		BPF_LD_MAP_FD(BPF_REG_1, xsk->xsks_map_fd), +		/* r3 = 0 */ +		BPF_MOV64_IMM(BPF_REG_3, 0), +		/* call bpf_redirect_map */  		BPF_EMIT_CALL(BPF_FUNC_redirect_map),  		/* The jumps are to this instruction */  		BPF_EXIT_INSN(), @@ -310,7 +398,7 @@ static int xsk_load_xdp_prog(struct xsk_socket *xsk)  				   "LGPL-2.1 or BSD-2-Clause", 0, log_buf,  				   log_buf_size);  	if (prog_fd < 0) { -		pr_warning("BPF log buffer:\n%s", log_buf); +		pr_warn("BPF log buffer:\n%s", log_buf);  		return prog_fd;  	} @@ -343,13 +431,18 @@ static int xsk_get_max_queues(struct xsk_socket *xsk)  		goto out;  	} -	if (err || channels.max_combined == 0) +	if (err) {  		/* If the device says it has no channels, then all traffic  		 * is sent to a single stream, so max queues = 1.  		 */  		ret = 1; -	else -		ret = channels.max_combined; +	} else { +		/* Take the max of rx, tx, combined. Drivers return +		 * the number of channels in different ways. +		 */ +		ret = max(channels.max_rx, channels.max_tx); +		ret = max(ret, (int)channels.max_combined); +	}  out:  	close(fd); @@ -465,6 +558,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)  		}  	} else {  		xsk->prog_fd = bpf_prog_get_fd_by_id(prog_id); +		if (xsk->prog_fd < 0) +			return -errno;  		err = xsk_lookup_bpf_maps(xsk);  		if (err) {  			close(xsk->prog_fd); @@ -472,7 +567,8 @@ static int xsk_setup_xdp_prog(struct xsk_socket *xsk)  		}  	} -	err = xsk_set_bpf_maps(xsk); +	if (xsk->rx) +		err = xsk_set_bpf_maps(xsk);  	if (err) {  		xsk_delete_bpf_maps(xsk);  		close(xsk->prog_fd); @@ -491,21 +587,26 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  	struct sockaddr_xdp sxdp = {};  	struct xdp_mmap_offsets off;  	struct xsk_socket *xsk; -	socklen_t optlen;  	int err; -	if (!umem || !xsk_ptr || !rx || !tx) +	if (!umem || !xsk_ptr || !(rx || tx))  		return -EFAULT; -	if (umem->refcount) { -		pr_warning("Error: shared umems not supported by libbpf.\n"); -		return -EBUSY; -	} -  	xsk = calloc(1, sizeof(*xsk));  	if (!xsk)  		return -ENOMEM; +	err = xsk_set_xdp_socket_config(&xsk->config, usr_config); +	if (err) +		goto out_xsk_alloc; + +	if (umem->refcount && +	    !(xsk->config.libbpf_flags & XSK_LIBBPF_FLAGS__INHIBIT_PROG_LOAD)) { +		pr_warn("Error: shared umems not supported by libbpf supplied XDP program.\n"); +		err = -EBUSY; +		goto out_xsk_alloc; +	} +  	if (umem->refcount++ > 0) {  		xsk->fd = socket(AF_XDP, SOCK_RAW, 0);  		if (xsk->fd < 0) { @@ -527,10 +628,6 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  	memcpy(xsk->ifname, ifname, IFNAMSIZ - 1);  	xsk->ifname[IFNAMSIZ - 1] = '\0'; -	err = xsk_set_xdp_socket_config(&xsk->config, usr_config); -	if (err) -		goto out_socket; -  	if (rx) {  		err = setsockopt(xsk->fd, SOL_XDP, XDP_RX_RING,  				 &xsk->config.rx_size, @@ -550,8 +647,7 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  		}  	} -	optlen = sizeof(off); -	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); +	err = xsk_get_mmap_offsets(xsk->fd, &off);  	if (err) {  		err = -errno;  		goto out_socket; @@ -599,7 +695,12 @@ int xsk_socket__create(struct xsk_socket **xsk_ptr, const char *ifname,  	sxdp.sxdp_family = PF_XDP;  	sxdp.sxdp_ifindex = xsk->ifindex;  	sxdp.sxdp_queue_id = xsk->queue_id; -	sxdp.sxdp_flags = xsk->config.bind_flags; +	if (umem->refcount > 1) { +		sxdp.sxdp_flags = XDP_SHARED_UMEM; +		sxdp.sxdp_shared_umem_fd = umem->fd; +	} else { +		sxdp.sxdp_flags = xsk->config.bind_flags; +	}  	err = bind(xsk->fd, (struct sockaddr *)&sxdp, sizeof(sxdp));  	if (err) { @@ -637,7 +738,6 @@ out_xsk_alloc:  int xsk_umem__delete(struct xsk_umem *umem)  {  	struct xdp_mmap_offsets off; -	socklen_t optlen;  	int err;  	if (!umem) @@ -646,8 +746,7 @@ int xsk_umem__delete(struct xsk_umem *umem)  	if (umem->refcount)  		return -EBUSY; -	optlen = sizeof(off); -	err = getsockopt(umem->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); +	err = xsk_get_mmap_offsets(umem->fd, &off);  	if (!err) {  		munmap(umem->fill->ring - off.fr.desc,  		       off.fr.desc + umem->config.fill_size * sizeof(__u64)); @@ -665,7 +764,6 @@ void xsk_socket__delete(struct xsk_socket *xsk)  {  	size_t desc_sz = sizeof(struct xdp_desc);  	struct xdp_mmap_offsets off; -	socklen_t optlen;  	int err;  	if (!xsk) @@ -676,8 +774,7 @@ void xsk_socket__delete(struct xsk_socket *xsk)  		close(xsk->prog_fd);  	} -	optlen = sizeof(off); -	err = getsockopt(xsk->fd, SOL_XDP, XDP_MMAP_OFFSETS, &off, &optlen); +	err = xsk_get_mmap_offsets(xsk->fd, &off);  	if (!err) {  		if (xsk->rx) {  			munmap(xsk->rx->ring - off.rx.desc,  |