diff options
160 files changed, 3609 insertions, 2089 deletions
diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index e21e1b40b525..3e5ef1e0e890 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -15,6 +15,16 @@ LD ?= $(CROSS_COMPILE)ld MAKEFLAGS += --no-print-directory +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libapi.a CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) @@ -45,10 +55,23 @@ RM = rm -f API_IN := $(OUTPUT)libapi-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include all: fixdep $(LIBFILE) @@ -58,6 +81,35 @@ $(API_IN): FORCE $(LIBFILE): $(API_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,cpu.h,$(prefix)/include/api,644); \ + $(call do_install,debug.h,$(prefix)/include/api,644); \ + $(call do_install,io.h,$(prefix)/include/api,644); \ + $(call do_install,fd/array.h,$(prefix)/include/api/fd,644); \ + $(call do_install,fs/fs.h,$(prefix)/include/api/fs,644); \ + $(call do_install,fs/tracing_path.h,$(prefix)/include/api/fs,644); + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) diff --git a/tools/lib/api/fs/tracing_path.c b/tools/lib/api/fs/tracing_path.c index 5afb11b30fca..b8e457c841ab 100644 --- a/tools/lib/api/fs/tracing_path.c +++ b/tools/lib/api/fs/tracing_path.c @@ -113,6 +113,22 @@ DIR *tracing_events__opendir(void) return dir; } +int tracing_events__scandir_alphasort(struct dirent ***namelist) +{ + char *path = get_tracing_file("events"); + int ret; + + if (!path) { + *namelist = NULL; + return 0; + } + + ret = scandir(path, namelist, NULL, alphasort); + put_events_file(path); + + return ret; +} + int tracing_path__strerror_open_tp(int err, char *buf, size_t size, const char *sys, const char *name) { diff --git a/tools/lib/api/fs/tracing_path.h b/tools/lib/api/fs/tracing_path.h index a19136b086dc..fc6347c11deb 100644 --- a/tools/lib/api/fs/tracing_path.h +++ b/tools/lib/api/fs/tracing_path.h @@ -6,6 +6,7 @@ #include <dirent.h> DIR *tracing_events__opendir(void); +int tracing_events__scandir_alphasort(struct dirent ***namelist); void tracing_path_set(const char *mountpoint); const char *tracing_path_mount(void); diff --git a/tools/lib/perf/Makefile 
b/tools/lib/perf/Makefile index 21df023a2103..1badc0a04676 100644 --- a/tools/lib/perf/Makefile +++ b/tools/lib/perf/Makefile @@ -189,13 +189,21 @@ install_lib: libs install_headers: $(call QUIET_INSTALL, headers) \ + $(call do_install,include/perf/bpf_perf.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/core.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/cpumap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/threadmap.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evlist.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/evsel.h,$(prefix)/include/perf,644); \ $(call do_install,include/perf/event.h,$(prefix)/include/perf,644); \ - $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); + $(call do_install,include/perf/mmap.h,$(prefix)/include/perf,644); \ + $(call do_install,include/internal/cpumap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/evlist.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/evsel.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/lib.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/mmap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/threadmap.h,$(prefix)/include/internal,644); \ + $(call do_install,include/internal/xyarray.h,$(prefix)/include/internal,644); install_pkgconfig: $(LIBPERF_PC) $(call QUIET_INSTALL, $(LIBPERF_PC)) \ diff --git a/tools/lib/perf/include/perf/cpumap.h b/tools/lib/perf/include/perf/cpumap.h index 03aceb72a783..3f43f770cdac 100644 --- a/tools/lib/perf/include/perf/cpumap.h +++ b/tools/lib/perf/include/perf/cpumap.h @@ -3,7 +3,6 @@ #define __LIBPERF_CPUMAP_H #include <perf/core.h> -#include <perf/cpumap.h> #include <stdio.h> #include <stdbool.h> @@ -12,6 +11,8 @@ struct perf_cpu { int cpu; }; +struct perf_cpu_map; + LIBPERF_API struct perf_cpu_map *perf_cpu_map__dummy_new(void); LIBPERF_API struct 
perf_cpu_map *perf_cpu_map__default_new(void); LIBPERF_API struct perf_cpu_map *perf_cpu_map__new(const char *cpu_list); diff --git a/tools/lib/subcmd/Makefile b/tools/lib/subcmd/Makefile index 8f1a09cdfd17..e96566f8991c 100644 --- a/tools/lib/subcmd/Makefile +++ b/tools/lib/subcmd/Makefile @@ -17,6 +17,15 @@ RM = rm -f MAKEFLAGS += --no-print-directory +INSTALL = install + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. +DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + LIBFILE = $(OUTPUT)libsubcmd.a CFLAGS := -ggdb3 -Wall -Wextra -std=gnu99 -fPIC @@ -48,6 +57,18 @@ CFLAGS += $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) SUBCMD_IN := $(OUTPUT)libsubcmd-in.o +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + all: export srctree OUTPUT CC LD CFLAGS V @@ -61,6 +82,34 @@ $(SUBCMD_IN): FORCE $(LIBFILE): $(SUBCMD_IN) $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SUBCMD_IN) +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! 
-d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,exec-cmd.h,$(prefix)/include/subcmd,644); \ + $(call do_install,help.h,$(prefix)/include/subcmd,644); \ + $(call do_install,pager.h,$(prefix)/include/subcmd,644); \ + $(call do_install,parse-options.h,$(prefix)/include/subcmd,644); \ + $(call do_install,run-command.h,$(prefix)/include/subcmd,644); + +install: install_lib install_headers + clean: $(call QUIET_CLEAN, libsubcmd) $(RM) $(LIBFILE); \ find $(or $(OUTPUT),.) -name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) diff --git a/tools/lib/symbol/Build b/tools/lib/symbol/Build new file mode 100644 index 000000000000..9b9a9c78d3c9 --- /dev/null +++ b/tools/lib/symbol/Build @@ -0,0 +1 @@ +libsymbol-y += kallsyms.o diff --git a/tools/lib/symbol/Makefile b/tools/lib/symbol/Makefile new file mode 100644 index 000000000000..4c1d6b53032d --- /dev/null +++ b/tools/lib/symbol/Makefile @@ -0,0 +1,115 @@ +# SPDX-License-Identifier: GPL-2.0 +include ../../scripts/Makefile.include +include ../../scripts/utilities.mak # QUIET_CLEAN + +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(CURDIR))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + +CC ?= $(CROSS_COMPILE)gcc +AR ?= $(CROSS_COMPILE)ar +LD ?= $(CROSS_COMPILE)ld + +MAKEFLAGS += --no-print-directory + +INSTALL = install + + +# Use DESTDIR for installing into a different root directory. +# This is useful for building a package. The program will be +# installed in this directory as if it was the root directory. +# Then the build tool can move it later. 
+DESTDIR ?= +DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' + +LIBFILE = $(OUTPUT)libsymbol.a + +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu11 -U_FORTIFY_SOURCE -fPIC + +ifeq ($(DEBUG),0) +ifeq ($(CC_NO_CLANG), 0) + CFLAGS += -O3 +else + CFLAGS += -O6 +endif +endif + +ifeq ($(DEBUG),0) + CFLAGS += -D_FORTIFY_SOURCE +endif + +# Treat warnings as errors unless directed not to +ifneq ($(WERROR),0) + CFLAGS += -Werror +endif + +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 + +CFLAGS += -I$(srctree)/tools/lib +CFLAGS += -I$(srctree)/tools/include + +RM = rm -f + +SYMBOL_IN := $(OUTPUT)libsymbol-in.o + +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + +prefix ?= +libdir = $(prefix)/$(libdir_relative) + +# Shell quotes +libdir_SQ = $(subst ','\'',$(libdir)) + +all: + +export srctree OUTPUT CC LD CFLAGS V +include $(srctree)/tools/build/Makefile.include +include $(srctree)/tools/scripts/Makefile.include + +all: fixdep $(LIBFILE) + +$(SYMBOL_IN): FORCE + $(MAKE) $(build)=libsymbol V=1 + +$(LIBFILE): $(SYMBOL_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(SYMBOL_IN) + +define do_install_mkdir + if [ ! -d '$(DESTDIR_SQ)$1' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$1'; \ + fi +endef + +define do_install + if [ ! -d '$(DESTDIR_SQ)$2' ]; then \ + $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$2'; \ + fi; \ + $(INSTALL) $1 $(if $3,-m $3,) '$(DESTDIR_SQ)$2' +endef + +install_lib: $(LIBFILE) + $(call QUIET_INSTALL, $(LIBFILE)) \ + $(call do_install_mkdir,$(libdir_SQ)); \ + cp -fpR $(LIBFILE) $(DESTDIR)$(libdir_SQ) + +install_headers: + $(call QUIET_INSTALL, headers) \ + $(call do_install,kallsyms.h,$(prefix)/include/symbol,644); + +install: install_lib install_headers + +clean: + $(call QUIET_CLEAN, libsymbol) $(RM) $(LIBFILE); \ + find $(or $(OUTPUT),.) 
-name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: + +.PHONY: all install install_lib install_headers clean FORCE diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index fd7a6ff9e7aa..05806ecfc33c 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -39,7 +39,12 @@ trace/beauty/generated/ pmu-events/pmu-events.c pmu-events/jevents feature/ +libapi/ libbpf/ +libperf/ +libsubcmd/ +libsymbol/ +libtraceevent/ +libtraceevent_plugins/ fixdep -libtraceevent-dynamic-list Documentation/doc.dep diff --git a/tools/perf/Documentation/perf-annotate.txt b/tools/perf/Documentation/perf-annotate.txt index 18fcc52809fb..980fe2c29275 100644 --- a/tools/perf/Documentation/perf-annotate.txt +++ b/tools/perf/Documentation/perf-annotate.txt @@ -41,7 +41,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -n:: --show-nr-samples:: diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index be65bd55ab2a..f3067a4af294 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -75,7 +75,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -f:: --force:: diff --git a/tools/perf/Documentation/perf-intel-pt.txt b/tools/perf/Documentation/perf-intel-pt.txt index 92464a5d7eaf..7b6ccd2fa3bf 100644 --- a/tools/perf/Documentation/perf-intel-pt.txt +++ b/tools/perf/Documentation/perf-intel-pt.txt @@ -189,8 +189,16 @@ There is also script intel-pt-events.py which provides an example of how to unpack the raw data for power events and PTWRITE. 
The script also displays branches, and supports 2 additional modes selected by option: - --insn-trace - instruction trace - --src-trace - source trace + - --insn-trace - instruction trace + - --src-trace - source trace + +The intel-pt-events.py script also has options: + + - --all-switch-events - display all switch events, not only the last consecutive. + - --interleave [<n>] - interleave sample output for the same timestamp so that + no more than n samples for a CPU are displayed in a row. 'n' defaults to 4. + Note this only affects the order of output, and only when the timestamp is the + same. As mentioned above, it is easy to capture too much data. One way to limit the data captured is to use 'snapshot' mode which is explained further below. diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 57384a97c04f..44a819af573d 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -39,9 +39,9 @@ any extra expressions computed by perf stat. --deprecated:: Print deprecated events. By default the deprecated events are hidden. ---cputype:: -Print events applying cpu with this type for hybrid platform -(e.g. --cputype core or --cputype atom) +--unit:: +Print PMU events and metrics limited to the specific PMU name. +(e.g. --unit cpu, --unit msr, --unit cpu_core, --unit cpu_atom) [[EVENT_MODIFIERS]] EVENT MODIFIERS diff --git a/tools/perf/Documentation/perf-lock.txt b/tools/perf/Documentation/perf-lock.txt index 3b1e16563b79..4958a1ffa1cc 100644 --- a/tools/perf/Documentation/perf-lock.txt +++ b/tools/perf/Documentation/perf-lock.txt @@ -42,7 +42,7 @@ COMMON OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. 
(Suppress -v) -D:: --dump-raw-trace:: diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index 080981d38d7b..7f8e8ba3a787 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -57,7 +57,7 @@ OPTIONS -q:: --quiet:: - Be quiet (do not show any messages including errors). + Do not show any warnings or messages. Can not use with -v. -a:: diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index e41ae950fdc3..9ea6d44aca58 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -282,7 +282,7 @@ OPTIONS -q:: --quiet:: - Don't print any message, useful for scripting. + Don't print any warnings or messages, useful for scripting. -v:: --verbose:: diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index 4533db2ee56b..4fa509b15948 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -27,7 +27,7 @@ OPTIONS -q:: --quiet:: - Do not show any message. (Suppress -v) + Do not show any warnings or messages. (Suppress -v) -n:: --show-nr-samples:: diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index d7ff1867feda..18abdc1dce05 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -354,8 +354,8 @@ forbids the event merging logic from sharing events between groups and may be used to increase accuracy in this case. --quiet:: -Don't print output. This is useful with perf stat record below to only -write data to the perf.data file. +Don't print output, warnings or messages. This is useful with perf stat +record below to only write data to the perf.data file. 
STAT RECORD ----------- diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index f5d72f936a6b..c8e8e05b4ff1 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -13,8 +13,7 @@ tools/lib/ctype.c tools/lib/hweight.c tools/lib/rbtree.c tools/lib/string.c -tools/lib/symbol/kallsyms.c -tools/lib/symbol/kallsyms.h +tools/lib/symbol tools/lib/find_bit.c tools/lib/bitmap.c tools/lib/list_sort.c diff --git a/tools/perf/Makefile.config b/tools/perf/Makefile.config index 898226ea8cad..9cc3c48f3288 100644 --- a/tools/perf/Makefile.config +++ b/tools/perf/Makefile.config @@ -307,7 +307,7 @@ CORE_CFLAGS += -ggdb3 CORE_CFLAGS += -funwind-tables CORE_CFLAGS += -Wall CORE_CFLAGS += -Wextra -CORE_CFLAGS += -std=gnu99 +CORE_CFLAGS += -std=gnu11 CXXFLAGS += -std=gnu++14 -fno-exceptions -fno-rtti CXXFLAGS += -Wall @@ -349,7 +349,6 @@ ifeq ($(DEBUG),0) endif endif -INC_FLAGS += -I$(srctree)/tools/lib/perf/include INC_FLAGS += -I$(src-perf)/util/include INC_FLAGS += -I$(src-perf)/arch/$(SRCARCH)/include INC_FLAGS += -I$(srctree)/tools/include/ @@ -367,7 +366,6 @@ endif INC_FLAGS += -I$(src-perf)/util INC_FLAGS += -I$(src-perf) -INC_FLAGS += -I$(srctree)/tools/lib/ CORE_CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE @@ -1216,7 +1214,7 @@ endif # Among the variables below, these: # perfexecdir -# perf_include_dir +# libbpf_include_dir # perf_examples_dir # template_dir # mandir @@ -1239,7 +1237,8 @@ includedir = $(abspath $(prefix)/$(includedir_relative)) mandir = share/man infodir = share/info perfexecdir = libexec/perf-core -perf_include_dir = lib/perf/include +# FIXME: system's libbpf header directory, where we expect to find bpf/bpf_helpers.h, for instance +libbpf_include_dir = /usr/include perf_examples_dir = lib/perf/examples sharedir = $(prefix)/share template_dir = share/perf-core/templates @@ -1272,7 +1271,7 @@ includedir_SQ = $(subst ','\'',$(includedir)) mandir_SQ = $(subst ','\'',$(mandir)) infodir_SQ = $(subst ','\'',$(infodir)) 
perfexecdir_SQ = $(subst ','\'',$(perfexecdir)) -perf_include_dir_SQ = $(subst ','\'',$(perf_include_dir)) +libbpf_include_dir_SQ = $(subst ','\'',$(libbpf_include_dir)) perf_examples_dir_SQ = $(subst ','\'',$(perf_examples_dir)) template_dir_SQ = $(subst ','\'',$(template_dir)) htmldir_SQ = $(subst ','\'',$(htmldir)) @@ -1284,13 +1283,13 @@ srcdir_SQ = $(subst ','\'',$(srcdir)) ifneq ($(filter /%,$(firstword $(perfexecdir))),) perfexec_instdir = $(perfexecdir) -perf_include_instdir = $(perf_include_dir) +perf_include_instdir = $(libbpf_include_dir) perf_examples_instdir = $(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(STRACE_GROUPS_DIR) tip_instdir = $(tipdir) else perfexec_instdir = $(prefix)/$(perfexecdir) -perf_include_instdir = $(prefix)/$(perf_include_dir) +perf_include_instdir = $(prefix)/$(libbpf_include_dir) perf_examples_instdir = $(prefix)/$(perf_examples_dir) STRACE_GROUPS_INSTDIR = $(prefix)/$(STRACE_GROUPS_DIR) tip_instdir = $(prefix)/$(tipdir) @@ -1352,7 +1351,7 @@ $(call detected_var,ETC_PERFCONFIG_SQ) $(call detected_var,STRACE_GROUPS_DIR_SQ) $(call detected_var,prefix_SQ) $(call detected_var,perfexecdir_SQ) -$(call detected_var,perf_include_dir_SQ) +$(call detected_var,libbpf_include_dir_SQ) $(call detected_var,perf_examples_dir_SQ) $(call detected_var,tipdir_SQ) $(call detected_var,srcdir_SQ) diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index a432e59afc42..8c0df762fb02 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -241,10 +241,12 @@ sub-make: fixdep else # force_fixdep -LIB_DIR = $(srctree)/tools/lib/api/ -TRACE_EVENT_DIR = $(srctree)/tools/lib/traceevent/ +LIBAPI_DIR = $(srctree)/tools/lib/api/ +LIBTRACEEVENT_DIR = $(srctree)/tools/lib/traceevent/ +LIBTRACEEVENT_PLUGINS_DIR = $(LIBTRACEEVENT_DIR)/plugins LIBBPF_DIR = $(srctree)/tools/lib/bpf/ -SUBCMD_DIR = $(srctree)/tools/lib/subcmd/ +LIBSUBCMD_DIR = $(srctree)/tools/lib/subcmd/ +LIBSYMBOL_DIR = $(srctree)/tools/lib/symbol/ LIBPERF_DIR = 
$(srctree)/tools/lib/perf/ DOC_DIR = $(srctree)/tools/perf/Documentation/ @@ -292,26 +294,18 @@ grep-libs = $(filter -l%,$(1)) strip-libs = $(filter-out -l%,$(1)) ifneq ($(OUTPUT),) - TE_PATH=$(OUTPUT) - PLUGINS_PATH=$(OUTPUT) - SUBCMD_PATH=$(OUTPUT) - LIBPERF_PATH=$(OUTPUT) -ifneq ($(subdir),) - API_PATH=$(OUTPUT)/../lib/api/ + LIBTRACEEVENT_OUTPUT = $(abspath $(OUTPUT))/libtraceevent else - API_PATH=$(OUTPUT) + LIBTRACEEVENT_OUTPUT = $(CURDIR)/libtraceevent endif -else - TE_PATH=$(TRACE_EVENT_DIR) - PLUGINS_PATH=$(TRACE_EVENT_DIR)plugins/ - API_PATH=$(LIB_DIR) - SUBCMD_PATH=$(SUBCMD_DIR) - LIBPERF_PATH=$(LIBPERF_DIR) -endif - -LIBTRACEEVENT = $(TE_PATH)libtraceevent.a +LIBTRACEEVENT_PLUGINS_OUTPUT = $(LIBTRACEEVENT_OUTPUT)_plugins +LIBTRACEEVENT_DESTDIR = $(LIBTRACEEVENT_OUTPUT) +LIBTRACEEVENT_PLUGINS_DESTDIR = $(LIBTRACEEVENT_PLUGINS_OUTPUT) +LIBTRACEEVENT_INCLUDE = $(LIBTRACEEVENT_DESTDIR)/include +LIBTRACEEVENT = $(LIBTRACEEVENT_OUTPUT)/libtraceevent.a export LIBTRACEEVENT -LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST = $(LIBTRACEEVENT_PLUGINS_OUTPUT)/libtraceevent-dynamic-list +CFLAGS += -I$(LIBTRACEEVENT_OUTPUT)/include # # The static build has no dynsym table, so this does not work for @@ -320,8 +314,16 @@ LIBTRACEEVENT_DYNAMIC_LIST = $(PLUGINS_PATH)libtraceevent-dynamic-list DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = $(if $(findstring -static,$(LDFLAGS)),,$(DYNAMIC_LIST_LDFLAGS)) -LIBAPI = $(API_PATH)libapi.a +ifneq ($(OUTPUT),) + LIBAPI_OUTPUT = $(abspath $(OUTPUT))/libapi +else + LIBAPI_OUTPUT = $(CURDIR)/libapi +endif +LIBAPI_DESTDIR = $(LIBAPI_OUTPUT) +LIBAPI_INCLUDE = $(LIBAPI_DESTDIR)/include +LIBAPI = $(LIBAPI_OUTPUT)/libapi.a export LIBAPI +CFLAGS += -I$(LIBAPI_OUTPUT)/include ifneq ($(OUTPUT),) LIBBPF_OUTPUT = $(abspath $(OUTPUT))/libbpf @@ -331,11 +333,38 @@ endif LIBBPF_DESTDIR = $(LIBBPF_OUTPUT) LIBBPF_INCLUDE = 
$(LIBBPF_DESTDIR)/include LIBBPF = $(LIBBPF_OUTPUT)/libbpf.a +CFLAGS += -I$(LIBBPF_OUTPUT)/include -LIBSUBCMD = $(SUBCMD_PATH)libsubcmd.a +ifneq ($(OUTPUT),) + LIBSUBCMD_OUTPUT = $(abspath $(OUTPUT))/libsubcmd +else + LIBSUBCMD_OUTPUT = $(CURDIR)/libsubcmd +endif +LIBSUBCMD_DESTDIR = $(LIBSUBCMD_OUTPUT) +LIBSUBCMD_INCLUDE = $(LIBSUBCMD_DESTDIR)/include +LIBSUBCMD = $(LIBSUBCMD_OUTPUT)/libsubcmd.a +CFLAGS += -I$(LIBSUBCMD_OUTPUT)/include -LIBPERF = $(LIBPERF_PATH)libperf.a +ifneq ($(OUTPUT),) + LIBSYMBOL_OUTPUT = $(abspath $(OUTPUT))/libsymbol +else + LIBSYMBOL_OUTPUT = $(CURDIR)/libsymbol +endif +LIBSYMBOL_DESTDIR = $(LIBSYMBOL_OUTPUT) +LIBSYMBOL_INCLUDE = $(LIBSYMBOL_DESTDIR)/include +LIBSYMBOL = $(LIBSYMBOL_OUTPUT)/libsymbol.a +CFLAGS += -I$(LIBSYMBOL_OUTPUT)/include + +ifneq ($(OUTPUT),) + LIBPERF_OUTPUT = $(abspath $(OUTPUT))/libperf +else + LIBPERF_OUTPUT = $(CURDIR)/libperf +endif +LIBPERF_DESTDIR = $(LIBPERF_OUTPUT) +LIBPERF_INCLUDE = $(LIBPERF_DESTDIR)/include +LIBPERF = $(LIBPERF_OUTPUT)/libperf.a export LIBPERF +CFLAGS += -I$(LIBPERF_OUTPUT)/include # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -385,7 +414,7 @@ endif export PERL_PATH -PERFLIBS = $(LIBAPI) $(LIBSUBCMD) $(LIBPERF) +PERFLIBS = $(LIBAPI) $(LIBPERF) $(LIBSUBCMD) $(LIBSYMBOL) ifndef NO_LIBBPF ifndef LIBBPF_DYNAMIC PERFLIBS += $(LIBBPF) @@ -668,14 +697,14 @@ build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj $(PERF_IN): prepare FORCE $(Q)$(MAKE) $(build)=perf -$(PMU_EVENTS_IN): FORCE +$(PMU_EVENTS_IN): FORCE prepare $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=pmu-events obj=pmu-events $(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(PMU_EVENTS_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ $(PERF_IN) $(PMU_EVENTS_IN) $(LIBS) -o $@ -$(GTK_IN): FORCE +$(GTK_IN): FORCE prepare $(Q)$(MAKE) $(build)=gtk $(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) @@ -751,6 +780,12 @@ prepare: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h archheaders $(drm_ioc $(rename_flags_array) \ $(arch_errno_name_array) \ $(sync_file_range_arrays) \ + $(LIBAPI) \ + $(LIBBPF) \ + $(LIBPERF) \ + $(LIBSUBCMD) \ + $(LIBSYMBOL) \ + $(LIBTRACEEVENT) \ bpf-skel $(OUTPUT)%.o: %.c prepare FORCE @@ -810,28 +845,42 @@ $(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) 'EXTRA_CFLAGS=$(EXTRA_CFLAGS)' 'LDFLAGS=$(filter-out -static,$(LDFLAGS))' -$(LIBTRACEEVENT): FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a - -libtraceevent_plugins: FORCE - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins - -$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR)plugins $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list +$(LIBTRACEEVENT): FORCE | $(LIBTRACEEVENT_OUTPUT) + $(Q)$(MAKE) -C $(LIBTRACEEVENT_DIR) O=$(LIBTRACEEVENT_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_DESTDIR) prefix= \ + $@ install_headers $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBTRACEEVENT_OUTPUT) + +libtraceevent_plugins: FORCE | $(LIBTRACEEVENT_PLUGINS_OUTPUT) + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ 
+ plugins + +libtraceevent_plugins-clean: + $(call QUIET_CLEAN, libtraceevent_plugins) + $(Q)$(RM) -r -- $(LIBTRACEEVENT_PLUGINS_OUTPUT) + +$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + $(LIBTRACEEVENT_FLAGS) $@ install-traceevent-plugins: libtraceevent_plugins - $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins + $(Q)$(MAKE) -C $(LIBTRACEEVENT_PLUGINS_DIR) O=$(LIBTRACEEVENT_PLUGINS_OUTPUT) \ + DESTDIR=$(LIBTRACEEVENT_PLUGINS_DESTDIR) prefix= \ + $(LIBTRACEEVENT_FLAGS) install -$(LIBAPI): FORCE - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a +$(LIBAPI): FORCE | $(LIBAPI_OUTPUT) + $(Q)$(MAKE) -C $(LIBAPI_DIR) O=$(LIBAPI_OUTPUT) \ + DESTDIR=$(LIBAPI_DESTDIR) prefix= \ + $@ install_headers $(LIBAPI)-clean: $(call QUIET_CLEAN, libapi) - $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBAPI_OUTPUT) $(LIBBPF): FORCE | $(LIBBPF_OUTPUT) $(Q)$(MAKE) -C $(LIBBPF_DIR) FEATURES_DUMP=$(FEATURE_DUMP_EXPORT) \ @@ -842,18 +891,32 @@ $(LIBBPF)-clean: $(call QUIET_CLEAN, libbpf) $(Q)$(RM) -r -- $(LIBBPF_OUTPUT) -$(LIBPERF): FORCE - $(Q)$(MAKE) -C $(LIBPERF_DIR) EXTRA_CFLAGS="$(LIBPERF_CFLAGS)" O=$(OUTPUT) $(OUTPUT)libperf.a +$(LIBPERF): FORCE | $(LIBPERF_OUTPUT) + $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(LIBPERF_OUTPUT) \ + DESTDIR=$(LIBPERF_DESTDIR) prefix= \ + $@ install_headers $(LIBPERF)-clean: $(call QUIET_CLEAN, libperf) - $(Q)$(MAKE) -C $(LIBPERF_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(RM) -r -- $(LIBPERF_OUTPUT) -$(LIBSUBCMD): FORCE - $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) $(OUTPUT)libsubcmd.a +$(LIBSUBCMD): FORCE | $(LIBSUBCMD_OUTPUT) + $(Q)$(MAKE) -C $(LIBSUBCMD_DIR) O=$(LIBSUBCMD_OUTPUT) \ + DESTDIR=$(LIBSUBCMD_DESTDIR) prefix= \ + $@ install_headers $(LIBSUBCMD)-clean: - $(Q)$(MAKE) -C $(SUBCMD_DIR) O=$(OUTPUT) clean + $(call QUIET_CLEAN, libsubcmd) + $(Q)$(RM) 
-r -- $(LIBSUBCMD_OUTPUT) + +$(LIBSYMBOL): FORCE | $(LIBSYMBOL_OUTPUT) + $(Q)$(MAKE) -C $(LIBSYMBOL_DIR) O=$(LIBSYMBOL_OUTPUT) \ + DESTDIR=$(LIBSYMBOL_DESTDIR) prefix= \ + $@ install_headers + +$(LIBSYMBOL)-clean: + $(call QUIET_CLEAN, libsymbol) + $(Q)$(RM) -r -- $(LIBSYMBOL_OUTPUT) help: @echo 'Perf make targets:' @@ -960,11 +1023,6 @@ endif $(call QUIET_INSTALL, libexec) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)' ifndef NO_LIBBPF - $(call QUIET_INSTALL, bpf-headers) \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux'; \ - $(INSTALL) include/bpf/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf'; \ - $(INSTALL) include/bpf/linux/*.h -m 644 -t '$(DESTDIR_SQ)$(perf_include_instdir_SQ)/bpf/linux' $(call QUIET_INSTALL, bpf-examples) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf'; \ $(INSTALL) examples/bpf/*.c -m 644 -t '$(DESTDIR_SQ)$(perf_examples_instdir_SQ)/bpf' @@ -1044,9 +1102,14 @@ SKELETONS += $(SKEL_OUT)/bperf_cgroup.skel.h $(SKEL_OUT)/func_latency.skel.h SKELETONS += $(SKEL_OUT)/off_cpu.skel.h $(SKEL_OUT)/lock_contention.skel.h SKELETONS += $(SKEL_OUT)/kwork_trace.skel.h -$(SKEL_TMP_OUT) $(LIBBPF_OUTPUT): +$(SKEL_TMP_OUT) $(LIBAPI_OUTPUT) $(LIBBPF_OUTPUT) $(LIBPERF_OUTPUT) $(LIBSUBCMD_OUTPUT) $(LIBSYMBOL_OUTPUT): $(Q)$(MKDIR) -p $@ +ifndef LIBTRACEEVENT_DYNAMIC +$(LIBTRACEEVENT_OUTPUT) $(LIBTRACEEVENT_PLUGINS_OUTPUT): + $(Q)$(MKDIR) -p $@ +endif + ifdef BUILD_BPF_SKEL BPFTOOL := $(SKEL_TMP_OUT)/bootstrap/bpftool BPF_INCLUDE := -I$(SKEL_TMP_OUT)/.. 
-I$(LIBBPF_INCLUDE) @@ -1089,7 +1152,7 @@ endif # BUILD_BPF_SKEL bpf-skel-clean: $(call QUIET_CLEAN, bpf-skel) $(RM) -r $(SKEL_TMP_OUT) $(SKELETONS) -clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean +clean:: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean $(LIBBPF)-clean $(LIBSUBCMD)-clean $(LIBSYMBOL)-clean $(LIBPERF)-clean fixdep-clean python-clean bpf-skel-clean tests-coresight-targets-clean libtraceevent_plugins-clean $(call QUIET_CLEAN, core-objs) $(RM) $(LIBPERF_A) $(OUTPUT)perf-archive $(OUTPUT)perf-iostat $(LANG_BINDINGS) $(Q)find $(or $(OUTPUT),.) -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete $(Q)$(RM) $(OUTPUT).config-detected diff --git a/tools/perf/arch/arm/util/unwind-libdw.c b/tools/perf/arch/arm/util/unwind-libdw.c index b7692cb0c733..1834a0cd9ce3 100644 --- a/tools/perf/arch/arm/util/unwind-libdw.c +++ b/tools/perf/arch/arm/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include <elfutils/libdwfl.h> #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/arm64/util/machine.c b/tools/perf/arch/arm64/util/machine.c index 41c1596e5207..235a0a1e1ec7 100644 --- a/tools/perf/arch/arm64/util/machine.c +++ b/tools/perf/arch/arm64/util/machine.c @@ -7,6 +7,7 @@ #include "symbol.h" #include "callchain.h" #include "record.h" +#include "util/perf_regs.h" void arch__add_leaf_frame_record_opts(struct record_opts *opts) { diff --git a/tools/perf/arch/arm64/util/pmu.c b/tools/perf/arch/arm64/util/pmu.c index f849b1e88d43..477e513972a4 100644 --- a/tools/perf/arch/arm64/util/pmu.c +++ b/tools/perf/arch/arm64/util/pmu.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 +#include <internal/cpumap.h> #include "../../../util/cpumap.h" #include 
"../../../util/pmu.h" diff --git a/tools/perf/arch/arm64/util/unwind-libdw.c b/tools/perf/arch/arm64/util/unwind-libdw.c index a50941629649..09385081bb03 100644 --- a/tools/perf/arch/arm64/util/unwind-libdw.c +++ b/tools/perf/arch/arm64/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include <elfutils/libdwfl.h> #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/arch/powerpc/util/event.c b/tools/perf/arch/powerpc/util/event.c index cf430a4c55b9..77d8cc2b5691 100644 --- a/tools/perf/arch/powerpc/util/event.c +++ b/tools/perf/arch/powerpc/util/event.c @@ -9,6 +9,7 @@ #include "../../../util/tool.h" #include "../../../util/map.h" #include "../../../util/debug.h" +#include "../../../util/sample.h" void arch_perf_parse_sample_weight(struct perf_sample *data, const __u64 *array, u64 type) diff --git a/tools/perf/arch/powerpc/util/unwind-libdw.c b/tools/perf/arch/powerpc/util/unwind-libdw.c index 7b2d96ec28e3..e616642c754c 100644 --- a/tools/perf/arch/powerpc/util/unwind-libdw.c +++ b/tools/perf/arch/powerpc/util/unwind-libdw.c @@ -3,7 +3,7 @@ #include <linux/kernel.h> #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "../../../util/sample.h" /* See backends/ppc_initreg.c and backends/ppc_regs.c in elfutils. 
*/ static const int special_regs[3][2] = { diff --git a/tools/perf/arch/riscv/util/Build b/tools/perf/arch/riscv/util/Build index 7d3050134ae0..603dbb5ae4dc 100644 --- a/tools/perf/arch/riscv/util/Build +++ b/tools/perf/arch/riscv/util/Build @@ -1,4 +1,5 @@ perf-y += perf_regs.o +perf-y += header.o perf-$(CONFIG_DWARF) += dwarf-regs.o perf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/riscv/util/header.c b/tools/perf/arch/riscv/util/header.c new file mode 100644 index 000000000000..4a41856938a8 --- /dev/null +++ b/tools/perf/arch/riscv/util/header.c @@ -0,0 +1,104 @@ +// SPDX-License-Identifier: GPL-2.0-only +/* + * Implementation of get_cpuid(). + * + * Author: Nikita Shubin <[email protected]> + */ + +#include <stdio.h> +#include <stdlib.h> +#include <api/fs/fs.h> +#include <errno.h> +#include "../../util/debug.h" +#include "../../util/header.h" + +#define CPUINFO_MVEN "mvendorid" +#define CPUINFO_MARCH "marchid" +#define CPUINFO_MIMP "mimpid" +#define CPUINFO "/proc/cpuinfo" + +static char *_get_field(const char *line) +{ + char *line2, *nl; + + line2 = strrchr(line, ' '); + if (!line2) + return NULL; + + line2++; + nl = strrchr(line, '\n'); + if (!nl) + return NULL; + + return strndup(line2, nl - line2); +} + +static char *_get_cpuid(void) +{ + char *line = NULL; + char *mvendorid = NULL; + char *marchid = NULL; + char *mimpid = NULL; + char *cpuid = NULL; + int read; + unsigned long line_sz; + FILE *cpuinfo; + + cpuinfo = fopen(CPUINFO, "r"); + if (cpuinfo == NULL) + return cpuid; + + while ((read = getline(&line, &line_sz, cpuinfo)) != -1) { + if (!strncmp(line, CPUINFO_MVEN, strlen(CPUINFO_MVEN))) { + mvendorid = _get_field(line); + if (!mvendorid) + goto free; + } else if (!strncmp(line, CPUINFO_MARCH, strlen(CPUINFO_MARCH))) { + marchid = _get_field(line); + if (!marchid) + goto free; + } else if (!strncmp(line, CPUINFO_MIMP, strlen(CPUINFO_MIMP))) { + mimpid = _get_field(line); + if (!mimpid) + goto free; + + break; + } + } + 
+ if (!mvendorid || !marchid || !mimpid) + goto free; + + if (asprintf(&cpuid, "%s-%s-%s", mvendorid, marchid, mimpid) < 0) + cpuid = NULL; + +free: + fclose(cpuinfo); + free(mvendorid); + free(marchid); + free(mimpid); + + return cpuid; +} + +int get_cpuid(char *buffer, size_t sz) +{ + char *cpuid = _get_cpuid(); + int ret = 0; + + if (sz < strlen(cpuid)) { + ret = -EINVAL; + goto free; + } + + scnprintf(buffer, sz, "%s", cpuid); +free: + free(cpuid); + return ret; +} + +char * +get_cpuid_str(struct perf_pmu *pmu __maybe_unused) +{ + return _get_cpuid(); +} diff --git a/tools/perf/arch/s390/util/unwind-libdw.c b/tools/perf/arch/s390/util/unwind-libdw.c index 387c698cdd1b..7d92452d5287 100644 --- a/tools/perf/arch/s390/util/unwind-libdw.c +++ b/tools/perf/arch/s390/util/unwind-libdw.c @@ -3,6 +3,7 @@ #include "../../util/unwind-libdw.h" #include "../../util/perf_regs.h" #include "../../util/event.h" +#include "../../util/sample.h" #include "dwarf-regs-table.h" diff --git a/tools/perf/arch/x86/include/arch-tests.h b/tools/perf/arch/x86/include/arch-tests.h index 6a1a1b3c0827..902e9ea9b99e 100644 --- a/tools/perf/arch/x86/include/arch-tests.h +++ b/tools/perf/arch/x86/include/arch-tests.h @@ -8,6 +8,7 @@ struct test_suite; int test__rdpmc(struct test_suite *test, int subtest); int test__insn_x86(struct test_suite *test, int subtest); int test__intel_pt_pkt_decoder(struct test_suite *test, int subtest); +int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest); int test__bp_modify(struct test_suite *test, int subtest); int test__x86_sample_parsing(struct test_suite *test, int subtest); diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build index 70b5bcbc15df..6f4e8636c3bf 100644 --- a/tools/perf/arch/x86/tests/Build +++ b/tools/perf/arch/x86/tests/Build @@ -3,5 +3,5 @@ perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o perf-y += arch-tests.o perf-y += sample-parsing.o -perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-pkt-decoder-test.o 
+perf-$(CONFIG_AUXTRACE) += insn-x86.o intel-pt-test.o perf-$(CONFIG_X86_64) += bp-modify.o diff --git a/tools/perf/arch/x86/tests/arch-tests.c b/tools/perf/arch/x86/tests/arch-tests.c index 04018b8aa85b..aae6ea0fe52b 100644 --- a/tools/perf/arch/x86/tests/arch-tests.c +++ b/tools/perf/arch/x86/tests/arch-tests.c @@ -5,7 +5,18 @@ #ifdef HAVE_AUXTRACE_SUPPORT DEFINE_SUITE("x86 instruction decoder - new instructions", insn_x86); -DEFINE_SUITE("Intel PT packet decoder", intel_pt_pkt_decoder); + +static struct test_case intel_pt_tests[] = { + TEST_CASE("Intel PT packet decoder", intel_pt_pkt_decoder), + TEST_CASE("Intel PT hybrid CPU compatibility", intel_pt_hybrid_compat), + { .name = NULL, } +}; + +struct test_suite suite__intel_pt = { + .desc = "Intel PT", + .test_cases = intel_pt_tests, +}; + #endif #if defined(__x86_64__) DEFINE_SUITE("x86 bp modify", bp_modify); @@ -18,7 +29,7 @@ struct test_suite *arch_tests[] = { #endif #ifdef HAVE_AUXTRACE_SUPPORT &suite__insn_x86, - &suite__intel_pt_pkt_decoder, + &suite__intel_pt, #endif #if defined(__x86_64__) &suite__bp_modify, diff --git a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c b/tools/perf/arch/x86/tests/intel-pt-test.c index 42237656f453..70b7f79396b1 100644 --- a/tools/perf/arch/x86/tests/intel-pt-pkt-decoder-test.c +++ b/tools/perf/arch/x86/tests/intel-pt-test.c @@ -1,12 +1,17 @@ // SPDX-License-Identifier: GPL-2.0 +#include <linux/compiler.h> +#include <linux/bits.h> #include <string.h> +#include <cpuid.h> +#include <sched.h> #include "intel-pt-decoder/intel-pt-pkt-decoder.h" #include "debug.h" #include "tests/tests.h" #include "arch-tests.h" +#include "cpumap.h" /** * struct test_data - Test data. 
@@ -313,3 +318,152 @@ int test__intel_pt_pkt_decoder(struct test_suite *test __maybe_unused, int subte return TEST_OK; } + +static int setaffinity(int cpu) +{ + cpu_set_t cpu_set; + + CPU_ZERO(&cpu_set); + CPU_SET(cpu, &cpu_set); + if (sched_setaffinity(0, sizeof(cpu_set), &cpu_set)) { + pr_debug("sched_setaffinity() failed for CPU %d\n", cpu); + return -1; + } + return 0; +} + +#define INTEL_PT_ADDR_FILT_CNT_MASK GENMASK(2, 0) +#define INTEL_PT_SUBLEAF_CNT 2 +#define CPUID_REG_CNT 4 + +struct cpuid_result { + union { + struct { + unsigned int eax; + unsigned int ebx; + unsigned int ecx; + unsigned int edx; + }; + unsigned int reg[CPUID_REG_CNT]; + }; +}; + +struct pt_caps { + struct cpuid_result subleaf[INTEL_PT_SUBLEAF_CNT]; +}; + +static int get_pt_caps(int cpu, struct pt_caps *caps) +{ + struct cpuid_result r; + int i; + + if (setaffinity(cpu)) + return -1; + + memset(caps, 0, sizeof(*caps)); + + for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) { + __get_cpuid_count(20, i, &r.eax, &r.ebx, &r.ecx, &r.edx); + pr_debug("CPU %d CPUID leaf 20 subleaf %d\n", cpu, i); + pr_debug("eax = 0x%08x\n", r.eax); + pr_debug("ebx = 0x%08x\n", r.ebx); + pr_debug("ecx = 0x%08x\n", r.ecx); + pr_debug("edx = 0x%08x\n", r.edx); + caps->subleaf[i] = r; + } + + return 0; +} + +static bool is_hydrid(void) +{ + unsigned int eax, ebx, ecx, edx = 0; + bool result; + + __get_cpuid_count(7, 0, &eax, &ebx, &ecx, &edx); + result = edx & BIT(15); + pr_debug("Is %shybrid : CPUID leaf 7 subleaf 0 edx %#x (bit-15 indicates hybrid)\n", + result ? 
"" : "not ", edx); + return result; +} + +static int compare_caps(int cpu, struct pt_caps *caps, struct pt_caps *caps0) +{ + struct pt_caps mask = { /* Mask of bits to check*/ + .subleaf = { + [0] = { + .ebx = GENMASK(8, 0), + .ecx = GENMASK(3, 0), + }, + [1] = { + .eax = GENMASK(31, 16), + .ebx = GENMASK(31, 0), + } + } + }; + unsigned int m, reg, reg0; + int ret = 0; + int i, j; + + for (i = 0; i < INTEL_PT_SUBLEAF_CNT; i++) { + for (j = 0; j < CPUID_REG_CNT; j++) { + m = mask.subleaf[i].reg[j]; + reg = m & caps->subleaf[i].reg[j]; + reg0 = m & caps0->subleaf[i].reg[j]; + if ((reg & reg0) != reg0) { + pr_debug("CPU %d subleaf %d reg %d FAIL %#x vs %#x\n", + cpu, i, j, reg, reg0); + ret = -1; + } + } + } + + m = INTEL_PT_ADDR_FILT_CNT_MASK; + reg = m & caps->subleaf[1].eax; + reg0 = m & caps0->subleaf[1].eax; + if (reg < reg0) { + pr_debug("CPU %d subleaf 1 reg 0 FAIL address filter count %#x vs %#x\n", + cpu, reg, reg0); + ret = -1; + } + + if (!ret) + pr_debug("CPU %d OK\n", cpu); + + return ret; +} + +int test__intel_pt_hybrid_compat(struct test_suite *test, int subtest) +{ + int max_cpu = cpu__max_cpu().cpu; + struct pt_caps last_caps; + struct pt_caps caps0; + int ret = TEST_OK; + int cpu; + + if (!is_hydrid()) { + test->test_cases[subtest].skip_reason = "not hybrid"; + return TEST_SKIP; + } + + if (get_pt_caps(0, &caps0)) + return TEST_FAIL; + + for (cpu = 1, last_caps = caps0; cpu < max_cpu; cpu++) { + struct pt_caps caps; + + if (get_pt_caps(cpu, &caps)) { + pr_debug("CPU %d not found\n", cpu); + continue; + } + if (!memcmp(&caps, &last_caps, sizeof(caps))) { + pr_debug("CPU %d same caps as previous CPU\n", cpu); + continue; + } + if (compare_caps(cpu, &caps, &caps0)) + ret = TEST_FAIL; + last_caps = caps; + } + + return ret; +} diff --git a/tools/perf/arch/x86/tests/sample-parsing.c b/tools/perf/arch/x86/tests/sample-parsing.c index bfbd3662b69e..690c7c07e90d 100644 --- a/tools/perf/arch/x86/tests/sample-parsing.c +++ 
b/tools/perf/arch/x86/tests/sample-parsing.c @@ -10,6 +10,7 @@ #include "event.h" #include "evsel.h" #include "debug.h" +#include "util/sample.h" #include "util/synthetic-events.h" #include "tests/tests.h" diff --git a/tools/perf/arch/x86/util/event.c b/tools/perf/arch/x86/util/event.c index e670f3547581..a3acefe6d0c6 100644 --- a/tools/perf/arch/x86/util/event.c +++ b/tools/perf/arch/x86/util/event.c @@ -2,6 +2,7 @@ #include <linux/types.h> #include <linux/string.h> #include <linux/zalloc.h> +#include <stdlib.h> #include "../../../util/event.h" #include "../../../util/synthetic-events.h" @@ -9,6 +10,7 @@ #include "../../../util/tool.h" #include "../../../util/map.h" #include "../../../util/debug.h" +#include "util/sample.h" #if defined(__x86_64__) diff --git a/tools/perf/arch/x86/util/tsc.c b/tools/perf/arch/x86/util/tsc.c index eb2b5195bd02..9b99f48b923c 100644 --- a/tools/perf/arch/x86/util/tsc.c +++ b/tools/perf/arch/x86/util/tsc.c @@ -2,6 +2,7 @@ #include <linux/types.h> #include <math.h> #include <string.h> +#include <stdlib.h> #include "../../../util/debug.h" #include "../../../util/tsc.h" diff --git a/tools/perf/arch/x86/util/unwind-libdw.c b/tools/perf/arch/x86/util/unwind-libdw.c index eea2bf87232b..ef71e8bf80bf 100644 --- a/tools/perf/arch/x86/util/unwind-libdw.c +++ b/tools/perf/arch/x86/util/unwind-libdw.c @@ -2,7 +2,7 @@ #include <elfutils/libdwfl.h> #include "../../../util/unwind-libdw.h" #include "../../../util/perf_regs.h" -#include "../../../util/event.h" +#include "util/sample.h" bool libdw__arch_set_initial_registers(Dwfl_Thread *thread, void *arg) { diff --git a/tools/perf/bench/inject-buildid.c b/tools/perf/bench/inject-buildid.c index 17672790f123..4561bda0ce6a 100644 --- a/tools/perf/bench/inject-buildid.c +++ b/tools/perf/bench/inject-buildid.c @@ -19,10 +19,10 @@ #include "util/data.h" #include "util/stat.h" #include "util/debug.h" -#include "util/event.h" #include "util/symbol.h" #include "util/session.h" #include "util/build-id.h" 
+#include "util/sample.h" #include "util/synthetic-events.h" #define MMAP_DEV_MAJOR 8 diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index e78dedf9e682..9717c6c17433 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -16,6 +16,7 @@ #include <sched.h> #include <stdio.h> #include <assert.h> +#include <debug.h> #include <malloc.h> #include <signal.h> #include <stdlib.h> @@ -116,7 +117,6 @@ struct params { long bytes_thread; int nr_tasks; - bool show_quiet; bool show_convergence; bool measure_convergence; @@ -197,7 +197,8 @@ static const struct option options[] = { OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details, " "convergence is reached when each process (all its threads) is running on a single NUMA node."), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), - OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), + OPT_BOOLEAN('q', "quiet" , &quiet, + "quiet mode (do not show any warnings or messages)"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), /* Special option string parsing callbacks: */ @@ -1474,7 +1475,7 @@ static int init(void) /* char array in count_process_nodes(): */ BUG_ON(g->p.nr_nodes < 0); - if (g->p.show_quiet && !g->p.show_details) + if (quiet && !g->p.show_details) g->p.show_details = -1; /* Some memory should be specified: */ @@ -1553,7 +1554,7 @@ static void print_res(const char *name, double val, if (!name) name = "main,"; - if (!g->p.show_quiet) + if (!quiet) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); else printf(" %14.3f %s\n", val, txt_long); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index f839e69492e8..517d928c00e3 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -525,7 +525,7 @@ int cmd_annotate(int argc, const char **argv) OPT_BOOLEAN('f', "force", &data.force, "don't complain, do 
it"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "do now show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "do now show any warnings or messages"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), #ifdef HAVE_GTK2_SUPPORT diff --git a/tools/perf/builtin-daemon.c b/tools/perf/builtin-daemon.c index 6cb3f6cc36d0..7036ec92d47d 100644 --- a/tools/perf/builtin-daemon.c +++ b/tools/perf/builtin-daemon.c @@ -100,12 +100,12 @@ static struct daemon __daemon = { }; static const char * const daemon_usage[] = { - "perf daemon start [<options>]", + "perf daemon {start|signal|stop|ping} [<options>]", "perf daemon [<options>]", NULL }; -static bool done; +static volatile sig_atomic_t done; static void sig_handler(int sig __maybe_unused) { diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index d925096dd7f0..ed07cc6cca56 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -1260,7 +1260,7 @@ static const char * const diff_usage[] = { static const struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_BOOLEAN('b', "baseline-only", &show_baseline_only, "Show only items with match in baseline"), OPT_CALLBACK('c', "compute", &compute, diff --git a/tools/perf/builtin-ftrace.c b/tools/perf/builtin-ftrace.c index 7de07bb16d23..d7fe00f66b83 100644 --- a/tools/perf/builtin-ftrace.c +++ b/tools/perf/builtin-ftrace.c @@ -36,8 +36,8 @@ #define DEFAULT_TRACER "function_graph" -static volatile int workload_exec_errno; -static bool done; +static volatile sig_atomic_t workload_exec_errno; +static volatile sig_atomic_t done; static void sig_handler(int sig __maybe_unused) { diff --git a/tools/perf/builtin-kwork.c b/tools/perf/builtin-kwork.c index 
fb8c63656ad8..4ffbf5908070 100644 --- a/tools/perf/builtin-kwork.c +++ b/tools/perf/builtin-kwork.c @@ -6,10 +6,15 @@ */ #include "builtin.h" +#include "perf.h" #include "util/data.h" +#include "util/evlist.h" +#include "util/evsel.h" +#include "util/header.h" #include "util/kwork.h" #include "util/debug.h" +#include "util/session.h" #include "util/symbol.h" #include "util/thread.h" #include "util/string2.h" @@ -21,6 +26,7 @@ #include <errno.h> #include <inttypes.h> +#include <signal.h> #include <linux/err.h> #include <linux/time64.h> #include <linux/zalloc.h> diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 58e1ec1654ef..cc84ced6da26 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -21,7 +21,6 @@ static bool desc_flag = true; static bool details_flag; -static const char *hybrid_type; int cmd_list(int argc, const char **argv) { @@ -30,6 +29,8 @@ int cmd_list(int argc, const char **argv) bool long_desc_flag = false; bool deprecated = false; char *pmu_name = NULL; + const char *hybrid_name = NULL; + const char *unit_name = NULL; struct option list_options[] = { OPT_BOOLEAN(0, "raw-dump", &raw_dump, "Dump raw events"), OPT_BOOLEAN('d', "desc", &desc_flag, @@ -40,9 +41,10 @@ int cmd_list(int argc, const char **argv) "Print information on the perf event names and expressions used internally by events."), OPT_BOOLEAN(0, "deprecated", &deprecated, "Print deprecated events."), - OPT_STRING(0, "cputype", &hybrid_type, "hybrid cpu type", - "Print events applying cpu with this type for hybrid platform " - "(e.g. core or atom)"), + OPT_STRING(0, "cputype", &hybrid_name, "hybrid cpu type", + "Limit PMU or metric printing to the given hybrid PMU (e.g. 
core or atom)."), + OPT_STRING(0, "unit", &unit_name, "PMU name", + "Limit PMU or metric printing to the specified PMU."), OPT_INCR(0, "debug", &verbose, "Enable debugging output"), OPT_END() @@ -53,6 +55,8 @@ int cmd_list(int argc, const char **argv) }; set_option_flag(list_options, 0, "raw-dump", PARSE_OPT_HIDDEN); + /* Hide hybrid flag for the more generic 'unit' flag. */ + set_option_flag(list_options, 0, "cputype", PARSE_OPT_HIDDEN); argc = parse_options(argc, argv, list_options, list_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -62,8 +66,10 @@ int cmd_list(int argc, const char **argv) if (!raw_dump && pager_in_use()) printf("\nList of pre-defined events (to be used in -e or -M):\n\n"); - if (hybrid_type) { - pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_type); + if (unit_name) + pmu_name = strdup(unit_name); + else if (hybrid_name) { + pmu_name = perf_pmu__hybrid_type_to_pmu(hybrid_name); if (!pmu_name) pr_warning("WARNING: hybrid cputype is not supported!\n"); } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index 9722d4ab2e55..0d280093b19a 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -24,6 +24,7 @@ #include "util/data.h" #include "util/string2.h" #include "util/map.h" +#include "util/util.h" #include <sys/types.h> #include <sys/prctl.h> @@ -1389,6 +1390,34 @@ static int dump_info(void) return rc; } +static const struct evsel_str_handler lock_tracepoints[] = { + { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ + { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ + { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ +}; + +static const struct evsel_str_handler contention_tracepoints[] = { + { "lock:contention_begin", evsel__process_contention_begin, }, + { "lock:contention_end", evsel__process_contention_end, }, +}; + 
+static int process_event_update(struct perf_tool *tool, + union perf_event *event, + struct evlist **pevlist) +{ + int ret; + + ret = perf_event__process_event_update(tool, event, pevlist); + if (ret < 0) + return ret; + + /* this can return -EEXIST since we call it for each evsel */ + perf_session__set_tracepoints_handlers(session, lock_tracepoints); + perf_session__set_tracepoints_handlers(session, contention_tracepoints); + return 0; +} + typedef int (*tracepoint_handler)(struct evsel *evsel, struct perf_sample *sample); @@ -1544,28 +1573,19 @@ next: print_bad_events(bad, total); } -static const struct evsel_str_handler lock_tracepoints[] = { - { "lock:lock_acquire", evsel__process_lock_acquire, }, /* CONFIG_LOCKDEP */ - { "lock:lock_acquired", evsel__process_lock_acquired, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_contended", evsel__process_lock_contended, }, /* CONFIG_LOCKDEP, CONFIG_LOCK_STAT */ - { "lock:lock_release", evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ -}; - -static const struct evsel_str_handler contention_tracepoints[] = { - { "lock:contention_begin", evsel__process_contention_begin, }, - { "lock:contention_end", evsel__process_contention_end, }, -}; - static bool force; static int __cmd_report(bool display_info) { int err = -EINVAL; struct perf_tool eops = { + .attr = perf_event__process_attr, + .event_update = process_event_update, .sample = process_sample_event, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, .namespaces = perf_event__process_namespaces, + .tracing_data = perf_event__process_tracing_data, .ordered_events = true, }; struct perf_data data = { @@ -1584,17 +1604,19 @@ static int __cmd_report(bool display_info) symbol_conf.sort_by_name = true; symbol__init(&session->header.env); - if (!perf_session__has_traces(session, "lock record")) - goto out_delete; + if (!data.is_pipe) { + if (!perf_session__has_traces(session, "lock record")) + goto out_delete; - if 
(perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { - pr_err("Initializing perf session tracepoint handlers failed\n"); - goto out_delete; - } + if (perf_session__set_tracepoints_handlers(session, lock_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } - if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { - pr_err("Initializing perf session tracepoint handlers failed\n"); - goto out_delete; + if (perf_session__set_tracepoints_handlers(session, contention_tracepoints)) { + pr_err("Initializing perf session tracepoint handlers failed\n"); + goto out_delete; + } } if (setup_output_field(false, output_fields)) @@ -1632,9 +1654,12 @@ static int __cmd_contention(int argc, const char **argv) { int err = -EINVAL; struct perf_tool eops = { + .attr = perf_event__process_attr, + .event_update = process_event_update, .sample = process_sample_event, .comm = perf_event__process_comm, .mmap = perf_event__process_mmap, + .tracing_data = perf_event__process_tracing_data, .ordered_events = true, }; struct perf_data data = { @@ -1697,7 +1722,7 @@ static int __cmd_contention(int argc, const char **argv) pr_err("lock contention BPF setup failed\n"); goto out_delete; } - } else { + } else if (!data.is_pipe) { if (!perf_session__has_traces(session, "lock record")) goto out_delete; @@ -1858,6 +1883,29 @@ static int parse_map_entry(const struct option *opt, const char *str, return 0; } +static int parse_max_stack(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + unsigned long *len = (unsigned long *)opt->value; + long val; + char *endptr; + + errno = 0; + val = strtol(str, &endptr, 0); + if (*endptr != '\0' || errno != 0) { + pr_err("invalid max stack depth: %s\n", str); + return -1; + } + + if (val < 0 || val > sysctl__max_stack()) { + pr_err("invalid max stack depth: %ld\n", val); + return -1; + } + + *len = val; + return 0; +} + int cmd_lock(int argc, const char 
**argv) { const struct option lock_options[] = { @@ -1869,7 +1917,7 @@ int cmd_lock(int argc, const char **argv) "file", "vmlinux pathname"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_END() }; @@ -1913,9 +1961,9 @@ int cmd_lock(int argc, const char **argv) "Trace on existing thread id (exclusive to --pid)"), OPT_CALLBACK(0, "map-nr-entries", &bpf_map_entries, "num", "Max number of BPF map entries", parse_map_entry), - OPT_INTEGER(0, "max-stack", &max_stack_depth, - "Set the maximum stack depth when collecting lock contention, " - "Default: " __stringify(CONTENTION_STACK_DEPTH)), + OPT_CALLBACK(0, "max-stack", &max_stack_depth, "num", + "Set the maximum stack depth when collecting lopck contention, " + "Default: " __stringify(CONTENTION_STACK_DEPTH), parse_max_stack), OPT_INTEGER(0, "stack-skip", &stack_skip, "Set the number of stack depth to skip when finding a lock caller, " "Default: " __stringify(CONTENTION_STACK_SKIP)), diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 923fb8316fda..dedd612eae5e 100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -20,6 +20,7 @@ #include "util/symbol.h" #include "util/pmu.h" #include "util/pmu-hybrid.h" +#include "util/sample.h" #include "util/string2.h" #include <linux/err.h> diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index f62298f5db3b..2ae50fc9e597 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -40,7 +40,6 @@ static struct { int command; /* Command short_name */ bool list_events; bool uprobes; - bool quiet; bool target_used; int nevents; struct perf_probe_event events[MAX_PROBES]; @@ -514,8 +513,8 @@ __cmd_probe(int argc, const char **argv) struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show parsed arguments, 
etc)"), - OPT_BOOLEAN('q', "quiet", &params.quiet, - "be quiet (do not show any messages)"), + OPT_BOOLEAN('q', "quiet", &quiet, + "be quiet (do not show any warnings or messages)"), OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT", "list up probe events", opt_set_filter_with_command, DEFAULT_LIST_FILTER), @@ -634,7 +633,7 @@ __cmd_probe(int argc, const char **argv) if (ret) return ret; - if (params.quiet) { + if (quiet) { if (verbose != 0) { pr_err(" Error: -v and -q are exclusive.\n"); return -EINVAL; diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index e128b855ddde..bd462a3f2bbd 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -646,10 +646,10 @@ static int record__pushfn(struct mmap *map, void *to, void *bf, size_t size) return record__write(rec, map, bf, size); } -static volatile int signr = -1; -static volatile int child_finished; +static volatile sig_atomic_t signr = -1; +static volatile sig_atomic_t child_finished; #ifdef HAVE_EVENTFD_SUPPORT -static volatile int done_fd = -1; +static volatile sig_atomic_t done_fd = -1; #endif static void sig_handler(int sig) @@ -1926,7 +1926,7 @@ static void record__read_lost_samples(struct record *rec) } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 @@ -3388,7 +3388,7 @@ static struct option __record_options[] = { &record_parse_callchain_opt), OPT_INCR('v', "verbose", &verbose, "be more verbose (show counter open errors, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "don't print any warnings or messages"), OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 8361890176c2..b6d77d3da64f 100644 --- a/tools/perf/builtin-report.c +++ 
b/tools/perf/builtin-report.c @@ -1222,7 +1222,7 @@ int cmd_report(int argc, const char **argv) "input file name"), OPT_INCR('v', "verbose", &verbose, "be more verbose (show symbol address, etc)"), - OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any message"), + OPT_BOOLEAN('q', "quiet", &quiet, "Do not show any warnings or messages"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN(0, "stats", &report.stats_mode, "Display event stats"), diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index 7ca238277d83..d7ec8c1af293 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -2049,7 +2049,7 @@ static void perf_sample__fprint_metric(struct perf_script *script, u64 val; if (!evsel->stats) - evlist__alloc_stats(script->session->evlist, false); + evlist__alloc_stats(&stat_config, script->session->evlist, /*alloc_raw=*/false); if (evsel_script(leader)->gnum++ == 0) perf_stat__reset_shadow_stats(); val = sample->period * evsel->scale; @@ -3632,7 +3632,7 @@ static int set_maps(struct perf_script *script) perf_evlist__set_maps(&evlist->core, script->cpus, script->threads); - if (evlist__alloc_stats(evlist, true)) + if (evlist__alloc_stats(&stat_config, evlist, /*alloc_raw=*/true)) return -ENOMEM; script->allocated = true; diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index 265b05157972..d040fbcdcc5a 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -93,6 +93,7 @@ #include <linux/ctype.h> #include <perf/evlist.h> +#include <internal/threadmap.h> #define DEFAULT_SEPARATOR " " #define FREEZE_ON_SMI_PATH "devices/cpu/freeze_on_smi" @@ -173,7 +174,7 @@ static struct target target = { #define METRIC_ONLY_LEN 20 -static volatile pid_t child_pid = -1; +static volatile sig_atomic_t child_pid = -1; static int detailed_run = 0; static bool transaction_run; static bool topdown_run = false; @@ -208,7 +209,7 @@ struct perf_stat { static struct perf_stat 
perf_stat; #define STAT_RECORD perf_stat.record -static volatile int done = 0; +static volatile sig_atomic_t done = 0; static struct perf_stat_config stat_config = { .aggr_mode = AGGR_GLOBAL, @@ -465,15 +466,19 @@ static int read_bpf_map_counters(void) return 0; } -static void read_counters(struct timespec *rs) +static int read_counters(struct timespec *rs) { - struct evsel *counter; - if (!stat_config.stop_read_counter) { if (read_bpf_map_counters() || read_affinity_counters(rs)) - return; + return -1; } + return 0; +} + +static void process_counters(void) +{ + struct evsel *counter; evlist__for_each_entry(evsel_list, counter) { if (counter->err) @@ -482,6 +487,10 @@ static void read_counters(struct timespec *rs) pr_warning("failed to process counter %s\n", counter->name); counter->err = 0; } + + perf_stat_merge_counters(&stat_config, evsel_list); + perf_stat_process_percore(&stat_config, evsel_list); + perf_stat_process_shadow_stats(&stat_config, evsel_list); } static void process_interval(void) @@ -492,7 +501,10 @@ static void process_interval(void) diff_timespec(&rs, &ts, &ref_time); perf_stat__reset_shadow_per_stat(&rt_stat); - read_counters(&rs); + evlist__reset_aggr_stats(evsel_list); + + if (read_counters(&rs) == 0) + process_counters(); if (STAT_RECORD) { if (WRITE_STAT_ROUND_EVENT(rs.tv_sec * NSEC_PER_SEC + rs.tv_nsec, INTERVAL)) @@ -569,7 +581,7 @@ static void disable_counters(void) } } -static volatile int workload_exec_errno; +static volatile sig_atomic_t workload_exec_errno; /* * evlist__prepare_workload will send a SIGUSR1 @@ -963,11 +975,9 @@ try_again_reset: init_stats(&walltime_nsecs_stats); update_stats(&walltime_nsecs_stats, t1 - t0); - if (stat_config.aggr_mode == AGGR_GLOBAL) - evlist__save_aggr_prev_raw_counts(evsel_list); - evlist__copy_prev_raw_counts(evsel_list); evlist__reset_prev_raw_counts(evsel_list); + evlist__reset_aggr_stats(evsel_list); perf_stat__reset_shadow_per_stat(&rt_stat); } else { update_stats(&walltime_nsecs_stats, t1 - 
t0); @@ -980,7 +990,8 @@ try_again_reset: * avoid arbitrary skew, we must read all counters before closing any * group leaders. */ - read_counters(&(struct timespec) { .tv_nsec = t1-t0 }); + if (read_counters(&(struct timespec) { .tv_nsec = t1-t0 }) == 0) + process_counters(); /* * We need to keep evsel_list alive, because it's processed @@ -1023,13 +1034,13 @@ static void print_counters(struct timespec *ts, int argc, const char **argv) /* Do not print anything if we record to the pipe. */ if (STAT_RECORD && perf_stat.data.is_pipe) return; - if (stat_config.quiet) + if (quiet) return; evlist__print_counters(evsel_list, &stat_config, &target, ts, argc, argv); } -static volatile int signr = -1; +static volatile sig_atomic_t signr = -1; static void skip_signal(int signo) { @@ -1273,8 +1284,8 @@ static struct option stat_options[] = { "print summary for interval mode"), OPT_BOOLEAN(0, "no-csv-summary", &stat_config.no_csv_summary, "don't print 'summary' for CSV summary output"), - OPT_BOOLEAN(0, "quiet", &stat_config.quiet, - "don't print output (useful with record)"), + OPT_BOOLEAN(0, "quiet", &quiet, + "don't print any output, messages or warnings (useful with record)"), OPT_CALLBACK(0, "cputype", &evsel_list, "hybrid cpu type", "Only enable events on applying cpu with this type " "for hybrid platform (e.g. 
core or atom)", @@ -1330,10 +1341,26 @@ static struct aggr_cpu_id perf_stat__get_node(struct perf_stat_config *config __ return aggr_cpu_id__node(cpu, /*data=*/NULL); } +static struct aggr_cpu_id perf_stat__get_global(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__global(cpu, /*data=*/NULL); +} + +static struct aggr_cpu_id perf_stat__get_cpu(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return aggr_cpu_id__cpu(cpu, /*data=*/NULL); +} + static struct aggr_cpu_id perf_stat__get_aggr(struct perf_stat_config *config, aggr_get_id_t get_id, struct perf_cpu cpu) { - struct aggr_cpu_id id = aggr_cpu_id__empty(); + struct aggr_cpu_id id; + + /* per-process mode - should use global aggr mode */ + if (cpu.cpu == -1) + return get_id(config, cpu); if (aggr_cpu_id__is_empty(&config->cpus_aggr_map->map[cpu.cpu])) config->cpus_aggr_map->map[cpu.cpu] = get_id(config, cpu); @@ -1366,16 +1393,16 @@ static struct aggr_cpu_id perf_stat__get_node_cached(struct perf_stat_config *co return perf_stat__get_aggr(config, perf_stat__get_node, cpu); } -static bool term_percore_set(void) +static struct aggr_cpu_id perf_stat__get_global_cached(struct perf_stat_config *config, + struct perf_cpu cpu) { - struct evsel *counter; - - evlist__for_each_entry(evsel_list, counter) { - if (counter->percore) - return true; - } + return perf_stat__get_aggr(config, perf_stat__get_global, cpu); +} - return false; +static struct aggr_cpu_id perf_stat__get_cpu_cached(struct perf_stat_config *config, + struct perf_cpu cpu) +{ + return perf_stat__get_aggr(config, perf_stat__get_cpu, cpu); } static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) @@ -1390,11 +1417,9 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr(enum aggr_mode aggr_mode) case AGGR_NODE: return aggr_cpu_id__node; case AGGR_NONE: - if (term_percore_set()) - return aggr_cpu_id__core; - - return NULL; + return aggr_cpu_id__cpu; case AGGR_GLOBAL: + return 
aggr_cpu_id__global; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1415,11 +1440,9 @@ static aggr_get_id_t aggr_mode__get_id(enum aggr_mode aggr_mode) case AGGR_NODE: return perf_stat__get_node_cached; case AGGR_NONE: - if (term_percore_set()) { - return perf_stat__get_core_cached; - } - return NULL; + return perf_stat__get_cpu_cached; case AGGR_GLOBAL: + return perf_stat__get_global_cached; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1434,8 +1457,9 @@ static int perf_stat_init_aggr_mode(void) aggr_cpu_id_get_t get_id = aggr_mode__get_aggr(stat_config.aggr_mode); if (get_id) { + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, - get_id, /*data=*/NULL); + get_id, /*data=*/NULL, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; @@ -1443,6 +1467,21 @@ static int perf_stat_init_aggr_mode(void) stat_config.aggr_get_id = aggr_mode__get_id(stat_config.aggr_mode); } + if (stat_config.aggr_mode == AGGR_THREAD) { + nr = perf_thread_map__nr(evsel_list->core.threads); + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } + /* * The evsel_list->cpus is the base we operate on, * taking the highest cpu number to be the size of @@ -1527,6 +1566,26 @@ static struct aggr_cpu_id perf_env__get_core_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_cpu_aggr_by_cpu(struct perf_cpu cpu, void *data) +{ + struct perf_env *env = data; + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + if (cpu.cpu != -1) { + /* + * core_id is relative to socket and die, + * we need a global id. 
So we set + * socket, die id and core id + */ + id.socket = env->cpu[cpu.cpu].socket_id; + id.die = env->cpu[cpu.cpu].die_id; + id.core = env->cpu[cpu.cpu].core_id; + id.cpu = cpu; + } + + return id; +} + static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, void *data) { struct aggr_cpu_id id = aggr_cpu_id__empty(); @@ -1535,6 +1594,16 @@ static struct aggr_cpu_id perf_env__get_node_aggr_by_cpu(struct perf_cpu cpu, vo return id; } +static struct aggr_cpu_id perf_env__get_global_aggr_by_cpu(struct perf_cpu cpu __maybe_unused, + void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + id.cpu = (struct perf_cpu){ .cpu = 0 }; + return id; +} + static struct aggr_cpu_id perf_stat__get_socket_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { @@ -1552,12 +1621,24 @@ static struct aggr_cpu_id perf_stat__get_core_file(struct perf_stat_config *conf return perf_env__get_core_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_cpu_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_cpu_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static struct aggr_cpu_id perf_stat__get_node_file(struct perf_stat_config *config __maybe_unused, struct perf_cpu cpu) { return perf_env__get_node_aggr_by_cpu(cpu, &perf_stat.session->header.env); } +static struct aggr_cpu_id perf_stat__get_global_file(struct perf_stat_config *config __maybe_unused, + struct perf_cpu cpu) +{ + return perf_env__get_global_aggr_by_cpu(cpu, &perf_stat.session->header.env); +} + static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) { switch (aggr_mode) { @@ -1569,8 +1650,10 @@ static aggr_cpu_id_get_t aggr_mode__get_aggr_file(enum aggr_mode aggr_mode) return perf_env__get_core_aggr_by_cpu; case AGGR_NODE: return perf_env__get_node_aggr_by_cpu; - case AGGR_NONE: case AGGR_GLOBAL: + 
return perf_env__get_global_aggr_by_cpu; + case AGGR_NONE: + return perf_env__get_cpu_aggr_by_cpu; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1590,8 +1673,10 @@ static aggr_get_id_t aggr_mode__get_id_file(enum aggr_mode aggr_mode) return perf_stat__get_core_file; case AGGR_NODE: return perf_stat__get_node_file; - case AGGR_NONE: case AGGR_GLOBAL: + return perf_stat__get_global_file; + case AGGR_NONE: + return perf_stat__get_cpu_file; case AGGR_THREAD: case AGGR_UNSET: case AGGR_MAX: @@ -1604,11 +1689,29 @@ static int perf_stat_init_aggr_mode_file(struct perf_stat *st) { struct perf_env *env = &st->session->header.env; aggr_cpu_id_get_t get_id = aggr_mode__get_aggr_file(stat_config.aggr_mode); + bool needs_sort = stat_config.aggr_mode != AGGR_NONE; + + if (stat_config.aggr_mode == AGGR_THREAD) { + int nr = perf_thread_map__nr(evsel_list->core.threads); + + stat_config.aggr_map = cpu_aggr_map__empty_new(nr); + if (stat_config.aggr_map == NULL) + return -ENOMEM; + + for (int s = 0; s < nr; s++) { + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + id.thread_idx = s; + stat_config.aggr_map->map[s] = id; + } + return 0; + } if (!get_id) return 0; - stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, get_id, env); + stat_config.aggr_map = cpu_aggr_map__new(evsel_list->core.user_requested_cpus, + get_id, env, needs_sort); if (!stat_config.aggr_map) { pr_err("cannot build %s map", aggr_mode__string[stat_config.aggr_mode]); return -1; @@ -1991,13 +2094,11 @@ static int process_stat_round_event(struct perf_session *session, union perf_event *event) { struct perf_record_stat_round *stat_round = &event->stat_round; - struct evsel *counter; struct timespec tsh, *ts = NULL; const char **argv = session->header.env.cmdline_argv; int argc = session->header.env.nr_cmdline; - evlist__for_each_entry(evsel_list, counter) - perf_stat_process_counter(&stat_config, counter); + process_counters(); if (stat_round->type == PERF_STAT_ROUND_TYPE__FINAL) 
update_stats(&walltime_nsecs_stats, stat_round->time); @@ -2024,17 +2125,23 @@ int process_stat_config_event(struct perf_session *session, if (perf_cpu_map__empty(st->cpus)) { if (st->aggr_mode != AGGR_UNSET) pr_warning("warning: processing task data, aggregation mode not set\n"); - return 0; - } - - if (st->aggr_mode != AGGR_UNSET) + } else if (st->aggr_mode != AGGR_UNSET) { stat_config.aggr_mode = st->aggr_mode; + } if (perf_stat.data.is_pipe) perf_stat_init_aggr_mode(); else perf_stat_init_aggr_mode_file(st); + if (stat_config.aggr_map) { + int nr_aggr = stat_config.aggr_map->nr; + + if (evlist__alloc_aggr_stats(session->evlist, nr_aggr) < 0) { + pr_err("cannot allocate aggr counts\n"); + return -1; + } + } return 0; } @@ -2048,7 +2155,7 @@ static int set_maps(struct perf_stat *st) perf_evlist__set_maps(&evsel_list->core, st->cpus, st->threads); - if (evlist__alloc_stats(evsel_list, true)) + if (evlist__alloc_stats(&stat_config, evsel_list, /*alloc_raw=*/true)) return -ENOMEM; st->maps_allocated = true; @@ -2277,7 +2384,7 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (!output && !stat_config.quiet) { + if (!output && !quiet) { struct timespec tm; mode = append_file ? 
"a" : "w"; @@ -2297,6 +2404,14 @@ int cmd_stat(int argc, const char **argv) } } + if (stat_config.interval_clear && !isatty(fileno(output))) { + fprintf(stderr, "--interval-clear does not work with output\n"); + parse_options_usage(stat_usage, stat_options, "o", 1); + parse_options_usage(NULL, stat_options, "log-fd", 0); + parse_options_usage(NULL, stat_options, "interval-clear", 0); + return -1; + } + stat_config.output = output; /* @@ -2495,10 +2610,10 @@ int cmd_stat(int argc, const char **argv) goto out; } - if (evlist__alloc_stats(evsel_list, interval)) + if (perf_stat_init_aggr_mode()) goto out; - if (perf_stat_init_aggr_mode()) + if (evlist__alloc_stats(&stat_config, evsel_list, interval)) goto out; /* diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index 4b3ff7687236..bb5bd241246b 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -87,8 +87,8 @@ #include <linux/ctype.h> #include <perf/mmap.h> -static volatile int done; -static volatile int resize; +static volatile sig_atomic_t done; +static volatile sig_atomic_t resize; #define HEADER_LINE_NR 5 diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index d3c757769b96..3257da5cad23 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ -924,6 +924,8 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_PTR, /* brk */ }, }, }, { .name = "clock_gettime", .arg = { [0] = STRARRAY(clk_id, clockid), }, }, + { .name = "clock_nanosleep", + .arg = { [2] = { .scnprintf = SCA_TIMESPEC, /* rqtp */ }, }, }, { .name = "clone", .errpid = true, .nr_args = 5, .arg = { [0] = { .name = "flags", .scnprintf = SCA_CLONE_FLAGS, }, [1] = { .name = "child_stack", .scnprintf = SCA_HEX, }, @@ -1053,7 +1055,8 @@ static struct syscall_fmt syscall_fmts[] = { .arg = { [0] = { .scnprintf = SCA_FDAT, /* dfd */ }, [2] = { .scnprintf = SCA_OPEN_FLAGS, /* flags */ }, }, }, { .name = "perf_event_open", - .arg = { [2] = { .scnprintf = SCA_INT, /* 
cpu */ }, + .arg = { [0] = { .scnprintf = SCA_PERF_ATTR, /* attr */ }, + [2] = { .scnprintf = SCA_INT, /* cpu */ }, [3] = { .scnprintf = SCA_FD, /* group_fd */ }, [4] = { .scnprintf = SCA_PERF_FLAGS, /* flags */ }, }, }, { .name = "pipe2", @@ -1535,8 +1538,8 @@ static size_t trace__fprintf_tstamp(struct trace *trace, u64 tstamp, FILE *fp) } static pid_t workload_pid = -1; -static bool done = false; -static bool interrupted = false; +static volatile sig_atomic_t done = false; +static volatile sig_atomic_t interrupted = false; static void sighandler_interrupt(int sig __maybe_unused) { @@ -4092,8 +4095,8 @@ static int trace__run(struct trace *trace, int argc, const char **argv) } trace->multiple_threads = perf_thread_map__pid(evlist->core.threads, 0) == -1 || - evlist->core.threads->nr > 1 || - evlist__first(evlist)->core.attr.inherit; + perf_thread_map__nr(evlist->core.threads) > 1 || + evlist__first(evlist)->core.attr.inherit; /* * Now that we already used evsel->core.attr to ask the kernel to setup the diff --git a/tools/perf/examples/bpf/5sec.c b/tools/perf/examples/bpf/5sec.c index e6b6181c6dc6..3bd7fc17631f 100644 --- a/tools/perf/examples/bpf/5sec.c +++ b/tools/perf/examples/bpf/5sec.c @@ -39,13 +39,15 @@ Copyright (C) 2018 Red Hat, Inc., Arnaldo Carvalho de Melo <[email protected]> */ -#include <bpf.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> #define NSEC_PER_SEC 1000000000L -int probe(hrtimer_nanosleep, rqtp)(void *ctx, int err, long long sec) +SEC("hrtimer_nanosleep=hrtimer_nanosleep rqtp") +int hrtimer_nanosleep(void *ctx, int err, long long sec) { return sec / NSEC_PER_SEC == 5ULL; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/augmented_raw_syscalls.c b/tools/perf/examples/bpf/augmented_raw_syscalls.c index a262dcd020f4..7dc24c9173a7 100644 --- a/tools/perf/examples/bpf/augmented_raw_syscalls.c +++ b/tools/perf/examples/bpf/augmented_raw_syscalls.c @@ -14,13 +14,28 @@ * code that will combine 
entry/exit in a strace like way. */ -#include <unistd.h> +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> #include <linux/limits.h> -#include <linux/socket.h> -#include <pid_filter.h> + +// FIXME: These should come from system headers +typedef char bool; +typedef int pid_t; +typedef long long int __s64; +typedef __s64 time64_t; + +struct timespec64 { + time64_t tv_sec; + long int tv_nsec; +}; /* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); +struct __augmented_syscalls__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __augmented_syscalls__ SEC(".maps"); /* * string_args_len: one per syscall arg, 0 means not a string or don't copy it, @@ -29,24 +44,39 @@ bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); */ struct syscall { bool enabled; - u16 string_args_len[6]; + __u16 string_args_len[6]; }; -bpf_map(syscalls, ARRAY, int, struct syscall, 512); +struct syscalls { + __uint(type, BPF_MAP_TYPE_ARRAY); + __type(key, int); + __type(value, struct syscall); + __uint(max_entries, 512); +} syscalls SEC(".maps"); /* * What to augment at entry? * * Pointer arg payloads (filenames, etc) passed from userspace to the kernel */ -bpf_map(syscalls_sys_enter, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_enter { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_enter SEC(".maps"); /* * What to augment at exit? * * Pointer arg payloads returned from the kernel (struct stat, etc) to userspace. 
*/ -bpf_map(syscalls_sys_exit, PROG_ARRAY, u32, u32, 512); +struct syscalls_sys_exit { + __uint(type, BPF_MAP_TYPE_PROG_ARRAY); + __type(key, __u32); + __type(value, __u32); + __uint(max_entries, 512); +} syscalls_sys_exit SEC(".maps"); struct syscall_enter_args { unsigned long long common_tp_fields; @@ -66,7 +96,38 @@ struct augmented_arg { char value[PATH_MAX]; }; -pid_filter(pids_filtered); +struct pids_filtered { + __uint(type, BPF_MAP_TYPE_HASH); + __type(key, pid_t); + __type(value, bool); + __uint(max_entries, 64); +} pids_filtered SEC(".maps"); + +/* + * Desired design of maximum size and alignment (see RFC2553) + */ +#define SS_MAXSIZE 128 /* Implementation specific max size */ + +typedef unsigned short sa_family_t; + +/* + * FIXME: Should come from system headers + * + * The definition uses anonymous union and struct in order to control the + * default alignment. + */ +struct sockaddr_storage { + union { + struct { + sa_family_t ss_family; /* address family */ + /* Following field(s) are implementation specific */ + char __data[SS_MAXSIZE - sizeof(unsigned short)]; + /* space to achieve desired size, */ + /* _SS_MAXSIZE value minus size of ss_family */ + }; + void *__align; /* implementation specific desired alignment */ + }; +}; struct augmented_args_payload { struct syscall_enter_args args; @@ -75,11 +136,17 @@ struct augmented_args_payload { struct augmented_arg arg, arg2; }; struct sockaddr_storage saddr; + char __data[sizeof(struct augmented_arg)]; }; }; // We need more tmp space than the BPF stack can give us -bpf_map(augmented_args_tmp, PERCPU_ARRAY, int, struct augmented_args_payload, 1); +struct augmented_args_tmp { + __uint(type, BPF_MAP_TYPE_PERCPU_ARRAY); + __type(key, int); + __type(value, struct augmented_args_payload); + __uint(max_entries, 1); +} augmented_args_tmp SEC(".maps"); static inline struct augmented_args_payload *augmented_args_payload(void) { @@ -90,14 +157,14 @@ static inline struct augmented_args_payload 
*augmented_args_payload(void) static inline int augmented__output(void *ctx, struct augmented_args_payload *args, int len) { /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ - return perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); + return bpf_perf_event_output(ctx, &__augmented_syscalls__, BPF_F_CURRENT_CPU, args, len); } static inline unsigned int augmented_arg__read_str(struct augmented_arg *augmented_arg, const void *arg, unsigned int arg_len) { unsigned int augmented_len = sizeof(*augmented_arg); - int string_len = probe_read_str(&augmented_arg->value, arg_len, arg); + int string_len = bpf_probe_read_str(&augmented_arg->value, arg_len, arg); augmented_arg->size = augmented_arg->err = 0; /* @@ -146,7 +213,7 @@ int sys_enter_connect(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -165,7 +232,7 @@ int sys_enter_sendto(struct syscall_enter_args *args) if (socklen > sizeof(augmented_args->saddr)) socklen = sizeof(augmented_args->saddr); - probe_read(&augmented_args->saddr, socklen, sockaddr_arg); + bpf_probe_read(&augmented_args->saddr, socklen, sockaddr_arg); return augmented__output(args, augmented_args, len + socklen); } @@ -234,6 +301,80 @@ int sys_enter_renameat(struct syscall_enter_args *args) return augmented__output(args, augmented_args, len); } +#define PERF_ATTR_SIZE_VER0 64 /* sizeof first published struct */ + +// we need just the start, get the size to then copy it +struct perf_event_attr_size { + __u32 type; + /* + * Size of the attr structure, for fwd/bwd compat. 
+ */ + __u32 size; +}; + +SEC("!syscalls:sys_enter_perf_event_open") +int sys_enter_perf_event_open(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const struct perf_event_attr_size *attr = (const struct perf_event_attr_size *)args->args[0], *attr_read; + unsigned int len = sizeof(augmented_args->args); + + if (augmented_args == NULL) + goto failure; + + if (bpf_probe_read(&augmented_args->__data, sizeof(*attr), attr) < 0) + goto failure; + + attr_read = (const struct perf_event_attr_size *)augmented_args->__data; + + __u32 size = attr_read->size; + + if (!size) + size = PERF_ATTR_SIZE_VER0; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + // Now that we read attr->size and tested it against the size limits, read it completely + if (bpf_probe_read(&augmented_args->__data, size, attr) < 0) + goto failure; + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +SEC("!syscalls:sys_enter_clock_nanosleep") +int sys_enter_clock_nanosleep(struct syscall_enter_args *args) +{ + struct augmented_args_payload *augmented_args = augmented_args_payload(); + const void *rqtp_arg = (const void *)args->args[2]; + unsigned int len = sizeof(augmented_args->args); + __u32 size = sizeof(struct timespec64); + + if (augmented_args == NULL) + goto failure; + + if (size > sizeof(augmented_args->__data)) + goto failure; + + bpf_probe_read(&augmented_args->__data, size, rqtp_arg); + + return augmented__output(args, augmented_args, len + size); +failure: + return 1; /* Failure: don't filter */ +} + +static pid_t getpid(void) +{ + return bpf_get_current_pid_tgid(); +} + +static bool pid_filter__has(struct pids_filtered *pids, pid_t pid) +{ + return bpf_map_lookup_elem(pids, &pid) != NULL; +} + SEC("raw_syscalls:sys_enter") int sys_enter(struct syscall_enter_args *args) { @@ -257,7 +398,7 @@ int sys_enter(struct syscall_enter_args *args) if 
(augmented_args == NULL) return 1; - probe_read(&augmented_args->args, sizeof(augmented_args->args), args); + bpf_probe_read(&augmented_args->args, sizeof(augmented_args->args), args); /* * Jump to syscall specific augmenter, even if the default one, @@ -278,7 +419,7 @@ int sys_exit(struct syscall_exit_args *args) if (pid_filter__has(&pids_filtered, getpid())) return 0; - probe_read(&exit_args, sizeof(exit_args), args); + bpf_probe_read(&exit_args, sizeof(exit_args), args); /* * Jump to syscall specific return augmenter, even if the default one, * "!raw_syscalls:unaugmented" that will just return 1 to return the @@ -291,4 +432,4 @@ int sys_exit(struct syscall_exit_args *args) return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/augmented_syscalls.c b/tools/perf/examples/bpf/augmented_syscalls.c deleted file mode 100644 index 524fdb8534b3..000000000000 --- a/tools/perf/examples/bpf/augmented_syscalls.c +++ /dev/null @@ -1,169 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment syscalls with the contents of the pointer arguments. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. - * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them, that will - * check if perf_sample->raw_data is more than what is expected for each - * syscalls:sys_{enter,exit}_SYSCALL tracepoint, uing the extra data as the - * contents of pointer arguments. 
- */ - -#include <stdio.h> -#include <linux/socket.h> - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct syscall_exit_args { - unsigned long long common_tp_fields; - long syscall_nr; - long ret; -}; - -struct augmented_filename { - unsigned int size; - int reserved; - char value[256]; -}; - -#define augmented_filename_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - unsigned int len = sizeof(augmented_args); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (augmented_args.filename.size < sizeof(augmented_args.filename.value)) { \ - len -= sizeof(augmented_args.filename.value) - augmented_args.filename.size; \ - len &= sizeof(augmented_args.filename.value) - 1; \ - } \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, len); \ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 
'read' */ \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(openat); - -struct syscall_enter_open_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall(open); - -struct syscall_enter_inotify_add_watch_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - char *filename_ptr; - long mask; -}; - -augmented_filename_syscall(inotify_add_watch); - -struct statbuf; - -struct syscall_enter_newstat_args { - unsigned long long common_tp_fields; - long syscall_nr; - char *filename_ptr; - struct stat *statbuf; -}; - -augmented_filename_syscall(newstat); - -#ifndef _K_SS_MAXSIZE -#define _K_SS_MAXSIZE 128 -#endif - -#define augmented_sockaddr_syscall(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct sockaddr_storage addr; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - struct augmented_enter_##syscall##_args augmented_args; \ - unsigned long addrlen = sizeof(augmented_args.addr); \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ -/* FIXME_CLANG_OPTIMIZATION_THAT_ACCESSES_USER_CONTROLLED_ADDRLEN_DESPITE_THIS_CHECK */ \ -/* if (addrlen > augmented_args.args.addrlen) */ \ -/* addrlen = augmented_args.args.addrlen; */ \ -/* */ \ - probe_read(&augmented_args.addr, addrlen, args->addr_ptr); \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - sizeof(augmented_args) - sizeof(augmented_args.addr) + addrlen);\ -} \ -int syscall_exit(syscall)(struct syscall_exit_args *args) \ -{ \ - return 1; /* 0 as soon as we start copying data returned by the kernel, e.g. 
'read' */ \ -} - -struct sockaddr; - -struct syscall_enter_bind_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(bind); - -struct syscall_enter_connect_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - struct sockaddr *addr_ptr; - unsigned long addrlen; -}; - -augmented_sockaddr_syscall(connect); - -struct syscall_enter_sendto_args { - unsigned long long common_tp_fields; - long syscall_nr; - long fd; - void *buff; - long len; - unsigned long flags; - struct sockaddr *addr_ptr; - long addr_len; -}; - -augmented_sockaddr_syscall(sendto); - -license(GPL); diff --git a/tools/perf/examples/bpf/empty.c b/tools/perf/examples/bpf/empty.c index 7d7fb0c9fe76..3e296c0c53d7 100644 --- a/tools/perf/examples/bpf/empty.c +++ b/tools/perf/examples/bpf/empty.c @@ -1,3 +1,12 @@ -#include <bpf/bpf.h> +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> -license(GPL); +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) +{ + return 0; +} +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/examples/bpf/etcsnoop.c b/tools/perf/examples/bpf/etcsnoop.c deleted file mode 100644 index e81b535346c0..000000000000 --- a/tools/perf/examples/bpf/etcsnoop.c +++ /dev/null @@ -1,76 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -/* - * Augment the filename syscalls with the contents of the filename pointer argument - * filtering only those that do not start with /etc/. - * - * Test it with: - * - * perf trace -e tools/perf/examples/bpf/augmented_syscalls.c cat /etc/passwd > /dev/null - * - * It'll catch some openat syscalls related to the dynamic linked and - * the last one should be the one for '/etc/passwd'. 
- * - * This matches what is marshalled into the raw_syscall:sys_enter payload - * expected by the 'perf trace' beautifiers, and can be used by them unmodified, - * which will be done as that feature is implemented in the next csets, for now - * it will appear in a dump done by the default tracepoint handler in 'perf trace', - * that uses bpf_output__fprintf() to just dump those contents, as done with - * the bpf-output event associated with the __bpf_output__ map declared in - * tools/perf/include/bpf/stdio.h. - */ - -#include <stdio.h> - -/* bpf-output associated map */ -bpf_map(__augmented_syscalls__, PERF_EVENT_ARRAY, int, u32, __NR_CPUS__); - -struct augmented_filename { - int size; - int reserved; - char value[64]; -}; - -#define augmented_filename_syscall_enter(syscall) \ -struct augmented_enter_##syscall##_args { \ - struct syscall_enter_##syscall##_args args; \ - struct augmented_filename filename; \ -}; \ -int syscall_enter(syscall)(struct syscall_enter_##syscall##_args *args) \ -{ \ - char etc[6] = "/etc/"; \ - struct augmented_enter_##syscall##_args augmented_args = { .filename.reserved = 0, }; \ - probe_read(&augmented_args.args, sizeof(augmented_args.args), args); \ - augmented_args.filename.size = probe_read_str(&augmented_args.filename.value, \ - sizeof(augmented_args.filename.value), \ - args->filename_ptr); \ - if (__builtin_memcmp(augmented_args.filename.value, etc, 4) != 0) \ - return 0; \ - /* If perf_event_output fails, return non-zero so that it gets recorded unaugmented */ \ - return perf_event_output(args, &__augmented_syscalls__, BPF_F_CURRENT_CPU, \ - &augmented_args, \ - (sizeof(augmented_args) - sizeof(augmented_args.filename.value) + \ - augmented_args.filename.size)); \ -} - -struct syscall_enter_openat_args { - unsigned long long common_tp_fields; - long syscall_nr; - long dfd; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(openat); - -struct syscall_enter_open_args { - unsigned long long 
common_tp_fields; - long syscall_nr; - char *filename_ptr; - long flags; - long mode; -}; - -augmented_filename_syscall_enter(open); - -license(GPL); diff --git a/tools/perf/examples/bpf/hello.c b/tools/perf/examples/bpf/hello.c index cf3c2fdc7f79..e9080b0df158 100644 --- a/tools/perf/examples/bpf/hello.c +++ b/tools/perf/examples/bpf/hello.c @@ -1,9 +1,27 @@ -#include <stdio.h> +// SPDX-License-Identifier: GPL-2.0 +#include <linux/bpf.h> +#include <bpf/bpf_helpers.h> -int syscall_enter(openat)(void *args) +struct __bpf_stdout__ { + __uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY); + __type(key, int); + __type(value, __u32); + __uint(max_entries, __NR_CPUS__); +} __bpf_stdout__ SEC(".maps"); + +#define puts(from) \ + ({ const int __len = sizeof(from); \ + char __from[sizeof(from)] = from; \ + bpf_perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ + &__from, __len & (sizeof(from) - 1)); }) + +struct syscall_enter_args; + +SEC("raw_syscalls:sys_enter") +int sys_enter(struct syscall_enter_args *args) { puts("Hello, world\n"); return 0; } -license(GPL); +char _license[] SEC("license") = "GPL"; diff --git a/tools/perf/include/bpf/bpf.h b/tools/perf/include/bpf/bpf.h deleted file mode 100644 index b422aeef5339..000000000000 --- a/tools/perf/include/bpf/bpf.h +++ /dev/null @@ -1,70 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 -#ifndef _PERF_BPF_H -#define _PERF_BPF_H - -#include <uapi/linux/bpf.h> - -/* - * A helper structure used by eBPF C program to describe map attributes to - * elf_bpf loader, taken from tools/testing/selftests/bpf/bpf_helpers.h: - */ -struct bpf_map { - unsigned int type; - unsigned int key_size; - unsigned int value_size; - unsigned int max_entries; - unsigned int map_flags; - unsigned int inner_map_idx; - unsigned int numa_node; -}; - -#define bpf_map(name, _type, type_key, type_val, _max_entries) \ -struct bpf_map SEC("maps") name = { \ - .type = BPF_MAP_TYPE_##_type, \ - .key_size = sizeof(type_key), \ - .value_size = sizeof(type_val), \ - 
.max_entries = _max_entries, \ -}; \ -struct ____btf_map_##name { \ - type_key key; \ - type_val value; \ -}; \ -struct ____btf_map_##name __attribute__((section(".maps." #name), used)) \ - ____btf_map_##name = { } - -/* - * FIXME: this should receive .max_entries as a parameter, as careful - * tuning of these limits is needed to avoid hitting limits that - * prevents other BPF constructs, such as tracepoint handlers, - * to get installed, with cryptic messages from libbpf, etc. - * For the current need, 'perf trace --filter-pids', 64 should - * be good enough, but this surely needs to be revisited. - */ -#define pid_map(name, value_type) bpf_map(name, HASH, pid_t, value_type, 64) - -static int (*bpf_map_update_elem)(struct bpf_map *map, void *key, void *value, u64 flags) = (void *)BPF_FUNC_map_update_elem; -static void *(*bpf_map_lookup_elem)(struct bpf_map *map, void *key) = (void *)BPF_FUNC_map_lookup_elem; - -static void (*bpf_tail_call)(void *ctx, void *map, int index) = (void *)BPF_FUNC_tail_call; - -#define SEC(NAME) __attribute__((section(NAME), used)) - -#define probe(function, vars) \ - SEC(#function "=" #function " " #vars) function - -#define syscall_enter(name) \ - SEC("syscalls:sys_enter_" #name) syscall_enter_ ## name - -#define syscall_exit(name) \ - SEC("syscalls:sys_exit_" #name) syscall_exit_ ## name - -#define license(name) \ -char _license[] SEC("license") = #name; \ -int _version SEC("version") = LINUX_VERSION_CODE; - -static int (*probe_read)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read; -static int (*probe_read_str)(void *dst, int size, const void *unsafe_addr) = (void *)BPF_FUNC_probe_read_str; - -static int (*perf_event_output)(void *, struct bpf_map *, int, void *, unsigned long) = (void *)BPF_FUNC_perf_event_output; - -#endif /* _PERF_BPF_H */ diff --git a/tools/perf/include/bpf/linux/socket.h b/tools/perf/include/bpf/linux/socket.h deleted file mode 100644 index 7f844568dab8..000000000000 --- 
a/tools/perf/include/bpf/linux/socket.h +++ /dev/null @@ -1,24 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -#ifndef _UAPI_LINUX_SOCKET_H -#define _UAPI_LINUX_SOCKET_H - -/* - * Desired design of maximum size and alignment (see RFC2553) - */ -#define _K_SS_MAXSIZE 128 /* Implementation specific max size */ -#define _K_SS_ALIGNSIZE (__alignof__ (struct sockaddr *)) - /* Implementation specific desired alignment */ - -typedef unsigned short __kernel_sa_family_t; - -struct __kernel_sockaddr_storage { - __kernel_sa_family_t ss_family; /* address family */ - /* Following field(s) are implementation specific */ - char __data[_K_SS_MAXSIZE - sizeof(unsigned short)]; - /* space to achieve desired size, */ - /* _SS_MAXSIZE value minus size of ss_family */ -} __attribute__ ((aligned(_K_SS_ALIGNSIZE))); /* force desired alignment */ - -#define sockaddr_storage __kernel_sockaddr_storage - -#endif /* _UAPI_LINUX_SOCKET_H */ diff --git a/tools/perf/include/bpf/pid_filter.h b/tools/perf/include/bpf/pid_filter.h deleted file mode 100644 index 6e61c4bdf548..000000000000 --- a/tools/perf/include/bpf/pid_filter.h +++ /dev/null @@ -1,21 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 - -#ifndef _PERF_BPF_PID_FILTER_ -#define _PERF_BPF_PID_FILTER_ - -#include <bpf.h> - -#define pid_filter(name) pid_map(name, bool) - -static int pid_filter__add(struct bpf_map *pids, pid_t pid) -{ - bool value = true; - return bpf_map_update_elem(pids, &pid, &value, BPF_NOEXIST); -} - -static bool pid_filter__has(struct bpf_map *pids, pid_t pid) -{ - return bpf_map_lookup_elem(pids, &pid) != NULL; -} - -#endif // _PERF_BPF_PID_FILTER_ diff --git a/tools/perf/include/bpf/stdio.h b/tools/perf/include/bpf/stdio.h deleted file mode 100644 index 316af5b2ff35..000000000000 --- a/tools/perf/include/bpf/stdio.h +++ /dev/null @@ -1,16 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0 - -#include <bpf.h> - -struct bpf_map SEC("maps") __bpf_stdout__ = { - .type = BPF_MAP_TYPE_PERF_EVENT_ARRAY, - 
.key_size = sizeof(int), - .value_size = sizeof(u32), - .max_entries = __NR_CPUS__, -}; - -#define puts(from) \ - ({ const int __len = sizeof(from); \ - char __from[__len] = from; \ - perf_event_output(args, &__bpf_stdout__, BPF_F_CURRENT_CPU, \ - &__from, __len & (sizeof(from) - 1)); }) diff --git a/tools/perf/include/bpf/unistd.h b/tools/perf/include/bpf/unistd.h deleted file mode 100644 index ca7877f9a976..000000000000 --- a/tools/perf/include/bpf/unistd.h +++ /dev/null @@ -1,10 +0,0 @@ -// SPDX-License-Identifier: LGPL-2.1 - -#include <bpf.h> - -static int (*bpf_get_current_pid_tgid)(void) = (void *)BPF_FUNC_get_current_pid_tgid; - -static pid_t getpid(void) -{ - return bpf_get_current_pid_tgid(); -} diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json index 79f2016c53b0..79f2016c53b0 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/branch.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/branch.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json index 579c1c993d17..579c1c993d17 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/bus.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/bus.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json index 0141f749bff3..0141f749bff3 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/cache.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/cache.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json index 344a2d552ad5..344a2d552ad5 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/exception.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/exception.json diff --git 
a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json index e57cd55937c6..e57cd55937c6 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/instruction.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/instruction.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json index 7b2b21ac150f..7b2b21ac150f 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/memory.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/memory.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json index f9fae15f7555..f9fae15f7555 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/pipeline.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/pipeline.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json index 20f2165c85fe..20f2165c85fe 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/spe.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/spe.json diff --git a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json index 3116135c59e2..3116135c59e2 100644 --- a/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2/trace.json +++ b/tools/perf/pmu-events/arch/arm64/arm/neoverse-n2-v2/trace.json diff --git a/tools/perf/pmu-events/arch/arm64/mapfile.csv b/tools/perf/pmu-events/arch/arm64/mapfile.csv index ad502d00f460..f134e833c069 100644 --- a/tools/perf/pmu-events/arch/arm64/mapfile.csv +++ b/tools/perf/pmu-events/arch/arm64/mapfile.csv @@ -34,7 +34,8 @@ 0x00000000410fd460,v1,arm/cortex-a510,core 0x00000000410fd470,v1,arm/cortex-a710,core 0x00000000410fd480,v1,arm/cortex-x2,core 
-0x00000000410fd490,v1,arm/neoverse-n2,core +0x00000000410fd490,v1,arm/neoverse-n2-v2,core +0x00000000410fd4f0,v1,arm/neoverse-n2-v2,core 0x00000000420f5160,v1,cavium/thunderx2,core 0x00000000430f0af0,v1,cavium/thunderx2,core 0x00000000460f0010,v1,fujitsu/a64fx,core diff --git a/tools/perf/pmu-events/arch/riscv/mapfile.csv b/tools/perf/pmu-events/arch/riscv/mapfile.csv new file mode 100644 index 000000000000..c61b3d6ef616 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/mapfile.csv @@ -0,0 +1,17 @@ +# Format: +# MVENDORID-MARCHID-MIMPID,Version,JSON/file/pathname,Type +# +# where +# MVENDORID JEDEC code of the core provider +# MARCHID base microarchitecture of the hart +# MIMPID unique encoding of the version +# of the processor implementation +# Version could be used to track version of JSON file +# but currently unused. +# JSON/file/pathname is the path to JSON file, relative +# to tools/perf/pmu-events/arch/riscv/. +# Type is core, uncore etc +# +# +#MVENDORID-MARCHID-MIMPID,Version,Filename,EventType +0x489-0x8000000000000007-0x[[:xdigit:]]+,v1,sifive/u74,core diff --git a/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json new file mode 100644 index 000000000000..a9939823b14b --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/riscv-sbi-firmware.json @@ -0,0 +1,134 @@ +[ + { + "PublicDescription": "Misaligned load trap", + "ConfigCode": "0x8000000000000000", + "EventName": "FW_MISALIGNED_LOAD", + "BriefDescription": "Misaligned load trap event" + }, + { + "PublicDescription": "Misaligned store trap", + "ConfigCode": "0x8000000000000001", + "EventName": "FW_MISALIGNED_STORE", + "BriefDescription": "Misaligned store trap event" + }, + { + "PublicDescription": "Load access trap", + "ConfigCode": "0x8000000000000002", + "EventName": "FW_ACCESS_LOAD", + "BriefDescription": "Load access trap event" + }, + { + "PublicDescription": "Store access trap", + "ConfigCode": "0x8000000000000003", +
"EventName": "FW_ACCESS_STORE", + "BriefDescription": "Store access trap event" + }, + { + "PublicDescription": "Illegal instruction trap", + "ConfigCode": "0x8000000000000004", + "EventName": "FW_ILLEGAL_INSN", + "BriefDescription": "Illegal instruction trap event" + }, + { + "PublicDescription": "Set timer event", + "ConfigCode": "0x8000000000000005", + "EventName": "FW_SET_TIMER", + "BriefDescription": "Set timer event" + }, + { + "PublicDescription": "Sent IPI to other HART event", + "ConfigCode": "0x8000000000000006", + "EventName": "FW_IPI_SENT", + "BriefDescription": "Sent IPI to other HART event" + }, + { + "PublicDescription": "Received IPI from other HART event", + "ConfigCode": "0x8000000000000007", + "EventName": "FW_IPI_RECEIVED", + "BriefDescription": "Received IPI from other HART event" + }, + { + "PublicDescription": "Sent FENCE.I request to other HART event", + "ConfigCode": "0x8000000000000008", + "EventName": "FW_FENCE_I_SENT", + "BriefDescription": "Sent FENCE.I request to other HART event" + }, + { + "PublicDescription": "Received FENCE.I request from other HART event", + "ConfigCode": "0x8000000000000009", + "EventName": "FW_FENCE_I_RECEIVED", + "BriefDescription": "Received FENCE.I request from other HART event" + }, + { + "PublicDescription": "Sent SFENCE.VMA request to other HART event", + "ConfigCode": "0x800000000000000a", + "EventName": "FW_SFENCE_VMA_SENT", + "BriefDescription": "Sent SFENCE.VMA request to other HART event" + }, + { + "PublicDescription": "Received SFENCE.VMA request from other HART event", + "ConfigCode": "0x800000000000000b", + "EventName": "FW_SFENCE_VMA_RECEIVED", + "BriefDescription": "Received SFENCE.VMA request from other HART event" + }, + { + "PublicDescription": "Sent SFENCE.VMA with ASID request to other HART event", + "ConfigCode": "0x800000000000000c", + "EventName": "FW_SFENCE_VMA_ASID_SENT", + "BriefDescription": "Sent SFENCE.VMA with ASID request to other HART event" + }, + { + "PublicDescription":
"Received SFENCE.VMA with ASID request from other HART event", + "ConfigCode": "0x800000000000000d", + "EventName": "FW_SFENCE_VMA_ASID_RECEIVED", + "BriefDescription": "Received SFENCE.VMA with ASID request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.GVMA request to other HART event", + "ConfigCode": "0x800000000000000e", + "EventName": "FW_HFENCE_GVMA_SENT", + "BriefDescription": "Sent HFENCE.GVMA request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.GVMA request from other HART event", + "ConfigCode": "0x800000000000000f", + "EventName": "FW_HFENCE_GVMA_RECEIVED", + "BriefDescription": "Received HFENCE.GVMA request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.GVMA with VMID request to other HART event", + "ConfigCode": "0x8000000000000010", + "EventName": "FW_HFENCE_GVMA_VMID_SENT", + "BriefDescription": "Sent HFENCE.GVMA with VMID request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.GVMA with VMID request from other HART event", + "ConfigCode": "0x8000000000000011", + "EventName": "FW_HFENCE_GVMA_VMID_RECEIVED", + "BriefDescription": "Received HFENCE.GVMA with VMID request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.VVMA request to other HART event", + "ConfigCode": "0x8000000000000012", + "EventName": "FW_HFENCE_VVMA_SENT", + "BriefDescription": "Sent HFENCE.VVMA request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.VVMA request from other HART event", + "ConfigCode": "0x8000000000000013", + "EventName": "FW_HFENCE_VVMA_RECEIVED", + "BriefDescription": "Received HFENCE.VVMA request from other HART event" + }, + { + "PublicDescription": "Sent HFENCE.VVMA with ASID request to other HART event", + "ConfigCode": "0x8000000000000014", + "EventName": "FW_HFENCE_VVMA_ASID_SENT", + "BriefDescription": "Sent HFENCE.VVMA with ASID request to other HART event" + }, + { + "PublicDescription": "Received HFENCE.VVMA with ASID
request from other HART event", + "ConfigCode": "0x8000000000000015", + "EventName": "FW_HFENCE_VVMA_ASID_RECEIVED", + "BriefDescription": "Received HFENCE.VVMA with ASID request from other HART event" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json new file mode 100644 index 000000000000..9b4a032186a7 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/firmware.json @@ -0,0 +1,68 @@ +[ + { + "ArchStdEvent": "FW_MISALIGNED_LOAD" + }, + { + "ArchStdEvent": "FW_MISALIGNED_STORE" + }, + { + "ArchStdEvent": "FW_ACCESS_LOAD" + }, + { + "ArchStdEvent": "FW_ACCESS_STORE" + }, + { + "ArchStdEvent": "FW_ILLEGAL_INSN" + }, + { + "ArchStdEvent": "FW_SET_TIMER" + }, + { + "ArchStdEvent": "FW_IPI_SENT" + }, + { + "ArchStdEvent": "FW_IPI_RECEIVED" + }, + { + "ArchStdEvent": "FW_FENCE_I_SENT" + }, + { + "ArchStdEvent": "FW_FENCE_I_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_SFENCE_VMA_ASID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_GVMA_VMID_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_RECEIVED" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_SENT" + }, + { + "ArchStdEvent": "FW_HFENCE_VVMA_ASID_RECEIVED" + } +] diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json new file mode 100644 index 000000000000..5eab718c9256 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/instructions.json @@ -0,0 +1,92 @@ +[ + { + "EventName": "EXCEPTION_TAKEN", + "EventCode": "0x0000100", + "BriefDescription": "Exception taken" + }, + { +
"EventName": "INTEGER_LOAD_RETIRED", + "EventCode": "0x0000200", + "BriefDescription": "Integer load instruction retired" + }, + { + "EventName": "INTEGER_STORE_RETIRED", + "EventCode": "0x0000400", + "BriefDescription": "Integer store instruction retired" + }, + { + "EventName": "ATOMIC_MEMORY_RETIRED", + "EventCode": "0x0000800", + "BriefDescription": "Atomic memory operation retired" + }, + { + "EventName": "SYSTEM_INSTRUCTION_RETIRED", + "EventCode": "0x0001000", + "BriefDescription": "System instruction retired" + }, + { + "EventName": "INTEGER_ARITHMETIC_RETIRED", + "EventCode": "0x0002000", + "BriefDescription": "Integer arithmetic instruction retired" + }, + { + "EventName": "CONDITIONAL_BRANCH_RETIRED", + "EventCode": "0x0004000", + "BriefDescription": "Conditional branch retired" + }, + { + "EventName": "JAL_INSTRUCTION_RETIRED", + "EventCode": "0x0008000", + "BriefDescription": "JAL instruction retired" + }, + { + "EventName": "JALR_INSTRUCTION_RETIRED", + "EventCode": "0x0010000", + "BriefDescription": "JALR instruction retired" + }, + { + "EventName": "INTEGER_MULTIPLICATION_RETIRED", + "EventCode": "0x0020000", + "BriefDescription": "Integer multiplication instruction retired" + }, + { + "EventName": "INTEGER_DIVISION_RETIRED", + "EventCode": "0x0040000", + "BriefDescription": "Integer division instruction retired" + }, + { + "EventName": "FP_LOAD_RETIRED", + "EventCode": "0x0080000", + "BriefDescription": "Floating-point load instruction retired" + }, + { + "EventName": "FP_STORE_RETIRED", + "EventCode": "0x0100000", + "BriefDescription": "Floating-point store instruction retired" + }, + { + "EventName": "FP_ADDITION_RETIRED", + "EventCode": "0x0200000", + "BriefDescription": "Floating-point addition retired" + }, + { + "EventName": "FP_MULTIPLICATION_RETIRED", + "EventCode": "0x0400000", + "BriefDescription": "Floating-point multiplication retired" + }, + { + "EventName": "FP_FUSEDMADD_RETIRED", + "EventCode": "0x0800000", + "BriefDescription": 
"Floating-point fused multiply-add retired" + }, + { + "EventName": "FP_DIV_SQRT_RETIRED", + "EventCode": "0x1000000", + "BriefDescription": "Floating-point division or square-root retired" + }, + { + "EventName": "OTHER_FP_RETIRED", + "EventCode": "0x2000000", + "BriefDescription": "Other floating-point instruction retired" + } +]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json new file mode 100644 index 000000000000..be1a46312ac3 --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/memory.json @@ -0,0 +1,32 @@ +[ + { + "EventName": "ICACHE_RETIRED", + "EventCode": "0x0000102", + "BriefDescription": "Instruction cache miss" + }, + { + "EventName": "DCACHE_MISS_MMIO_ACCESSES", + "EventCode": "0x0000202", + "BriefDescription": "Data cache miss or memory-mapped I/O access" + }, + { + "EventName": "DCACHE_WRITEBACK", + "EventCode": "0x0000402", + "BriefDescription": "Data cache write-back" + }, + { + "EventName": "INST_TLB_MISS", + "EventCode": "0x0000802", + "BriefDescription": "Instruction TLB miss" + }, + { + "EventName": "DATA_TLB_MISS", + "EventCode": "0x0001002", + "BriefDescription": "Data TLB miss" + }, + { + "EventName": "UTLB_MISS", + "EventCode": "0x0002002", + "BriefDescription": "UTLB miss" + } +]
\ No newline at end of file diff --git a/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json new file mode 100644 index 000000000000..50ffa55418cb --- /dev/null +++ b/tools/perf/pmu-events/arch/riscv/sifive/u74/microarch.json @@ -0,0 +1,57 @@ +[ + { + "EventName": "ADDRESSGEN_INTERLOCK", + "EventCode": "0x0000101", + "BriefDescription": "Address-generation interlock" + }, + { + "EventName": "LONGLAT_INTERLOCK", + "EventCode": "0x0000201", + "BriefDescription": "Long-latency interlock" + }, + { + "EventName": "CSR_READ_INTERLOCK", + "EventCode": "0x0000401", + "BriefDescription": "CSR read interlock" + }, + { + "EventName": "ICACHE_ITIM_BUSY", + "EventCode": "0x0000801", + "BriefDescription": "Instruction cache/ITIM busy" + }, + { + "EventName": "DCACHE_DTIM_BUSY", + "EventCode": "0x0001001", + "BriefDescription": "Data cache/DTIM busy" + }, + { + "EventName": "BRANCH_DIRECTION_MISPREDICTION", + "EventCode": "0x0002001", + "BriefDescription": "Branch direction misprediction" + }, + { + "EventName": "BRANCH_TARGET_MISPREDICTION", + "EventCode": "0x0004001", + "BriefDescription": "Branch/jump target misprediction" + }, + { + "EventName": "PIPE_FLUSH_CSR_WRITE", + "EventCode": "0x0008001", + "BriefDescription": "Pipeline flush from CSR write" + }, + { + "EventName": "PIPE_FLUSH_OTHER_EVENT", + "EventCode": "0x0010001", + "BriefDescription": "Pipeline flush from other event" + }, + { + "EventName": "INTEGER_MULTIPLICATION_INTERLOCK", + "EventCode": "0x0020001", + "BriefDescription": "Integer multiplication interlock" + }, + { + "EventName": "FP_INTERLOCK", + "EventCode": "0x0040001", + "BriefDescription": "Floating-point interlock" + } +]
\ No newline at end of file diff --git a/tools/perf/scripts/python/intel-pt-events.py b/tools/perf/scripts/python/intel-pt-events.py index 6be7fd8fd615..08862a2582f4 100644 --- a/tools/perf/scripts/python/intel-pt-events.py +++ b/tools/perf/scripts/python/intel-pt-events.py @@ -13,10 +13,12 @@ from __future__ import print_function +import io import os import sys import struct import argparse +import contextlib from libxed import LibXED from ctypes import create_string_buffer, addressof @@ -39,6 +41,11 @@ glb_src = False glb_source_file_name = None glb_line_number = None glb_dso = None +glb_stash_dict = {} +glb_output = None +glb_output_pos = 0 +glb_cpu = -1 +glb_time = 0 def get_optional_null(perf_dict, field): if field in perf_dict: @@ -70,6 +77,7 @@ def trace_begin(): ap.add_argument("--insn-trace", action='store_true') ap.add_argument("--src-trace", action='store_true') ap.add_argument("--all-switch-events", action='store_true') + ap.add_argument("--interleave", type=int, nargs='?', const=4, default=0) global glb_args global glb_insn global glb_src @@ -94,11 +102,39 @@ def trace_begin(): perf_set_itrace_options(perf_script_context, itrace) def trace_end(): + if glb_args.interleave: + flush_stashed_output() print("End") def trace_unhandled(event_name, context, event_fields_dict): print(' '.join(['%s=%s'%(k,str(v))for k,v in sorted(event_fields_dict.items())])) +def stash_output(): + global glb_stash_dict + global glb_output_pos + output_str = glb_output.getvalue()[glb_output_pos:] + n = len(output_str) + if n: + glb_output_pos += n + if glb_cpu not in glb_stash_dict: + glb_stash_dict[glb_cpu] = [] + glb_stash_dict[glb_cpu].append(output_str) + +def flush_stashed_output(): + global glb_stash_dict + while glb_stash_dict: + cpus = list(glb_stash_dict.keys()) + # Output at most glb_args.interleave output strings per cpu + for cpu in cpus: + items = glb_stash_dict[cpu] + countdown = glb_args.interleave + while len(items) and countdown: + sys.stdout.write(items[0]) + 
del items[0] + countdown -= 1 + if not items: + del glb_stash_dict[cpu] + def print_ptwrite(raw_buf): data = struct.unpack_from("<IQ", raw_buf) flags = data[0] @@ -375,15 +411,40 @@ def do_process_event(param_dict): print_common_start(comm, sample, name) print_common_ip(param_dict, sample, symbol, dso) +def interleave_events(param_dict): + global glb_cpu + global glb_time + global glb_output + global glb_output_pos + + sample = param_dict["sample"] + glb_cpu = sample["cpu"] + ts = sample["time"] + + if glb_time != ts: + glb_time = ts + flush_stashed_output() + + glb_output_pos = 0 + with contextlib.redirect_stdout(io.StringIO()) as glb_output: + do_process_event(param_dict) + + stash_output() + def process_event(param_dict): try: - do_process_event(param_dict) + if glb_args.interleave: + interleave_events(param_dict) + else: + do_process_event(param_dict) except broken_pipe_exception: # Stop python printing broken pipe errors and traceback sys.stdout = open(os.devnull, 'w') sys.exit(1) def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): + if glb_args.interleave: + flush_stashed_output() if len(x) >= 2 and x[0]: machine_pid = x[0] vcpu = x[1] @@ -403,6 +464,8 @@ def auxtrace_error(typ, code, cpu, pid, tid, ip, ts, msg, cpumode, *x): sys.exit(1) def context_switch(ts, cpu, pid, tid, np_pid, np_tid, machine_pid, out, out_preempt, *x): + if glb_args.interleave: + flush_stashed_output() if out: out_str = "Switch out " else: diff --git a/tools/perf/tests/cpumap.c b/tools/perf/tests/cpumap.c index 7c873c6ae3eb..3150fc1fed6f 100644 --- a/tools/perf/tests/cpumap.c +++ b/tools/perf/tests/cpumap.c @@ -6,7 +6,7 @@ #include "util/synthetic-events.h" #include <string.h> #include <linux/bitops.h> -#include <perf/cpumap.h> +#include <internal/cpumap.h> #include "debug.h" struct machine; diff --git a/tools/perf/tests/dlfilter-test.c b/tools/perf/tests/dlfilter-test.c index 84352d55347d..99aa72e425e4 100644 --- a/tools/perf/tests/dlfilter-test.c +++ 
b/tools/perf/tests/dlfilter-test.c @@ -33,6 +33,7 @@ #include "archinsn.h" #include "dlfilter.h" #include "tests.h" +#include "util/sample.h" #define MAP_START 0x400000 diff --git a/tools/perf/tests/expr.c b/tools/perf/tests/expr.c index 6512f5e22045..b6667501ebb4 100644 --- a/tools/perf/tests/expr.c +++ b/tools/perf/tests/expr.c @@ -2,6 +2,7 @@ #include "util/cputopo.h" #include "util/debug.h" #include "util/expr.h" +#include "util/hashmap.h" #include "util/header.h" #include "util/smt.h" #include "tests.h" diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 8322fc2295fa..e68ca6229756 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -5,11 +5,13 @@ #include <perf/cpumap.h> #include "debug.h" +#include "event.h" #include "evlist.h" #include "evsel.h" #include "thread_map.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" #include <linux/err.h> #include <linux/kernel.h> #include <linux/string.h> diff --git a/tools/perf/tests/openat-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index a7b2800652e4..888df8eca981 100644 --- a/tools/perf/tests/openat-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -14,6 +14,7 @@ #include "util/mmap.h" #include <errno.h> #include <perf/mmap.h> +#include "util/sample.h" #ifndef O_DIRECTORY #define O_DIRECTORY 00200000 diff --git a/tools/perf/tests/openat-syscall.c b/tools/perf/tests/openat-syscall.c index 7e05b8b5cc95..131b62271bfa 100644 --- a/tools/perf/tests/openat-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -7,6 +7,7 @@ #include <sys/types.h> #include <sys/stat.h> #include <fcntl.h> +#include <unistd.h> #include "thread_map.h" #include "evsel.h" #include "debug.h" diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 459afdb256a1..3440dd2616b0 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -2237,6 +2237,19 @@ static int 
test__pmu_events(struct test_suite *test __maybe_unused, int subtest pr_debug("Test PMU event failed for '%s'", name); ret = combine_test_results(ret, test_ret); } + /* + * Names containing '-' are recognized as prefixes and suffixes + * due to '-' being a legacy PMU separator. This fails when the + * prefix or suffix collides with an existing legacy token. For + * example, branch-brs has a prefix (branch) that collides with + * a PE_NAME_CACHE_TYPE token causing a parse error as a suffix + * isn't expected after this. As event names in the config + * slashes are allowed a '-' in the name we check this works + * above. + */ + if (strchr(ent->d_name, '-')) + continue; + snprintf(name, sizeof(name), "%s:u,cpu/event=%s/u", ent->d_name, ent->d_name); e.name = name; e.check = test__checkevent_pmu_events_mix; diff --git a/tools/perf/tests/parse-metric.c b/tools/perf/tests/parse-metric.c index 68f5a2a03242..21b7ac00d798 100644 --- a/tools/perf/tests/parse-metric.c +++ b/tools/perf/tests/parse-metric.c @@ -103,7 +103,7 @@ static int __compute_metric(const char *name, struct value *vals, if (err) goto out; - err = evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false); if (err) goto out; diff --git a/tools/perf/tests/parse-no-sample-id-all.c b/tools/perf/tests/parse-no-sample-id-all.c index d62e31595ab2..202f0a9a6796 100644 --- a/tools/perf/tests/parse-no-sample-id-all.c +++ b/tools/perf/tests/parse-no-sample-id-all.c @@ -8,6 +8,7 @@ #include "evlist.h" #include "header.h" #include "debug.h" +#include "util/sample.h" static int process_event(struct evlist **pevlist, union perf_event *event) { diff --git a/tools/perf/tests/perf-record.c b/tools/perf/tests/perf-record.c index 7aa946aa886d..1c4feec1adff 100644 --- a/tools/perf/tests/perf-record.c +++ b/tools/perf/tests/perf-record.c @@ -5,12 +5,14 @@ #include <sched.h> #include <perf/mmap.h> +#include "event.h" #include "evlist.h" #include "evsel.h" #include "debug.h" 
#include "record.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" static int sched__get_first_possible_cpu(pid_t pid, cpu_set_t *maskp) { diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index c3aaa1ddff29..efcd71c2738a 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -20,6 +20,7 @@ #include "tsc.h" #include "mmap.h" #include "tests.h" +#include "util/sample.h" /* * Except x86_64/i386 and Arm64, other archs don't support TSC in perf. Just diff --git a/tools/perf/tests/pmu-events.c b/tools/perf/tests/pmu-events.c index 097e05c796ab..f7b9dbbad97f 100644 --- a/tools/perf/tests/pmu-events.c +++ b/tools/perf/tests/pmu-events.c @@ -12,6 +12,7 @@ #include <perf/evlist.h> #include "util/evlist.h" #include "util/expr.h" +#include "util/hashmap.h" #include "util/parse-events.h" #include "metricgroup.h" #include "stat.h" @@ -889,7 +890,7 @@ static int test__parsing_callback(const struct pmu_event *pe, const struct pmu_e goto out_err; } - err = evlist__alloc_stats(evlist, false); + err = evlist__alloc_stats(/*config=*/NULL, evlist, /*alloc_raw=*/false); if (err) goto out_err; /* diff --git a/tools/perf/tests/shell/lock_contention.sh b/tools/perf/tests/shell/lock_contention.sh index 04bf604e3c6f..f7bd0d8eb5c3 100755 --- a/tools/perf/tests/shell/lock_contention.sh +++ b/tools/perf/tests/shell/lock_contention.sh @@ -53,7 +53,7 @@ test_bpf() if ! 
perf lock con -b true > /dev/null 2>&1 ; then echo "[Skip] No BPF support" - exit + return fi # the perf lock contention output goes to the stderr @@ -65,9 +65,22 @@ test_bpf() fi } +test_record_concurrent() +{ + echo "Testing perf lock record and perf lock contention at the same time" + perf lock record -o- -- perf bench sched messaging 2> /dev/null | \ + perf lock contention -i- -E 1 -q 2> ${result} + if [ $(cat "${result}" | wc -l) != "1" ]; then + echo "[Fail] Recorded result count is not 1:" $(cat "${result}" | wc -l) + err=1 + exit + fi +} + check test_record test_bpf +test_record_concurrent exit ${err} diff --git a/tools/perf/tests/shell/record.sh b/tools/perf/tests/shell/record.sh index 301f95427159..e93b3a8871fe 100755 --- a/tools/perf/tests/shell/record.sh +++ b/tools/perf/tests/shell/record.sh @@ -4,67 +4,140 @@ set -e +shelldir=$(dirname "$0") +. "${shelldir}"/lib/waiting.sh + err=0 perfdata=$(mktemp /tmp/__perf_test.perf.data.XXXXX) +testprog=$(mktemp /tmp/__perf_test.prog.XXXXXX) +testsym="test_loop" cleanup() { - rm -f ${perfdata} - rm -f ${perfdata}.old - trap - exit term int + rm -rf "${perfdata}" + rm -rf "${perfdata}".old + + if [ "${testprog}" != "true" ]; then + rm -f "${testprog}" + fi + + trap - EXIT TERM INT } trap_cleanup() { cleanup exit 1 } -trap trap_cleanup exit term int +trap trap_cleanup EXIT TERM INT + +build_test_program() { + if ! [ -x "$(command -v cc)" ]; then + # No CC found. 
Fall back to 'true' + testprog=true + testsym=true + return + fi + + echo "Build a test program" + cat <<EOF | cc -o ${testprog} -xc - -pthread +#include <stdio.h> +#include <stdlib.h> +#include <pthread.h> + +void test_loop(void) { + volatile int count = 1000000; + + while (count--) + continue; +} + +void *thfunc(void *arg) { + int forever = *(int *)arg; + + do { + test_loop(); + } while (forever); + + return NULL; +} + +int main(int argc, char *argv[]) { + pthread_t th; + int forever = 0; + + if (argc > 1) + forever = atoi(argv[1]); + + pthread_create(&th, NULL, thfunc, &forever); + test_loop(); + pthread_join(th, NULL); + + return 0; +} +EOF +} test_per_thread() { echo "Basic --per-thread mode test" - if ! perf record -e instructions:u -o ${perfdata} --quiet true 2> /dev/null + if ! perf record -o /dev/null --quiet ${testprog} 2> /dev/null then - echo "Per-thread record [Skipped instructions:u not supported]" - if [ $err -ne 1 ] - then - err=2 - fi + echo "Per-thread record [Skipped event not supported]" return fi - if ! perf record -e instructions:u --per-thread -o ${perfdata} true 2> /dev/null + if ! perf record --per-thread -o "${perfdata}" ${testprog} 2> /dev/null then - echo "Per-thread record of instructions:u [Failed]" + echo "Per-thread record [Failed record]" err=1 return fi - if ! perf report -i ${perfdata} -q | egrep -q true + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" then echo "Per-thread record [Failed missing output]" err=1 return fi + + # run the test program in background (forever) + ${testprog} 1 & + TESTPID=$! + + rm -f "${perfdata}" + + wait_for_threads ${TESTPID} 2 + perf record -p "${TESTPID}" --per-thread -o "${perfdata}" sleep 1 2> /dev/null + kill ${TESTPID} + + if [ ! -e "${perfdata}" ] + then + echo "Per-thread record [Failed record -p]" + err=1 + return + fi + if ! 
perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Per-thread record [Failed -p missing output]" + err=1 + return + fi + echo "Basic --per-thread mode test [Success]" } test_register_capture() { echo "Register capture test" - if ! perf list | egrep -q 'br_inst_retired.near_call' + if ! perf list | grep -q 'br_inst_retired.near_call' then - echo "Register capture test [Skipped missing instruction]" - if [ $err -ne 1 ] - then - err=2 - fi + echo "Register capture test [Skipped missing event]" return fi - if ! perf record --intr-regs=\? 2>&1 | egrep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' + if ! perf record --intr-regs=\? 2>&1 | grep -q 'available registers: AX BX CX DX SI DI BP SP IP FLAGS CS SS R8 R9 R10 R11 R12 R13 R14 R15' then echo "Register capture test [Skipped missing registers]" return fi if ! perf record -o - --intr-regs=di,r8,dx,cx -e br_inst_retired.near_call:p \ - -c 1000 --per-thread true 2> /dev/null \ + -c 1000 --per-thread ${testprog} 2> /dev/null \ | perf script -F ip,sym,iregs -i - 2> /dev/null \ - | egrep -q "DI:" + | grep -q "DI:" then echo "Register capture test [Failed missing output]" err=1 @@ -73,8 +146,71 @@ test_register_capture() { echo "Register capture test [Success]" } +test_system_wide() { + echo "Basic --system-wide mode test" + if ! perf record -aB --synth=no -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "System-wide record [Skipped not supported]" + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "System-wide record [Failed missing output]" + err=1 + return + fi + if ! perf record -aB --synth=no -e cpu-clock,cs --threads=cpu \ + -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "System-wide record [Failed record --threads option]" + err=1 + return + fi + if ! 
perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "System-wide record [Failed --threads missing output]" + err=1 + return + fi + echo "Basic --system-wide mode test [Success]" +} + +test_workload() { + echo "Basic target workload test" + if ! perf record -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed missing output]" + err=1 + return + fi + if ! perf record -e cpu-clock,cs --threads=package \ + -o "${perfdata}" ${testprog} 2> /dev/null + then + echo "Workload record [Failed record --threads option]" + err=1 + return + fi + if ! perf report -i "${perfdata}" -q | grep -q "${testsym}" + then + echo "Workload record [Failed --threads missing output]" + err=1 + return + fi + echo "Basic target workload test [Success]" +} + +build_test_program + test_per_thread test_register_capture +test_system_wide +test_workload cleanup exit $err diff --git a/tools/perf/tests/sw-clock.c b/tools/perf/tests/sw-clock.c index 9cd6fec375ee..4d7493fa0105 100644 --- a/tools/perf/tests/sw-clock.c +++ b/tools/perf/tests/sw-clock.c @@ -13,6 +13,7 @@ #include "util/evlist.h" #include "util/cpumap.h" #include "util/mmap.h" +#include "util/sample.h" #include "util/thread_map.h" #include <perf/evlist.h> #include <perf/mmap.h> diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index 87f565c7f650..b3bd14b025a8 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -19,6 +19,7 @@ #include "record.h" #include "tests.h" #include "util/mmap.h" +#include "util/sample.h" #include "pmu.h" static int spin_sleep(void) diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c index e413c1387fcb..74308c1368fe 100644 --- a/tools/perf/tests/thread-map.c +++ b/tools/perf/tests/thread-map.c @@ -11,6 +11,7 @@ #include "util/synthetic-events.h" 
#include <linux/zalloc.h> #include <perf/event.h> +#include <internal/threadmap.h> struct perf_sample; struct perf_tool; diff --git a/tools/perf/trace/beauty/Build b/tools/perf/trace/beauty/Build index 433dc39053a7..d11ce256f511 100644 --- a/tools/perf/trace/beauty/Build +++ b/tools/perf/trace/beauty/Build @@ -17,4 +17,5 @@ perf-y += sockaddr.o perf-y += socket.o perf-y += statx.o perf-y += sync_file_range.o +perf-y += timespec.o perf-y += tracepoints/ diff --git a/tools/perf/trace/beauty/beauty.h b/tools/perf/trace/beauty/beauty.h index f527a46ab4e7..4c59edddd6a8 100644 --- a/tools/perf/trace/beauty/beauty.h +++ b/tools/perf/trace/beauty/beauty.h @@ -244,6 +244,9 @@ size_t syscall_arg__scnprintf_statx_mask(char *bf, size_t size, struct syscall_a size_t syscall_arg__scnprintf_sync_file_range_flags(char *bf, size_t size, struct syscall_arg *arg); #define SCA_SYNC_FILE_RANGE_FLAGS syscall_arg__scnprintf_sync_file_range_flags +size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg); +#define SCA_TIMESPEC syscall_arg__scnprintf_timespec + size_t open__scnprintf_flags(unsigned long flags, char *bf, size_t size, bool show_prefix); void syscall_arg__set_ret_scnprintf(struct syscall_arg *arg, diff --git a/tools/perf/trace/beauty/perf_event_open.c b/tools/perf/trace/beauty/perf_event_open.c index 11d47dbe63bd..01ee15fe9d0c 100644 --- a/tools/perf/trace/beauty/perf_event_open.c +++ b/tools/perf/trace/beauty/perf_event_open.c @@ -44,3 +44,47 @@ static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, } #define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags + +struct attr_fprintf_args { + size_t size, printed; + char *bf; + bool first; +}; + +static int attr__fprintf(FILE *fp __maybe_unused, const char *name, const char *val, void *priv) +{ + struct attr_fprintf_args *args = priv; + size_t printed = scnprintf(args->bf + args->printed , args->size - args->printed, "%s%s: %s", args->first ? 
"" : ", ", name, val); + + args->first = false; + args->printed += printed; + return printed; +} + +static size_t perf_event_attr___scnprintf(struct perf_event_attr *attr, char *bf, size_t size, bool show_zeros __maybe_unused) +{ + struct attr_fprintf_args args = { + .printed = scnprintf(bf, size, "{ "), + .size = size, + .first = true, + .bf = bf, + }; + + perf_event_attr__fprintf(stdout, attr, attr__fprintf, &args); + return args.printed + scnprintf(bf + args.printed, size - args.printed, " }"); +} + +static size_t syscall_arg__scnprintf_augmented_perf_event_attr(struct syscall_arg *arg, char *bf, size_t size) +{ + return perf_event_attr___scnprintf((void *)arg->augmented.args, bf, size, arg->trace->show_zeros); +} + +static size_t syscall_arg__scnprintf_perf_event_attr(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_perf_event_attr(arg, bf, size); + + return scnprintf(bf, size, "%#lx", arg->val); +} + +#define SCA_PERF_ATTR syscall_arg__scnprintf_perf_event_attr diff --git a/tools/perf/trace/beauty/timespec.c b/tools/perf/trace/beauty/timespec.c new file mode 100644 index 000000000000..e1a61f092aad --- /dev/null +++ b/tools/perf/trace/beauty/timespec.c @@ -0,0 +1,21 @@ +// SPDX-License-Identifier: LGPL-2.1 +// Copyright (C) 2022, Red Hat Inc, Arnaldo Carvalho de Melo <[email protected]> + +#include "trace/beauty/beauty.h" +#include <inttypes.h> +#include <time.h> + +static size_t syscall_arg__scnprintf_augmented_timespec(struct syscall_arg *arg, char *bf, size_t size) +{ + struct timespec *ts = (struct timespec *)arg->augmented.args; + + return scnprintf(bf, size, "{ .tv_sec: %" PRIu64 ", .tv_nsec: %" PRIu64 " }", ts->tv_sec, ts->tv_nsec); +} + +size_t syscall_arg__scnprintf_timespec(char *bf, size_t size, struct syscall_arg *arg) +{ + if (arg->augmented.args) + return syscall_arg__scnprintf_augmented_timespec(arg, bf, size); + + return scnprintf(bf, size, "%#lx", arg->val); +} diff --git 
a/tools/perf/ui/util.c b/tools/perf/ui/util.c index 689b27c34246..1d38ddf01b60 100644 --- a/tools/perf/ui/util.c +++ b/tools/perf/ui/util.c @@ -15,6 +15,9 @@ static int perf_stdio__error(const char *format, va_list args) static int perf_stdio__warning(const char *format, va_list args) { + if (quiet) + return 0; + fprintf(stderr, "Warning:\n"); vfprintf(stderr, format, args); return 0; @@ -45,6 +48,8 @@ int ui__warning(const char *format, ...) { int ret; va_list args; + if (quiet) + return 0; va_start(args, format); ret = perf_eops->warning(format, args); diff --git a/tools/perf/util/Build b/tools/perf/util/Build index e315ecaec323..ab37f588ee8b 100644 --- a/tools/perf/util/Build +++ b/tools/perf/util/Build @@ -19,7 +19,6 @@ perf-y += perf_event_attr_fprintf.o perf-y += evswitch.o perf-y += find_bit.o perf-y += get_current_dir_name.o -perf-y += kallsyms.o perf-y += levenshtein.o perf-y += llvm-utils.o perf-y += mmap.o @@ -220,7 +219,7 @@ perf-$(CONFIG_CXX) += c++/ perf-$(CONFIG_LIBPFM4) += pfm.o CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" -CFLAGS_llvm-utils.o += -DPERF_INCLUDE_DIR="BUILD_STR($(perf_include_dir_SQ))" +CFLAGS_llvm-utils.o += -DLIBBPF_INCLUDE_DIR="BUILD_STR($(libbpf_include_dir_SQ))" # avoid compiler warnings in 32-bit mode CFLAGS_genelf_debug.o += -Wno-packed @@ -294,10 +293,6 @@ CFLAGS_expr.o += -Wno-redundant-decls CFLAGS_header.o += -include $(OUTPUT)PERF-VERSION-FILE CFLAGS_arm-spe.o += -I$(srctree)/tools/arch/arm64/include/ -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE - $(call rule_mkdir) - $(call if_changed_dep,cc_o_c) - $(OUTPUT)util/argv_split.o: ../lib/argv_split.c FORCE $(call rule_mkdir) $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/amd-sample-raw.c b/tools/perf/util/amd-sample-raw.c index 238305868644..b0e70ce9d87a 100644 --- a/tools/perf/util/amd-sample-raw.c +++ b/tools/perf/util/amd-sample-raw.c @@ -16,6 +16,7 @@ #include "evlist.h" #include "sample-raw.h" #include 
"pmu-events/pmu-events.h" +#include "util/sample.h" static u32 cpu_family, cpu_model, ibs_fetch_type, ibs_op_type; static bool zen4_ibs_extensions; diff --git a/tools/perf/util/arm64-frame-pointer-unwind-support.h b/tools/perf/util/arm64-frame-pointer-unwind-support.h index 32af9ce94398..42d3a45490f5 100644 --- a/tools/perf/util/arm64-frame-pointer-unwind-support.h +++ b/tools/perf/util/arm64-frame-pointer-unwind-support.h @@ -2,8 +2,10 @@ #ifndef __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H #define __PERF_ARM_FRAME_POINTER_UNWIND_SUPPORT_H -#include "event.h" -#include "thread.h" +#include <linux/types.h> + +struct perf_sample; +struct thread; u64 get_leaf_frame_caller_aarch64(struct perf_sample *sample, struct thread *thread, int user_idx); diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c index 46ada5ec3f9a..265d20cc126b 100644 --- a/tools/perf/util/auxtrace.c +++ b/tools/perf/util/auxtrace.c @@ -59,6 +59,7 @@ #include <linux/ctype.h> #include "symbol/kallsyms.h" #include <internal/lib.h> +#include "util/sample.h" /* * Make a group from 'leader' to 'last', requiring that the events were not diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h index 6a0f9b98f059..2cf63d377831 100644 --- a/tools/perf/util/auxtrace.h +++ b/tools/perf/util/auxtrace.h @@ -15,7 +15,7 @@ #include <linux/list.h> #include <linux/perf_event.h> #include <linux/types.h> -#include <internal/cpumap.h> +#include <perf/cpumap.h> #include <asm/bitsperlong.h> #include <asm/barrier.h> diff --git a/tools/perf/util/bpf-loader.c b/tools/perf/util/bpf-loader.c index f4adeccdbbcb..b3c8174360bf 100644 --- a/tools/perf/util/bpf-loader.c +++ b/tools/perf/util/bpf-loader.c @@ -27,11 +27,7 @@ #include "util.h" #include "llvm-utils.h" #include "c++/clang-c.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/hashmap.h> -#else #include "util/hashmap.h" -#endif #include "asm/bug.h" #include <internal/xyarray.h> diff --git a/tools/perf/util/bpf-prologue.h 
b/tools/perf/util/bpf-prologue.h index c50c7358009f..66dcf751ef65 100644 --- a/tools/perf/util/bpf-prologue.h +++ b/tools/perf/util/bpf-prologue.h @@ -6,9 +6,8 @@ #ifndef __BPF_PROLOGUE_H #define __BPF_PROLOGUE_H -#include <linux/compiler.h> -#include <linux/filter.h> -#include "probe-event.h" +struct probe_trace_arg; +struct bpf_insn; #define BPF_PROLOGUE_MAX_ARGS 3 #define BPF_PROLOGUE_START_ARG_REG BPF_REG_3 @@ -19,6 +18,7 @@ int bpf__gen_prologue(struct probe_trace_arg *args, int nargs, struct bpf_insn *new_prog, size_t *new_cnt, size_t cnt_space); #else +#include <linux/compiler.h> #include <errno.h> static inline int diff --git a/tools/perf/util/bpf_counter.c b/tools/perf/util/bpf_counter.c index ef1c15e4aeba..eeee899fcf34 100644 --- a/tools/perf/util/bpf_counter.c +++ b/tools/perf/util/bpf_counter.c @@ -561,7 +561,7 @@ static int bperf__load(struct evsel *evsel, struct target *target) if (filter_type == BPERF_FILTER_PID || filter_type == BPERF_FILTER_TGID) - key = evsel->core.threads->map[i].pid; + key = perf_thread_map__pid(evsel->core.threads, i); else if (filter_type == BPERF_FILTER_CPU) key = evsel->core.cpus->map[i].cpu; else diff --git a/tools/perf/util/bpf_kwork.c b/tools/perf/util/bpf_kwork.c index b629dd679d3f..6eb2c78fd7f4 100644 --- a/tools/perf/util/bpf_kwork.c +++ b/tools/perf/util/bpf_kwork.c @@ -7,15 +7,18 @@ #include <time.h> #include <fcntl.h> +#include <signal.h> #include <stdio.h> #include <unistd.h> #include <linux/time64.h> #include "util/debug.h" +#include "util/evsel.h" #include "util/kwork.h" #include <bpf/bpf.h> +#include <perf/cpumap.h> #include "util/bpf_skel/kwork_trace.skel.h" diff --git a/tools/perf/util/bpf_lock_contention.c b/tools/perf/util/bpf_lock_contention.c index fc4d613cb979..0deec1178778 100644 --- a/tools/perf/util/bpf_lock_contention.c +++ b/tools/perf/util/bpf_lock_contention.c @@ -108,28 +108,36 @@ int lock_contention_stop(void) int lock_contention_read(struct lock_contention *con) { - int fd, stack; + int fd, 
stack, err = 0; s32 prev_key, key; - struct lock_contention_data data; - struct lock_stat *st; + struct lock_contention_data data = {}; + struct lock_stat *st = NULL; struct machine *machine = con->machine; - u64 stack_trace[con->max_stack]; + u64 *stack_trace; + size_t stack_size = con->max_stack * sizeof(*stack_trace); fd = bpf_map__fd(skel->maps.lock_stat); stack = bpf_map__fd(skel->maps.stacks); con->lost = skel->bss->lost; + stack_trace = zalloc(stack_size); + if (stack_trace == NULL) + return -1; + prev_key = 0; while (!bpf_map_get_next_key(fd, &prev_key, &key)) { struct map *kmap; struct symbol *sym; int idx = 0; + /* to handle errors in the loop body */ + err = -1; + bpf_map_lookup_elem(fd, &key, &data); st = zalloc(sizeof(*st)); if (st == NULL) - return -1; + break; st->nr_contended = data.count; st->wait_time_total = data.total_time; @@ -163,25 +171,32 @@ int lock_contention_read(struct lock_contention *con) st->name = strdup(sym->name); if (ret < 0 || st->name == NULL) - return -1; + break; } else if (asprintf(&st->name, "%#lx", (unsigned long)st->addr) < 0) { - free(st); - return -1; + break; } if (verbose) { - st->callstack = memdup(stack_trace, sizeof(stack_trace)); - if (st->callstack == NULL) { - free(st); - return -1; - } + st->callstack = memdup(stack_trace, stack_size); + if (st->callstack == NULL) + break; } hlist_add_head(&st->hash_entry, con->result); prev_key = key; + + /* we're fine now, reset the values */ + st = NULL; + err = 0; } - return 0; + free(stack_trace); + if (st) { + free(st->name); + free(st); + } + + return err; } int lock_contention_finish(void) diff --git a/tools/perf/util/bpf_map.h b/tools/perf/util/bpf_map.h index d6abd5e47af8..c2f7c13cba23 100644 --- a/tools/perf/util/bpf_map.h +++ b/tools/perf/util/bpf_map.h @@ -3,7 +3,6 @@ #define __PERF_BPF_MAP_H 1 #include <stdio.h> -#include <linux/compiler.h> struct bpf_map; #ifdef HAVE_LIBBPF_SUPPORT @@ -12,6 +11,8 @@ int bpf_map__fprintf(struct bpf_map *map, FILE *fp); #else 
+#include <linux/compiler.h> + static inline int bpf_map__fprintf(struct bpf_map *map __maybe_unused, FILE *fp __maybe_unused) { return 0; diff --git a/tools/perf/util/branch.h b/tools/perf/util/branch.h index f838b23db180..d6017c9b1872 100644 --- a/tools/perf/util/branch.h +++ b/tools/perf/util/branch.h @@ -7,12 +7,10 @@ * detected in at least musl libc, used in Alpine Linux. -acme */ #include <stdio.h> -#include <stdint.h> -#include <linux/compiler.h> -#include <linux/stddef.h> #include <linux/perf_event.h> #include <linux/types.h> -#include "event.h" +#include "util/map_symbol.h" +#include "util/sample.h" struct branch_flags { union { diff --git a/tools/perf/util/config.c b/tools/perf/util/config.c index 3f2ae19a1dd4..658170b8dcef 100644 --- a/tools/perf/util/config.c +++ b/tools/perf/util/config.c @@ -556,7 +556,7 @@ static char *home_perfconfig(void) config = strdup(mkpath("%s/.perfconfig", home)); if (config == NULL) { - pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.", home); + pr_warning("Not enough memory to process %s/.perfconfig, ignoring it.\n", home); return NULL; } @@ -564,7 +564,7 @@ static char *home_perfconfig(void) goto out_free; if (st.st_uid && (st.st_uid != geteuid())) { - pr_warning("File %s not owned by current user or root, ignoring it.", config); + pr_warning("File %s not owned by current user or root, ignoring it.\n", config); goto out_free; } diff --git a/tools/perf/util/counts.c b/tools/perf/util/counts.c index 7a447d918458..11cd85b278a6 100644 --- a/tools/perf/util/counts.c +++ b/tools/perf/util/counts.c @@ -48,7 +48,6 @@ void perf_counts__reset(struct perf_counts *counts) { xyarray__reset(counts->loaded); xyarray__reset(counts->values); - memset(&counts->aggr, 0, sizeof(struct perf_counts_values)); } void evsel__reset_counts(struct evsel *evsel) diff --git a/tools/perf/util/counts.h b/tools/perf/util/counts.h index 5de275194f2b..42760242e0df 100644 --- a/tools/perf/util/counts.h +++ b/tools/perf/util/counts.h @@ 
-11,7 +11,6 @@ struct evsel; struct perf_counts { s8 scaled; - struct perf_counts_values aggr; struct xyarray *values; struct xyarray *loaded; }; diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index 8486ca3bec75..5e564974fba4 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -12,6 +12,7 @@ #include <linux/ctype.h> #include <linux/zalloc.h> +#include <internal/cpumap.h> static struct perf_cpu max_cpu_num; static struct perf_cpu max_present_cpu_num; @@ -234,7 +235,7 @@ static int aggr_cpu_id__cmp(const void *a_pointer, const void *b_pointer) struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, - void *data) + void *data, bool needs_sort) { int idx; struct perf_cpu cpu; @@ -270,8 +271,10 @@ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, if (trimmed_c) c = trimmed_c; } + /* ensure we process id in increasing order */ - qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); + if (needs_sort) + qsort(c->map, c->nr, sizeof(struct aggr_cpu_id), aggr_cpu_id__cmp); return c; @@ -354,6 +357,16 @@ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data __maybe_unu return id; } +struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data __maybe_unused) +{ + struct aggr_cpu_id id = aggr_cpu_id__empty(); + + /* it always aggregates to the cpu 0 */ + cpu.cpu = 0; + id.cpu = cpu; + return id; +} + /* setup simple routines to easily access node numbers given a cpu number */ static int get_max_num(char *path, int *max) { diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 4a6d029576ee..c2f5824a3a22 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -4,8 +4,8 @@ #include <stdbool.h> #include <stdio.h> -#include <internal/cpumap.h> #include <perf/cpumap.h> +#include <linux/refcount.h> /** Identify where counts are aggregated, -1 implies not to aggregate. 
*/ struct aggr_cpu_id { @@ -97,7 +97,7 @@ typedef struct aggr_cpu_id (*aggr_cpu_id_get_t)(struct perf_cpu cpu, void *data) */ struct cpu_aggr_map *cpu_aggr_map__new(const struct perf_cpu_map *cpus, aggr_cpu_id_get_t get_id, - void *data); + void *data, bool needs_sort); bool aggr_cpu_id__equal(const struct aggr_cpu_id *a, const struct aggr_cpu_id *b); bool aggr_cpu_id__is_empty(const struct aggr_cpu_id *a); @@ -133,5 +133,9 @@ struct aggr_cpu_id aggr_cpu_id__cpu(struct perf_cpu cpu, void *data); * cpu. The function signature is compatible with aggr_cpu_id_get_t. */ struct aggr_cpu_id aggr_cpu_id__node(struct perf_cpu cpu, void *data); - +/** + * aggr_cpu_id__global - Create an aggr_cpu_id for global aggregation. + * The function signature is compatible with aggr_cpu_id_get_t. + */ +struct aggr_cpu_id aggr_cpu_id__global(struct perf_cpu cpu, void *data); #endif /* __PERF_CPUMAP_H */ diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c index 9e0aee276df8..c65cdaf6975e 100644 --- a/tools/perf/util/data-convert-bt.c +++ b/tools/perf/util/data-convert-bt.c @@ -34,6 +34,7 @@ #include <linux/time64.h> #include "util.h" #include "clockid.h" +#include "util/sample.h" #define pr_N(n, fmt, ...) 
\ eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) diff --git a/tools/perf/util/data-convert-json.c b/tools/perf/util/data-convert-json.c index 613d6ae82663..57db59068cb6 100644 --- a/tools/perf/util/data-convert-json.c +++ b/tools/perf/util/data-convert-json.c @@ -217,6 +217,26 @@ static int process_sample_event(struct perf_tool *tool, } output_json_format(out, false, 3, "]"); + if (sample->raw_data) { + int i; + struct tep_format_field **fields; + + fields = tep_event_fields(evsel->tp_format); + if (fields) { + i = 0; + while (fields[i]) { + struct trace_seq s; + + trace_seq_init(&s); + tep_print_field(&s, sample->raw_data, fields[i]); + output_json_key_string(out, true, 3, fields[i]->name, s.buffer); + + i++; + } + free(fields); + } + } + output_json_format(out, false, 2, "}"); return 0; } diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index 609ca1671501..b07414409771 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -123,7 +123,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, if (die_find_realfunc(cu_die, addr, &die_mem) && die_entrypc(&die_mem, &faddr) == 0 && faddr == addr) { - *fname = dwarf_decl_file(&die_mem); + *fname = die_get_decl_file(&die_mem); dwarf_decl_line(&die_mem, lineno); goto out; } @@ -137,7 +137,7 @@ int cu_find_lineinfo(Dwarf_Die *cu_die, Dwarf_Addr addr, } out: - return *lineno ?: -ENOENT; + return (*lineno && *fname) ? 
*lineno : -ENOENT; } static int __die_find_inline_cb(Dwarf_Die *die_mem, void *data); @@ -308,26 +308,13 @@ static int die_get_attr_udata(Dwarf_Die *tp_die, unsigned int attr_name, { Dwarf_Attribute attr; - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || + if (dwarf_attr_integrate(tp_die, attr_name, &attr) == NULL || dwarf_formudata(&attr, result) != 0) return -ENOENT; return 0; } -/* Get attribute and translate it as a sdata */ -static int die_get_attr_sdata(Dwarf_Die *tp_die, unsigned int attr_name, - Dwarf_Sword *result) -{ - Dwarf_Attribute attr; - - if (dwarf_attr(tp_die, attr_name, &attr) == NULL || - dwarf_formsdata(&attr, result) != 0) - return -ENOENT; - - return 0; -} - /** * die_is_signed_type - Check whether a type DIE is signed or not * @tp_die: a DIE of a type @@ -467,9 +454,9 @@ int die_get_data_member_location(Dwarf_Die *mb_die, Dwarf_Word *offs) /* Get the call file index number in CU DIE */ static int die_get_call_fileno(Dwarf_Die *in_die) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(in_die, DW_AT_call_file, &idx) == 0) + if (die_get_attr_udata(in_die, DW_AT_call_file, &idx) == 0) return (int)idx; else return -ENOENT; @@ -478,14 +465,27 @@ static int die_get_call_fileno(Dwarf_Die *in_die) /* Get the declared file index number in CU DIE */ static int die_get_decl_fileno(Dwarf_Die *pdie) { - Dwarf_Sword idx; + Dwarf_Word idx; - if (die_get_attr_sdata(pdie, DW_AT_decl_file, &idx) == 0) + if (die_get_attr_udata(pdie, DW_AT_decl_file, &idx) == 0) return (int)idx; else return -ENOENT; } +/* Return the file name by index */ +static const char *die_get_file_name(Dwarf_Die *dw_die, int idx) +{ + Dwarf_Die cu_die; + Dwarf_Files *files; + + if (idx < 0 || !dwarf_diecu(dw_die, &cu_die, NULL, NULL) || + dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) + return NULL; + + return dwarf_filesrc(files, idx, NULL, NULL); +} + /** * die_get_call_file - Get callsite file name of inlined function instance * @in_die: a DIE of an inlined function 
instance @@ -495,18 +495,22 @@ static int die_get_decl_fileno(Dwarf_Die *pdie) */ const char *die_get_call_file(Dwarf_Die *in_die) { - Dwarf_Die cu_die; - Dwarf_Files *files; - int idx; - - idx = die_get_call_fileno(in_die); - if (idx < 0 || !dwarf_diecu(in_die, &cu_die, NULL, NULL) || - dwarf_getsrcfiles(&cu_die, &files, NULL) != 0) - return NULL; - - return dwarf_filesrc(files, idx, NULL, NULL); + return die_get_file_name(in_die, die_get_call_fileno(in_die)); } +/** + * die_get_decl_file - Find the declared file name of this DIE + * @dw_die: a DIE for something declared. + * + * Get declared file name of @dw_die. + * NOTE: Since some version of clang DWARF5 implementation incorrectly uses + * file index 0 for DW_AT_decl_file, die_get_decl_file() will return NULL for + * such cases. Use this function instead. + */ +const char *die_get_decl_file(Dwarf_Die *dw_die) +{ + return die_get_file_name(dw_die, die_get_decl_fileno(dw_die)); +} /** * die_find_child - Generic DIE search function in DIE tree @@ -790,7 +794,7 @@ static int __die_walk_funclines_cb(Dwarf_Die *in_die, void *data) } if (addr) { - fname = dwarf_decl_file(in_die); + fname = die_get_decl_file(in_die); if (fname && dwarf_decl_line(in_die, &lineno) == 0) { lw->retval = lw->callback(fname, lineno, addr, lw->data); if (lw->retval != 0) @@ -818,7 +822,7 @@ static int __die_walk_funclines(Dwarf_Die *sp_die, bool recursive, int lineno; /* Handle function declaration line */ - fname = dwarf_decl_file(sp_die); + fname = die_get_decl_file(sp_die); if (fname && dwarf_decl_line(sp_die, &lineno) == 0 && die_entrypc(sp_die, &addr) == 0) { lw.retval = callback(fname, lineno, addr, data); @@ -873,7 +877,12 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) if (dwarf_tag(rt_die) != DW_TAG_compile_unit) { cu_die = dwarf_diecu(rt_die, &die_mem, NULL, NULL); dwarf_decl_line(rt_die, &decl); - decf = dwarf_decl_file(rt_die); + decf = die_get_decl_file(rt_die); + if (!decf) { + 
pr_debug2("Failed to get the declared file name of %s\n", + dwarf_diename(rt_die)); + return -EINVAL; + } } else cu_die = rt_die; if (!cu_die) { @@ -923,7 +932,7 @@ int die_walk_lines(Dwarf_Die *rt_die, line_walk_callback_t callback, void *data) dwarf_decl_line(&die_mem, &inl); if (inl != decl || - decf != dwarf_decl_file(&die_mem)) + decf != die_get_decl_file(&die_mem)) continue; } } diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index 7ee0fa19b5c4..7ec8bc1083bb 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -50,6 +50,9 @@ int die_get_call_lineno(Dwarf_Die *in_die); /* Get callsite file name of inlined function instance */ const char *die_get_call_file(Dwarf_Die *in_die); +/* Get declared file name of a DIE */ +const char *die_get_decl_file(Dwarf_Die *dw_die); + /* Get type die */ Dwarf_Die *die_get_type(Dwarf_Die *vr_die, Dwarf_Die *die_mem); diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index 12eae6917022..6663a676eadc 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -12,11 +12,10 @@ #include <perf/event.h> #include <linux/types.h> -#include "perf_regs.h" - struct dso; struct machine; struct perf_event_attr; +struct perf_sample; #ifdef __LP64__ /* @@ -44,61 +43,6 @@ struct perf_event_attr; /* perf sample has 16 bits size limit */ #define PERF_SAMPLE_MAX_SIZE (1 << 16) -/* number of register is bound by the number of bits in regs_dump::mask (64) */ -#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) - -struct regs_dump { - u64 abi; - u64 mask; - u64 *regs; - - /* Cached values/mask filled by first register access. 
*/ - u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; - u64 cache_mask; -}; - -struct stack_dump { - u16 offset; - u64 size; - char *data; -}; - -struct sample_read_value { - u64 value; - u64 id; /* only if PERF_FORMAT_ID */ - u64 lost; /* only if PERF_FORMAT_LOST */ -}; - -struct sample_read { - u64 time_enabled; - u64 time_running; - union { - struct { - u64 nr; - struct sample_read_value *values; - } group; - struct sample_read_value one; - }; -}; - -static inline size_t sample_read_value_size(u64 read_format) -{ - /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ - if (read_format & PERF_FORMAT_LOST) - return sizeof(struct sample_read_value); - else - return offsetof(struct sample_read_value, lost); -} - -static inline struct sample_read_value * -next_sample_read_value(struct sample_read_value *v, u64 read_format) -{ - return (void *)v + sample_read_value_size(read_format); -} - -#define sample_read_group__for_each(v, nr, rf) \ - for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) - struct ip_callchain { u64 nr; u64 ips[]; @@ -140,52 +84,6 @@ enum { PERF_IP_FLAG_VMENTRY |\ PERF_IP_FLAG_VMEXIT) -#define MAX_INSN 16 - -struct aux_sample { - u64 size; - void *data; -}; - -struct perf_sample { - u64 ip; - u32 pid, tid; - u64 time; - u64 addr; - u64 id; - u64 stream_id; - u64 period; - u64 weight; - u64 transaction; - u64 insn_cnt; - u64 cyc_cnt; - u32 cpu; - u32 raw_size; - u64 data_src; - u64 phys_addr; - u64 data_page_size; - u64 code_page_size; - u64 cgroup; - u32 flags; - u32 machine_pid; - u32 vcpu; - u16 insn_len; - u8 cpumode; - u16 misc; - u16 ins_lat; - u16 p_stage_cyc; - bool no_hw_idx; /* No hw_idx collected in branch_stack */ - char insn[MAX_INSN]; - void *raw_data; - struct ip_callchain *callchain; - struct branch_stack *branch_stack; - struct regs_dump user_regs; - struct regs_dump intr_regs; - struct stack_dump user_stack; - struct sample_read read; - struct aux_sample aux_sample; -}; - #define PERF_MEM_DATA_SRC_NONE \ 
(PERF_MEM_S(OP, NA) |\ PERF_MEM_S(LVL, NA) |\ @@ -344,15 +242,6 @@ struct perf_synth_intel_iflag_chg { u64 branch_ip; /* If via_branch */ }; -/* - * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get - * 8-byte alignment. - */ -static inline void *perf_sample__synth_ptr(struct perf_sample *sample) -{ - return sample->raw_data - 4; -} - static inline void *perf_synth__raw_data(void *p) { return p + 4; @@ -446,19 +335,8 @@ int perf_event__process(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine); -struct addr_location; - -int machine__resolve(struct machine *machine, struct addr_location *al, - struct perf_sample *sample); - -void addr_location__put(struct addr_location *al); - -struct thread; - bool is_bts_event(struct perf_event_attr *attr); bool sample_addr_correlates_sym(struct perf_event_attr *attr); -void thread__resolve(struct thread *thread, struct addr_location *al, - struct perf_sample *sample); const char *perf_event__name(unsigned int id); diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 6612b00949e7..fbf3192bced9 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -24,11 +24,13 @@ #include "../perf.h" #include "asm/bug.h" #include "bpf-event.h" +#include "util/event.h" #include "util/string2.h" #include "util/perf_api_probe.h" #include "util/evsel_fprintf.h" #include "util/evlist-hybrid.h" #include "util/pmu.h" +#include "util/sample.h" #include <signal.h> #include <unistd.h> #include <sched.h> diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index 76605fde3507..45f4f08399ae 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -46,17 +46,14 @@ #include "string2.h" #include "memswap.h" #include "util.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/hashmap.h> -#else #include "util/hashmap.h" -#endif #include "pmu-hybrid.h" #include "off_cpu.h" #include "../perf-sys.h" #include "util/parse-branch-options.h" #include <internal/xyarray.h> 
#include <internal/lib.h> +#include <internal/threadmap.h> #include <linux/ctype.h> @@ -467,6 +464,7 @@ struct evsel *evsel__clone(struct evsel *orig) evsel->collect_stat = orig->collect_stat; evsel->weak_group = orig->weak_group; evsel->use_config_name = orig->use_config_name; + evsel->pmu = orig->pmu; if (evsel__copy_config_terms(evsel, orig) < 0) goto out_err; @@ -1525,13 +1523,8 @@ void evsel__compute_deltas(struct evsel *evsel, int cpu_map_idx, int thread, if (!evsel->prev_raw_counts) return; - if (cpu_map_idx == -1) { - tmp = evsel->prev_raw_counts->aggr; - evsel->prev_raw_counts->aggr = *count; - } else { - tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); - *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; - } + tmp = *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread); + *perf_counts(evsel->prev_raw_counts, cpu_map_idx, thread) = *count; count->val = count->val - tmp.val; count->ena = count->ena - tmp.ena; @@ -1966,17 +1959,16 @@ bool evsel__detect_missing_features(struct evsel *evsel) perf_missing_features.mmap2 = true; pr_debug2_peo("switching off mmap2\n"); return true; - } else if ((evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) && - (evsel->pmu == NULL || evsel->pmu->missing_features.exclude_guest)) { - if (evsel->pmu == NULL) { + } else if (evsel->core.attr.exclude_guest || evsel->core.attr.exclude_host) { + if (evsel->pmu == NULL) evsel->pmu = evsel__find_pmu(evsel); - if (evsel->pmu) - evsel->pmu->missing_features.exclude_guest = true; - else { - /* we cannot find PMU, disable attrs now */ - evsel->core.attr.exclude_host = false; - evsel->core.attr.exclude_guest = false; - } + + if (evsel->pmu) + evsel->pmu->missing_features.exclude_guest = true; + else { + /* we cannot find PMU, disable attrs now */ + evsel->core.attr.exclude_host = false; + evsel->core.attr.exclude_guest = false; } if (evsel->exclude_GH) { @@ -3129,7 +3121,7 @@ void evsel__zero_per_pkg(struct evsel *evsel) } } -bool 
evsel__is_hybrid(struct evsel *evsel) +bool evsel__is_hybrid(const struct evsel *evsel) { return evsel->pmu_name && perf_pmu__is_hybrid(evsel->pmu_name); } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 989865e16aad..f3485799ddf9 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -10,8 +10,6 @@ #include <internal/evsel.h> #include <perf/evsel.h> #include "symbol_conf.h" -#include <internal/cpumap.h> -#include <perf/cpumap.h> struct bpf_object; struct cgroup; @@ -498,7 +496,7 @@ struct perf_env *evsel__env(struct evsel *evsel); int evsel__store_ids(struct evsel *evsel, struct evlist *evlist); void evsel__zero_per_pkg(struct evsel *evsel); -bool evsel__is_hybrid(struct evsel *evsel); +bool evsel__is_hybrid(const struct evsel *evsel); struct evsel *evsel__leader(struct evsel *evsel); bool evsel__has_leader(struct evsel *evsel, struct evsel *leader); bool evsel__is_leader(struct evsel *evsel); diff --git a/tools/perf/util/expr.c b/tools/perf/util/expr.c index aaacf514dc09..140f2acdb325 100644 --- a/tools/perf/util/expr.c +++ b/tools/perf/util/expr.c @@ -11,6 +11,7 @@ #include "expr.h" #include "expr-bison.h" #include "expr-flex.h" +#include "util/hashmap.h" #include "smt.h" #include "tsc.h" #include <linux/err.h> diff --git a/tools/perf/util/expr.h b/tools/perf/util/expr.h index d6c1668dc1a0..029271540fb0 100644 --- a/tools/perf/util/expr.h +++ b/tools/perf/util/expr.h @@ -2,12 +2,7 @@ #ifndef PARSE_CTX_H #define PARSE_CTX_H 1 -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/hashmap.h> -#else -#include "util/hashmap.h" -#endif - +struct hashmap; struct metric_ref; struct expr_scanner_ctx { diff --git a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c index 1376077183f7..22308dd93010 100644 --- a/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c +++ b/tools/perf/util/intel-pt-decoder/intel-pt-insn-decoder.c @@ -18,6 +18,7 @@ #include "intel-pt-insn-decoder.h" 
#include "dump-insn.h" +#include "util/sample.h" #if INTEL_PT_INSN_BUF_SZ < MAX_INSN_SIZE || INTEL_PT_INSN_BUF_SZ > MAX_INSN #error Instruction buffer size too small diff --git a/tools/perf/util/kwork.h b/tools/perf/util/kwork.h index 320c0a6d2e08..53b7327550b8 100644 --- a/tools/perf/util/kwork.h +++ b/tools/perf/util/kwork.h @@ -1,16 +1,16 @@ #ifndef PERF_UTIL_KWORK_H #define PERF_UTIL_KWORK_H -#include "perf.h" - #include "util/tool.h" -#include "util/event.h" -#include "util/evlist.h" -#include "util/session.h" #include "util/time-utils.h" -#include <linux/list.h> #include <linux/bitmap.h> +#include <linux/list.h> +#include <linux/rbtree.h> +#include <linux/types.h> + +struct perf_sample; +struct perf_session; enum kwork_class_type { KWORK_CLASS_IRQ, diff --git a/tools/perf/util/llvm-utils.c b/tools/perf/util/llvm-utils.c index 2dc797007419..650ffe336f3a 100644 --- a/tools/perf/util/llvm-utils.c +++ b/tools/perf/util/llvm-utils.c @@ -463,7 +463,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, char *pipe_template = NULL; const char *opts = llvm_param.opts; char *command_echo = NULL, *command_out; - char *perf_include_dir = system_path(PERF_INCLUDE_DIR); + char *libbpf_include_dir = system_path(LIBBPF_INCLUDE_DIR); if (path[0] != '-' && realpath(path, abspath) == NULL) { err = errno; @@ -495,7 +495,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, snprintf(linux_version_code_str, sizeof(linux_version_code_str), "0x%x", kernel_version); - if (asprintf(&perf_bpf_include_opts, "-I%s/bpf", perf_include_dir) < 0) + if (asprintf(&perf_bpf_include_opts, "-I%s/", libbpf_include_dir) < 0) goto errout; force_set_env("NR_CPUS", nr_cpus_avail_str); force_set_env("LINUX_VERSION_CODE", linux_version_code_str); @@ -556,7 +556,7 @@ int llvm__compile_bpf(const char *path, void **p_obj_buf, free(kbuild_dir); free(kbuild_include_opts); free(perf_bpf_include_opts); - free(perf_include_dir); + free(libbpf_include_dir); if (!p_obj_buf) free(obj_buf); @@ 
-572,7 +572,7 @@ errout: free(kbuild_include_opts); free(obj_buf); free(perf_bpf_include_opts); - free(perf_include_dir); + free(libbpf_include_dir); free(pipe_template); if (p_obj_buf) *p_obj_buf = NULL; diff --git a/tools/perf/util/lock-contention.h b/tools/perf/util/lock-contention.h index b8cb8830b7bc..e3c061b1795b 100644 --- a/tools/perf/util/lock-contention.h +++ b/tools/perf/util/lock-contention.h @@ -91,7 +91,7 @@ struct thread_stat { * Number of stack trace entries to skip when finding callers. * The first few entries belong to the locking implementation itself. */ -#define CONTENTION_STACK_SKIP 3 +#define CONTENTION_STACK_SKIP 4 /* * flags for lock:contention_begin diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index 74935dfaa937..6267c1d6f232 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -305,4 +305,7 @@ int machine__create_extra_kernel_map(struct machine *machine, int machine__map_x86_64_entry_trampolines(struct machine *machine, struct dso *kernel); +int machine__resolve(struct machine *machine, struct addr_location *al, + struct perf_sample *sample); + #endif /* __PERF_MACHINE_H */ diff --git a/tools/perf/util/metricgroup.c b/tools/perf/util/metricgroup.c index 4c98ac29ee13..cf9e2452d322 100644 --- a/tools/perf/util/metricgroup.c +++ b/tools/perf/util/metricgroup.c @@ -28,6 +28,7 @@ #include "util.h" #include <asm/bug.h> #include "cgroup.h" +#include "util/hashmap.h" struct metric_event *metricgroup__lookup(struct rblist *metric_events, struct evsel *evsel, @@ -556,11 +557,12 @@ static int metricgroup__print_callback(const struct pmu_event *pe, void *vdata) { struct metricgroup_print_data *data = vdata; + const char *pmu = pe->pmu ?: "cpu"; if (!pe->metric_expr) return 0; - if (data->pmu_name && perf_pmu__is_hybrid(pe->pmu) && strcmp(data->pmu_name, pe->pmu)) + if (data->pmu_name && strcmp(data->pmu_name, pmu)) return 0; return metricgroup__print_pmu_event(pe, data->metricgroups, data->filter, diff --git 
a/tools/perf/util/mmap.h b/tools/perf/util/mmap.h index cd4ccec7f361..f944c3cd5efa 100644 --- a/tools/perf/util/mmap.h +++ b/tools/perf/util/mmap.h @@ -2,18 +2,13 @@ #define __PERF_MMAP_H 1 #include <internal/mmap.h> -#include <linux/compiler.h> -#include <linux/refcount.h> #include <linux/types.h> -#include <linux/ring_buffer.h> #include <linux/bitops.h> #include <perf/cpumap.h> -#include <stdbool.h> #ifdef HAVE_AIO_SUPPORT #include <aio.h> #endif #include "auxtrace.h" -#include "event.h" #include "util/compress.h" struct aiocb; diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 5973f46c2375..6502cd679f57 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -266,6 +266,7 @@ __add_event(struct list_head *list, int *idx, evsel->core.own_cpus = perf_cpu_map__get(cpus); evsel->core.requires_cpu = pmu ? pmu->is_uncore : false; evsel->auto_merge_stats = auto_merge_stats; + evsel->pmu = pmu; if (name) evsel->name = strdup(name); diff --git a/tools/perf/util/perf_regs.c b/tools/perf/util/perf_regs.c index 872dd3d38782..57a567ee2cea 100644 --- a/tools/perf/util/perf_regs.c +++ b/tools/perf/util/perf_regs.c @@ -2,7 +2,7 @@ #include <errno.h> #include <string.h> #include "perf_regs.h" -#include "event.h" +#include "util/sample.h" int __weak arch_sdt_arg_parse_op(char *old_op __maybe_unused, char **new_op __maybe_unused) diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 03284059175f..075c82dd1347 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -31,10 +31,26 @@ struct perf_pmu perf_pmu__fake; +/** + * struct perf_pmu_format - Values from a format file read from + * <sysfs>/devices/cpu/format/ held in struct perf_pmu. + * + * For example, the contents of <sysfs>/devices/cpu/format/event may be + * "config:0-7" and will be represented here as name="event", + * value=PERF_PMU_FORMAT_VALUE_CONFIG and bits 0 to 7 will be set. 
+ */ struct perf_pmu_format { + /** @name: The modifier/file name. */ char *name; + /** + * @value : Which config value the format relates to. Supported values + * are from PERF_PMU_FORMAT_VALUE_CONFIG to + * PERF_PMU_FORMAT_VALUE_CONFIG_END. + */ int value; + /** @bits: Which config bits are set by this format value. */ DECLARE_BITMAP(bits, PERF_PMU_FORMAT_BITS); + /** @list: Element on list within struct perf_pmu. */ struct list_head list; }; @@ -980,7 +996,6 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) pmu->is_uncore = pmu_is_uncore(name); if (pmu->is_uncore) pmu->id = pmu_id(name); - pmu->is_hybrid = is_hybrid; pmu->max_precise = pmu_max_precise(name); pmu_add_cpu_aliases(&aliases, pmu); pmu_add_sys_aliases(&aliases, pmu); @@ -992,7 +1007,7 @@ static struct perf_pmu *pmu_lookup(const char *lookup_name) list_splice(&aliases, &pmu->aliases); list_add_tail(&pmu->list, &pmus); - if (pmu->is_hybrid) + if (is_hybrid) list_add_tail(&pmu->hybrid_list, &perf_pmu__hybrid_pmus); pmu->default_config = perf_pmu__get_default_config(pmu); @@ -1065,11 +1080,15 @@ struct perf_pmu *evsel__find_pmu(struct evsel *evsel) { struct perf_pmu *pmu = NULL; + if (evsel->pmu) + return evsel->pmu; + while ((pmu = perf_pmu__scan(pmu)) != NULL) { if (pmu->type == evsel->core.attr.type) break; } + evsel->pmu = pmu; return pmu; } @@ -1534,8 +1553,8 @@ static int sub_non_neg(int a, int b) return a - b; } -static char *format_alias(char *buf, int len, struct perf_pmu *pmu, - struct perf_pmu_alias *alias) +static char *format_alias(char *buf, int len, const struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) { struct parse_events_term *term; int used = snprintf(buf, len, "%s/%s", pmu->name, alias->name); @@ -1560,51 +1579,67 @@ static char *format_alias(char *buf, int len, struct perf_pmu *pmu, return buf; } -static char *format_alias_or(char *buf, int len, struct perf_pmu *pmu, - struct perf_pmu_alias *alias) +static char *format_alias_or(char *buf, int len, const 
struct perf_pmu *pmu, + const struct perf_pmu_alias *alias) { snprintf(buf, len, "%s OR %s/%s/", alias->name, pmu->name, alias->name); return buf; } +/** Struct for ordering events as output in perf list. */ struct sevent { - char *name; - char *desc; - char *topic; - char *str; - char *pmu; - char *metric_expr; - char *metric_name; - int is_cpu; + /** PMU for event. */ + const struct perf_pmu *pmu; + /** + * Optional event for name, desc, etc. If not present then this is a + * selectable PMU and the event name is shown as "//". + */ + const struct perf_pmu_alias *event; + /** Is the PMU for the CPU? */ + bool is_cpu; }; static int cmp_sevent(const void *a, const void *b) { const struct sevent *as = a; const struct sevent *bs = b; + const char *a_pmu_name, *b_pmu_name; + const char *a_name = "//", *a_desc = NULL, *a_topic = ""; + const char *b_name = "//", *b_desc = NULL, *b_topic = ""; int ret; - /* Put extra events last */ - if (!!as->desc != !!bs->desc) - return !!as->desc - !!bs->desc; - if (as->topic && bs->topic) { - int n = strcmp(as->topic, bs->topic); - - if (n) - return n; + if (as->event) { + a_name = as->event->name; + a_desc = as->event->desc; + a_topic = as->event->topic ?: ""; + } + if (bs->event) { + b_name = bs->event->name; + b_desc = bs->event->desc; + b_topic = bs->event->topic ?: ""; } + /* Put extra events last. */ + if (!!a_desc != !!b_desc) + return !!a_desc - !!b_desc; + + /* Order by topics. */ + ret = strcmp(a_topic, b_topic); + if (ret) + return ret; /* Order CPU core events to be first */ if (as->is_cpu != bs->is_cpu) return bs->is_cpu - as->is_cpu; - ret = strcmp(as->name, bs->name); - if (!ret) { - if (as->pmu && bs->pmu) - return strcmp(as->pmu, bs->pmu); - } + /* Order by PMU name. */ + a_pmu_name = as->pmu->name ?: ""; + b_pmu_name = bs->pmu->name ?: ""; + ret = strcmp(a_pmu_name, b_pmu_name); + if (ret) + return ret; - return ret; + /* Order by event name. 
*/ + return strcmp(a_name, b_name); } static void wordwrap(char *s, int start, int max, int corr) @@ -1636,16 +1671,18 @@ bool is_pmu_core(const char *name) static bool pmu_alias_is_duplicate(struct sevent *alias_a, struct sevent *alias_b) { - /* Different names -> never duplicates */ - if (strcmp(alias_a->name, alias_b->name)) - return false; + const char *a_pmu_name, *b_pmu_name; + const char *a_name = alias_a->event ? alias_a->event->name : "//"; + const char *b_name = alias_b->event ? alias_b->event->name : "//"; - /* Don't remove duplicates for hybrid PMUs */ - if (perf_pmu__is_hybrid(alias_a->pmu) && - perf_pmu__is_hybrid(alias_b->pmu)) + /* Different names -> never duplicates */ + if (strcmp(a_name, b_name)) return false; - return true; + /* Don't remove duplicates for different PMUs */ + a_pmu_name = alias_a->pmu->name ?: ""; + b_pmu_name = alias_b->pmu->name ?: ""; + return strcmp(a_pmu_name, b_pmu_name) == 0; } void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, @@ -1671,112 +1708,104 @@ void print_pmu_events(const char *event_glob, bool name_only, bool quiet_flag, len++; } aliases = zalloc(sizeof(struct sevent) * len); - if (!aliases) - goto out_enomem; + if (!aliases) { + pr_err("FATAL: not enough memory to print PMU events\n"); + return; + } pmu = NULL; j = 0; while ((pmu = perf_pmu__scan(pmu)) != NULL) { - if (pmu_name && perf_pmu__is_hybrid(pmu->name) && - strcmp(pmu_name, pmu->name)) { + bool is_cpu; + + if (pmu_name && pmu->name && strcmp(pmu_name, pmu->name)) continue; - } - list_for_each_entry(alias, &pmu->aliases, list) { - char *name = alias->desc ? 
alias->name : - format_alias(buf, sizeof(buf), pmu, alias); - bool is_cpu = is_pmu_core(pmu->name) || - perf_pmu__is_hybrid(pmu->name); + is_cpu = is_pmu_core(pmu->name) || perf_pmu__is_hybrid(pmu->name); + list_for_each_entry(alias, &pmu->aliases, list) { if (alias->deprecated && !deprecated) continue; if (event_glob != NULL && - !(strglobmatch_nocase(name, event_glob) || - (!is_cpu && strglobmatch_nocase(alias->name, - event_glob)) || + !(strglobmatch_nocase(alias->name, event_glob) || + (!is_cpu && + strglobmatch_nocase(alias->name, event_glob)) || (alias->topic && strglobmatch_nocase(alias->topic, event_glob)))) continue; - if (is_cpu && !name_only && !alias->desc) - name = format_alias_or(buf, sizeof(buf), pmu, alias); - - aliases[j].name = name; - if (is_cpu && !name_only && !alias->desc) - aliases[j].name = format_alias_or(buf, - sizeof(buf), - pmu, alias); - aliases[j].name = strdup(aliases[j].name); - if (!aliases[j].name) - goto out_enomem; - - aliases[j].desc = long_desc ? alias->long_desc : - alias->desc; - aliases[j].topic = alias->topic; - aliases[j].str = alias->str; - aliases[j].pmu = pmu->name; - aliases[j].metric_expr = alias->metric_expr; - aliases[j].metric_name = alias->metric_name; + aliases[j].event = alias; + aliases[j].pmu = pmu; aliases[j].is_cpu = is_cpu; j++; } if (pmu->selectable && (event_glob == NULL || strglobmatch(pmu->name, event_glob))) { - char *s; - if (asprintf(&s, "%s//", pmu->name) < 0) - goto out_enomem; - aliases[j].name = s; + aliases[j].event = NULL; + aliases[j].pmu = pmu; + aliases[j].is_cpu = is_cpu; j++; } } len = j; qsort(aliases, len, sizeof(struct sevent), cmp_sevent); for (j = 0; j < len; j++) { + char *name, *desc; + /* Skip duplicates */ if (j > 0 && pmu_alias_is_duplicate(&aliases[j], &aliases[j - 1])) continue; + if (!aliases[j].event) { + /* A selectable event. 
*/ + snprintf(buf, sizeof(buf), "%s//", aliases[j].pmu->name); + name = buf; + } else if (aliases[j].event->desc) { + name = aliases[j].event->name; + } else { + if (!name_only && aliases[j].is_cpu) { + name = format_alias_or(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + } else { + name = format_alias(buf, sizeof(buf), aliases[j].pmu, + aliases[j].event); + } + } if (name_only) { - printf("%s ", aliases[j].name); + printf("%s ", name); continue; } - if (aliases[j].desc && !quiet_flag) { - if (numdesc++ == 0) - printf("\n"); - if (aliases[j].topic && (!topic || - strcmp(topic, aliases[j].topic))) { - printf("%s%s:\n", topic ? "\n" : "", - aliases[j].topic); - topic = aliases[j].topic; - } - printf(" %-50s\n", aliases[j].name); - printf("%*s", 8, "["); - wordwrap(aliases[j].desc, 8, columns, 0); - printf("]\n"); - if (details_flag) { - printf("%*s%s/%s/ ", 8, "", aliases[j].pmu, aliases[j].str); - if (aliases[j].metric_name) - printf(" MetricName: %s", aliases[j].metric_name); - if (aliases[j].metric_expr) - printf(" MetricExpr: %s", aliases[j].metric_expr); - putchar('\n'); - } - } else - printf(" %-50s [Kernel PMU event]\n", aliases[j].name); printed++; + if (!aliases[j].event || !aliases[j].event->desc || quiet_flag) { + printf(" %-50s [Kernel PMU event]\n", name); + continue; + } + if (numdesc++ == 0) + printf("\n"); + if (aliases[j].event->topic && (!topic || + strcmp(topic, aliases[j].event->topic))) { + printf("%s%s:\n", topic ? "\n" : "", aliases[j].event->topic); + topic = aliases[j].event->topic; + } + printf(" %-50s\n", name); + printf("%*s", 8, "["); + desc = long_desc ? 
aliases[j].event->long_desc : aliases[j].event->desc; + wordwrap(desc, 8, columns, 0); + printf("]\n"); + if (details_flag) { + printf("%*s%s/%s/ ", 8, "", aliases[j].pmu->name, aliases[j].event->str); + if (aliases[j].event->metric_name) + printf(" MetricName: %s", aliases[j].event->metric_name); + if (aliases[j].event->metric_expr) + printf(" MetricExpr: %s", aliases[j].event->metric_expr); + putchar('\n'); + } } if (printed && pager_in_use()) printf("\n"); -out_free: - for (j = 0; j < len; j++) - zfree(&aliases[j].name); + zfree(&aliases); return; - -out_enomem: - printf("FATAL: not enough memory to print PMU events\n"); - if (aliases) - goto out_free; } bool pmu_have_event(const char *pname, const char *name) diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 68e15c38ae71..ee02e1ef9187 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -33,31 +33,101 @@ struct perf_pmu_caps { struct list_head list; }; +/** + * struct perf_pmu - hi + */ struct perf_pmu { + /** @name: The name of the PMU such as "cpu". */ char *name; + /** + * @alias_name: Optional alternate name for the PMU determined in + * architecture specific code. + */ char *alias_name; + /** + * @id: Optional PMU identifier read from + * <sysfs>/bus/event_source/devices/<name>/identifier. + */ char *id; + /** + * @type: Perf event attributed type value, read from + * <sysfs>/bus/event_source/devices/<name>/type. + */ __u32 type; + /** + * @selectable: Can the PMU name be selected as if it were an event? + */ bool selectable; + /** + * @is_uncore: Is the PMU not within the CPU core? Determined by the + * presence of <sysfs>/bus/event_source/devices/<name>/cpumask. + */ bool is_uncore; - bool is_hybrid; + /** + * @auxtrace: Are events auxiliary events? Determined in architecture + * specific code. + */ bool auxtrace; + /** + * @max_precise: Number of levels of :ppp precision supported by the + * PMU, read from + * <sysfs>/bus/event_source/devices/<name>/caps/max_precise. 
+ */ int max_precise; + /** + * @default_config: Optional default perf_event_attr determined in + * architecture specific code. + */ struct perf_event_attr *default_config; + /** + * @cpus: Empty or the contents of either of: + * <sysfs>/bus/event_source/devices/<name>/cpumask. + * <sysfs>/bus/event_source/devices/<cpu>/cpus. + */ struct perf_cpu_map *cpus; - struct list_head format; /* HEAD struct perf_pmu_format -> list */ - struct list_head aliases; /* HEAD struct perf_pmu_alias -> list */ + /** + * @format: Holds the contents of files read from + * <sysfs>/bus/event_source/devices/<name>/format/. The contents specify + * which event parameter changes what config, config1 or config2 bits. + */ + struct list_head format; + /** + * @aliases: List of struct perf_pmu_alias. Each alias corresponds to an + * event read from <sysfs>/bus/event_source/devices/<name>/events/ or + * from json events in pmu-events.c. + */ + struct list_head aliases; + /** @caps_initialized: Has the list caps been initialized? */ bool caps_initialized; + /** @nr_caps: The length of the list caps. */ u32 nr_caps; - struct list_head caps; /* HEAD struct perf_pmu_caps -> list */ - struct list_head list; /* ELEM */ + /** + * @caps: Holds the contents of files read from + * <sysfs>/bus/event_source/devices/<name>/caps/. + * + * The contents are pairs of the filename with the value of its + * contents, for example, max_precise (see above) may have a value of 3. + */ + struct list_head caps; + /** @list: Element on pmus list in pmu.c. */ + struct list_head list; + /** @hybrid_list: Element on perf_pmu__hybrid_pmus. */ struct list_head hybrid_list; + /** + * @missing_features: Features to inhibit when events on this PMU are + * opened. + */ struct { + /** + * @exclude_guest: Disables perf_event_attr exclude_guest and + * exclude_host. + */ bool exclude_guest; } missing_features; }; +/** @perf_pmu__fake: A special global PMU used for testing. 
*/ extern struct perf_pmu perf_pmu__fake; struct perf_pmu_info { @@ -71,21 +141,60 @@ struct perf_pmu_info { #define UNIT_MAX_LEN 31 /* max length for event unit name */ +/** + * struct perf_pmu_alias - An event either read from sysfs or builtin in + * pmu-events.c, created by parsing the pmu-events json files. + */ struct perf_pmu_alias { + /** @name: Name of the event like "mem-loads". */ char *name; + /** @desc: Optional short description of the event. */ char *desc; + /** @long_desc: Optional long description. */ char *long_desc; + /** + * @topic: Optional topic such as cache or pipeline, particularly for + * json events. + */ char *topic; + /** + * @str: Comma separated parameter list like + * "event=0xcd,umask=0x1,ldlat=0x3". + */ char *str; - struct list_head terms; /* HEAD struct parse_events_term -> list */ - struct list_head list; /* ELEM */ + /** @terms: Owned list of the original parsed parameters. */ + struct list_head terms; + /** @list: List element of struct perf_pmu aliases. */ + struct list_head list; + /** @unit: Units for the event, such as bytes or cache lines. */ char unit[UNIT_MAX_LEN+1]; + /** @scale: Value to scale read counter values by. */ double scale; + /** + * @per_pkg: Does the file + * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.per-pkg or + * equivalent json value exist and have the value 1. + */ bool per_pkg; + /** + * @snapshot: Does the file + * <sysfs>/bus/event_source/devices/<pmu_name>/events/<name>.snapshot + * exist and have the value 1. + */ bool snapshot; + /** + * @deprecated: Is the event hidden and so not shown in perf list by + * default. + */ bool deprecated; + /** + * @metric_expr: A metric expression associated with an event. Doing + * this makes little sense due to scale and unit applying to both. + */ char *metric_expr; + /** @metric_name: A name for the metric. unit applying to both. */ char *metric_name; + /** @pmu_name: The name copied from struct perf_pmu. 
*/ char *pmu_name; }; diff --git a/tools/perf/util/print-events.c b/tools/perf/util/print-events.c index c4d5d87fae2f..d53dba033597 100644 --- a/tools/perf/util/print-events.c +++ b/tools/perf/util/print-events.c @@ -52,40 +52,27 @@ static const struct event_symbol event_symbols_tool[PERF_TOOL_MAX] = { }, }; -static int cmp_string(const void *a, const void *b) -{ - const char * const *as = a; - const char * const *bs = b; - - return strcmp(*as, *bs); -} - /* * Print the events from <debugfs_mount_point>/tracing/events */ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only) { - DIR *sys_dir, *evt_dir; - struct dirent *sys_dirent, *evt_dirent; - char evt_path[MAXPATHLEN]; - char *dir_path; - char **evt_list = NULL; - unsigned int evt_i = 0, evt_num = 0; - bool evt_num_known = false; - -restart: - sys_dir = tracing_events__opendir(); - if (!sys_dir) - return; - - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_close_sys_dir; - } + struct dirent **sys_namelist = NULL; + bool printed = false; + int sys_items = tracing_events__scandir_alphasort(&sys_namelist); + + for (int i = 0; i < sys_items; i++) { + struct dirent *sys_dirent = sys_namelist[i]; + struct dirent **evt_namelist = NULL; + char *dir_path; + int evt_items; + + if (sys_dirent->d_type != DT_DIR || + !strcmp(sys_dirent->d_name, ".") || + !strcmp(sys_dirent->d_name, "..")) + continue; - for_each_subsystem(sys_dir, sys_dirent) { if (subsys_glob != NULL && !strglobmatch(sys_dirent->d_name, subsys_glob)) continue; @@ -93,69 +80,40 @@ restart: dir_path = get_events_file(sys_dirent->d_name); if (!dir_path) continue; - evt_dir = opendir(dir_path); - if (!evt_dir) - goto next; - for_each_event(dir_path, evt_dir, evt_dirent) { - if (event_glob != NULL && - !strglobmatch(evt_dirent->d_name, event_glob)) + evt_items = scandir(dir_path, &evt_namelist, NULL, alphasort); + for (int j = 0; j < evt_items; j++) { + struct dirent 
*evt_dirent = evt_namelist[j]; + char evt_path[MAXPATHLEN]; + + if (evt_dirent->d_type != DT_DIR || + !strcmp(evt_dirent->d_name, ".") || + !strcmp(evt_dirent->d_name, "..")) continue; - if (!evt_num_known) { - evt_num++; + if (tp_event_has_id(dir_path, evt_dirent) != 0) + continue; + + if (event_glob != NULL && + !strglobmatch(evt_dirent->d_name, event_glob)) continue; - } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent->d_name, evt_dirent->d_name); - - evt_list[evt_i] = strdup(evt_path); - if (evt_list[evt_i] == NULL) { - put_events_file(dir_path); - goto out_close_evt_dir; + if (name_only) + printf("%s ", evt_path); + else { + printf(" %-50s [%s]\n", evt_path, + event_type_descriptors[PERF_TYPE_TRACEPOINT]); } - evt_i++; - } - closedir(evt_dir); -next: - put_events_file(dir_path); - } - closedir(sys_dir); - - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { - if (name_only) { - printf("%s ", evt_list[evt_i++]); - continue; + printed = true; } - printf(" %-50s [%s]\n", evt_list[evt_i++], - event_type_descriptors[PERF_TYPE_TRACEPOINT]); + free(dir_path); + free(evt_namelist); } - if (evt_num && pager_in_use()) + free(sys_namelist); + if (printed && pager_in_use()) printf("\n"); - -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_close_evt_dir: - closedir(evt_dir); -out_close_sys_dir: - closedir(sys_dir); - - printf("FATAL: not enough memory to print %s\n", - event_type_descriptors[PERF_TYPE_TRACEPOINT]); - if (evt_list) - goto out_free; } void print_sdt_events(const char *subsys_glob, const char *event_glob, @@ -240,137 +198,59 @@ void print_sdt_events(const char *subsys_glob, const char *event_glob, int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, evt_i = 0, evt_num = 0, npmus = 0; - char name[64], new_name[128]; - 
char **evt_list = NULL, **evt_pmus = NULL; - bool evt_num_known = false; - struct perf_pmu *pmu = NULL; - - if (perf_pmu__has_hybrid()) { - npmus = perf_pmu__hybrid_pmu_num(); - evt_pmus = zalloc(sizeof(char *) * npmus); - if (!evt_pmus) - goto out_enomem; - } + struct strlist *evt_name_list = strlist__new(NULL, NULL); + struct str_node *nd; -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; + if (!evt_name_list) { + pr_debug("Failed to allocate new strlist for hwcache events\n"); + return -ENOMEM; } - - for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { - for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { + for (int type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { + for (int op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { /* skip invalid cache type */ if (!evsel__is_cache_op_valid(type, op)) continue; - for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { - unsigned int hybrid_supported = 0, j; - bool supported; + for (int i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { + struct perf_pmu *pmu = NULL; + char name[64]; __evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); if (event_glob != NULL && !strglobmatch(name, event_glob)) continue; if (!perf_pmu__has_hybrid()) { - if (!is_event_supported(PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16))) { - continue; - } - } else { - perf_pmu__for_each_hybrid_pmu(pmu) { - if (!evt_num_known) { - evt_num++; - continue; - } - - supported = is_event_supported( - PERF_TYPE_HW_CACHE, - type | (op << 8) | (i << 16) | - ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT)); - if (supported) { - snprintf(new_name, sizeof(new_name), - "%s/%s/", pmu->name, name); - evt_pmus[hybrid_supported] = - strdup(new_name); - hybrid_supported++; - } - } - - if (hybrid_supported == 0) - continue; - } - - if (!evt_num_known) { - evt_num++; + if (is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16))) + strlist__add(evt_name_list, name); 
continue; } - - if ((hybrid_supported == 0) || - (hybrid_supported == npmus)) { - evt_list[evt_i] = strdup(name); - if (npmus > 0) { - for (j = 0; j < npmus; j++) - zfree(&evt_pmus[j]); - } - } else { - for (j = 0; j < hybrid_supported; j++) { - evt_list[evt_i++] = evt_pmus[j]; - evt_pmus[j] = NULL; + perf_pmu__for_each_hybrid_pmu(pmu) { + if (is_event_supported(PERF_TYPE_HW_CACHE, + type | (op << 8) | (i << 16) | + ((__u64)pmu->type << PERF_PMU_TYPE_SHIFT))) { + char new_name[128]; + snprintf(new_name, sizeof(new_name), + "%s/%s/", pmu->name, name); + strlist__add(evt_name_list, new_name); } - continue; } - - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; } } } - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - - for (evt_i = 0; evt_i < evt_num; evt_i++) { - if (!evt_list[evt_i]) - break; - } - - evt_num = evt_i; - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { + strlist__for_each_entry(nd, evt_name_list) { if (name_only) { - printf("%s ", evt_list[evt_i++]); + printf("%s ", nd->s); continue; } - printf(" %-50s [%s]\n", evt_list[evt_i++], - event_type_descriptors[PERF_TYPE_HW_CACHE]); + printf(" %-50s [%s]\n", nd->s, event_type_descriptors[PERF_TYPE_HW_CACHE]); } - if (evt_num && pager_in_use()) + if (!strlist__empty(evt_name_list) && pager_in_use()) printf("\n"); -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - - for (evt_i = 0; evt_i < npmus; evt_i++) - zfree(&evt_pmus[evt_i]); - zfree(&evt_pmus); - return evt_num; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", - event_type_descriptors[PERF_TYPE_HW_CACHE]); - if (evt_list) - goto out_free; - return evt_num; + strlist__delete(evt_name_list); + return 0; } static void print_tool_event(const struct event_symbol *syms, const char *event_glob, @@ -410,77 +290,48 @@ void print_symbol_events(const char *event_glob, unsigned int type, struct 
event_symbol *syms, unsigned int max, bool name_only) { - unsigned int i, evt_i = 0, evt_num = 0; - char name[MAX_NAME_LEN]; - char **evt_list = NULL; - bool evt_num_known = false; - -restart: - if (evt_num_known) { - evt_list = zalloc(sizeof(char *) * evt_num); - if (!evt_list) - goto out_enomem; - syms -= max; - } + struct strlist *evt_name_list = strlist__new(NULL, NULL); + struct str_node *nd; - for (i = 0; i < max; i++, syms++) { + if (!evt_name_list) { + pr_debug("Failed to allocate new strlist for symbol events\n"); + return; + } + for (unsigned int i = 0; i < max; i++) { /* * New attr.config still not supported here, the latest * example was PERF_COUNT_SW_CGROUP_SWITCHES */ - if (syms->symbol == NULL) + if (syms[i].symbol == NULL) continue; - if (event_glob != NULL && !(strglobmatch(syms->symbol, event_glob) || - (syms->alias && strglobmatch(syms->alias, event_glob)))) + if (event_glob != NULL && !(strglobmatch(syms[i].symbol, event_glob) || + (syms[i].alias && strglobmatch(syms[i].alias, event_glob)))) continue; if (!is_event_supported(type, i)) continue; - if (!evt_num_known) { - evt_num++; - continue; - } - - if (!name_only && strlen(syms->alias)) - snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); - else - strlcpy(name, syms->symbol, MAX_NAME_LEN); + if (strlen(syms[i].alias)) { + char name[MAX_NAME_LEN]; - evt_list[evt_i] = strdup(name); - if (evt_list[evt_i] == NULL) - goto out_enomem; - evt_i++; + snprintf(name, MAX_NAME_LEN, "%s OR %s", syms[i].symbol, syms[i].alias); + strlist__add(evt_name_list, name); + } else + strlist__add(evt_name_list, syms[i].symbol); } - if (!evt_num_known) { - evt_num_known = true; - goto restart; - } - qsort(evt_list, evt_num, sizeof(char *), cmp_string); - evt_i = 0; - while (evt_i < evt_num) { + strlist__for_each_entry(nd, evt_name_list) { if (name_only) { - printf("%s ", evt_list[evt_i++]); + printf("%s ", nd->s); continue; } - printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); 
+ printf(" %-50s [%s]\n", nd->s, event_type_descriptors[type]); } - if (evt_num && pager_in_use()) + if (!strlist__empty(evt_name_list) && pager_in_use()) printf("\n"); -out_free: - evt_num = evt_i; - for (evt_i = 0; evt_i < evt_num; evt_i++) - zfree(&evt_list[evt_i]); - zfree(&evt_list); - return; - -out_enomem: - printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); - if (evt_list) - goto out_free; + strlist__delete(evt_name_list); } /* diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index 50d861a80f57..54b49ce85c9f 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -763,7 +763,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) /* Skip if declared file name does not match */ if (fsp->file) { - file = dwarf_decl_file(fn_die); + file = die_get_decl_file(fn_die); if (!file || strcmp(fsp->file, file) != 0) return 0; } @@ -1063,6 +1063,7 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct probe_finder *pf = param->data; struct perf_probe_point *pp = &pf->pev->point; + const char *fname; /* Check tag and diename */ if (!die_is_func_def(sp_die) || @@ -1070,12 +1071,17 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) return DWARF_CB_OK; /* Check declared file */ - if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die))) + fname = die_get_decl_file(sp_die); + if (!fname) { + pr_warning("A function DIE doesn't have decl_line. 
Maybe broken DWARF?\n"); + return DWARF_CB_OK; + } + if (pp->file && fname && strtailcmp(pp->file, fname)) return DWARF_CB_OK; pr_debug("Matched function: %s [%lx]\n", dwarf_diename(sp_die), (unsigned long)dwarf_dieoffset(sp_die)); - pf->fname = dwarf_decl_file(sp_die); + pf->fname = fname; if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; @@ -1134,6 +1140,7 @@ struct pubname_callback_param { static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) { struct pubname_callback_param *param = data; + const char *fname; if (dwarf_offdie(dbg, gl->die_offset, param->sp_die)) { if (dwarf_tag(param->sp_die) != DW_TAG_subprogram) @@ -1143,9 +1150,11 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die)) return DWARF_CB_OK; - if (param->file && - strtailcmp(param->file, dwarf_decl_file(param->sp_die))) - return DWARF_CB_OK; + if (param->file) { + fname = die_get_decl_file(param->sp_die); + if (!fname || strtailcmp(param->file, fname)) + return DWARF_CB_OK; + } param->found = 1; return DWARF_CB_ABORT; @@ -1741,7 +1750,7 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, goto post; } - fname = dwarf_decl_file(&spdie); + fname = die_get_decl_file(&spdie); if (addr == baseaddr) { /* Function entry - Relative line number is 0 */ lineno = baseline; @@ -1778,8 +1787,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, u64 addr, } } /* Verify the lineno and baseline are in a same file */ - tmp = dwarf_decl_file(&spdie); - if (!tmp || strcmp(tmp, fname) != 0) + tmp = die_get_decl_file(&spdie); + if (!tmp || (fname && strcmp(tmp, fname) != 0)) lineno = 0; } @@ -1889,13 +1898,17 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) struct dwarf_callback_param *param = data; struct line_finder *lf = param->data; struct line_range *lr = lf->lr; + const char *fname; /* Check declared file */ - if (lr->file && 
strtailcmp(lr->file, dwarf_decl_file(sp_die))) - return DWARF_CB_OK; + if (lr->file) { + fname = die_get_decl_file(sp_die); + if (!fname || strtailcmp(lr->file, fname)) + return DWARF_CB_OK; + } if (die_match_name(sp_die, lr->function) && die_is_func_def(sp_die)) { - lf->fname = dwarf_decl_file(sp_die); + lf->fname = die_get_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); lf->lno_s = lr->offset + lr->start; diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index 5be5fa2391de..b5941c74a0d6 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -718,17 +718,17 @@ static Py_ssize_t pyrf_thread_map__length(PyObject *obj) { struct pyrf_thread_map *pthreads = (void *)obj; - return pthreads->threads->nr; + return perf_thread_map__nr(pthreads->threads); } static PyObject *pyrf_thread_map__item(PyObject *obj, Py_ssize_t i) { struct pyrf_thread_map *pthreads = (void *)obj; - if (i >= pthreads->threads->nr) + if (i >= perf_thread_map__nr(pthreads->threads)) return NULL; - return Py_BuildValue("i", pthreads->threads->map[i]); + return Py_BuildValue("i", perf_thread_map__pid(pthreads->threads, i)); } static PySequenceMethods pyrf_thread_map__sequence_methods = { diff --git a/tools/perf/util/s390-cpumsf.c b/tools/perf/util/s390-cpumsf.c index f3fdad28a852..6fe478b0b61b 100644 --- a/tools/perf/util/s390-cpumsf.c +++ b/tools/perf/util/s390-cpumsf.c @@ -163,6 +163,7 @@ #include "s390-cpumsf-kernel.h" #include "s390-cpumcf-kernel.h" #include "config.h" +#include "util/sample.h" struct s390_cpumsf { struct auxtrace auxtrace; diff --git a/tools/perf/util/s390-sample-raw.c b/tools/perf/util/s390-sample-raw.c index 9a631d97471c..c10b891dbad6 100644 --- a/tools/perf/util/s390-sample-raw.c +++ b/tools/perf/util/s390-sample-raw.c @@ -28,6 +28,7 @@ #include "sample-raw.h" #include "s390-cpumcf-kernel.h" #include "pmu-events/pmu-events.h" +#include "util/sample.h" static size_t 
ctrset_size(struct cf_ctrset_entry *set) { diff --git a/tools/perf/util/sample.h b/tools/perf/util/sample.h new file mode 100644 index 000000000000..60ec79d4eea4 --- /dev/null +++ b/tools/perf/util/sample.h @@ -0,0 +1,117 @@ +/* SPDX-License-Identifier: GPL-2.0 */ +#ifndef __PERF_SAMPLE_H +#define __PERF_SAMPLE_H + +#include <linux/perf_event.h> +#include <linux/types.h> + +/* number of register is bound by the number of bits in regs_dump::mask (64) */ +#define PERF_SAMPLE_REGS_CACHE_SIZE (8 * sizeof(u64)) + +struct regs_dump { + u64 abi; + u64 mask; + u64 *regs; + + /* Cached values/mask filled by first register access. */ + u64 cache_regs[PERF_SAMPLE_REGS_CACHE_SIZE]; + u64 cache_mask; +}; + +struct stack_dump { + u16 offset; + u64 size; + char *data; +}; + +struct sample_read_value { + u64 value; + u64 id; /* only if PERF_FORMAT_ID */ + u64 lost; /* only if PERF_FORMAT_LOST */ +}; + +struct sample_read { + u64 time_enabled; + u64 time_running; + union { + struct { + u64 nr; + struct sample_read_value *values; + } group; + struct sample_read_value one; + }; +}; + +static inline size_t sample_read_value_size(u64 read_format) +{ + /* PERF_FORMAT_ID is forced for PERF_SAMPLE_READ */ + if (read_format & PERF_FORMAT_LOST) + return sizeof(struct sample_read_value); + else + return offsetof(struct sample_read_value, lost); +} + +static inline struct sample_read_value *next_sample_read_value(struct sample_read_value *v, u64 read_format) +{ + return (void *)v + sample_read_value_size(read_format); +} + +#define sample_read_group__for_each(v, nr, rf) \ + for (int __i = 0; __i < (int)nr; v = next_sample_read_value(v, rf), __i++) + +#define MAX_INSN 16 + +struct aux_sample { + u64 size; + void *data; +}; + +struct perf_sample { + u64 ip; + u32 pid, tid; + u64 time; + u64 addr; + u64 id; + u64 stream_id; + u64 period; + u64 weight; + u64 transaction; + u64 insn_cnt; + u64 cyc_cnt; + u32 cpu; + u32 raw_size; + u64 data_src; + u64 phys_addr; + u64 data_page_size; + u64 
code_page_size; + u64 cgroup; + u32 flags; + u32 machine_pid; + u32 vcpu; + u16 insn_len; + u8 cpumode; + u16 misc; + u16 ins_lat; + u16 p_stage_cyc; + bool no_hw_idx; /* No hw_idx collected in branch_stack */ + char insn[MAX_INSN]; + void *raw_data; + struct ip_callchain *callchain; + struct branch_stack *branch_stack; + struct regs_dump user_regs; + struct regs_dump intr_regs; + struct stack_dump user_stack; + struct sample_read read; + struct aux_sample aux_sample; +}; + +/* + * raw_data is always 4 bytes from an 8-byte boundary, so subtract 4 to get + * 8-byte alignment. + */ +static inline void *perf_sample__synth_ptr(struct perf_sample *sample) +{ + return sample->raw_data - 4; +} + +#endif /* __PERF_SAMPLE_H */ diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 1f2040f36d4e..1cf65db8f861 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -52,6 +52,7 @@ #include "print_binary.h" #include "stat.h" #include "mem-events.h" +#include "util/perf_regs.h" #if PY_MAJOR_VERSION < 3 #define _PyUnicode_FromString(arg) \ @@ -1653,13 +1654,7 @@ static void python_process_stat(struct perf_stat_config *config, struct perf_cpu_map *cpus = counter->core.cpus; int cpu, thread; - if (config->aggr_mode == AGGR_GLOBAL) { - process_stat(counter, (struct perf_cpu){ .cpu = -1 }, -1, tstamp, - &counter->counts->aggr); - return; - } - - for (thread = 0; thread < threads->nr; thread++) { + for (thread = 0; thread < perf_thread_map__nr(threads); thread++) { for (cpu = 0; cpu < perf_cpu_map__nr(cpus); cpu++) { process_stat(counter, perf_cpu_map__cpu(cpus, cpu), perf_thread_map__pid(threads, thread), tstamp, diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 1a4f10de29ff..0e1a3d6bacb9 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -1,5 +1,6 @@ // SPDX-License-Identifier: GPL-2.0 
#include <errno.h> +#include <signal.h> #include <inttypes.h> #include <linux/err.h> #include <linux/kernel.h> @@ -2022,7 +2023,7 @@ static int perf_session__flush_thread_stacks(struct perf_session *session) NULL); } -volatile int session_done; +volatile sig_atomic_t session_done; static int __perf_session__process_decomp_events(struct perf_session *session); diff --git a/tools/perf/util/stat-display.c b/tools/perf/util/stat-display.c index ba66bb7fc1ca..f5501760ff2e 100644 --- a/tools/perf/util/stat-display.c +++ b/tools/perf/util/stat-display.c @@ -25,41 +25,124 @@ #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" -static void print_running(struct perf_stat_config *config, - u64 run, u64 ena) +#define METRIC_LEN 38 +#define EVNAME_LEN 32 +#define COUNTS_LEN 18 +#define INTERVAL_LEN 16 +#define CGROUP_LEN 16 +#define COMM_LEN 16 +#define PID_LEN 7 +#define CPUS_LEN 4 + +static int aggr_header_lens[] = { + [AGGR_CORE] = 18, + [AGGR_DIE] = 12, + [AGGR_SOCKET] = 6, + [AGGR_NODE] = 6, + [AGGR_NONE] = 6, + [AGGR_THREAD] = 16, + [AGGR_GLOBAL] = 0, +}; + +static const char *aggr_header_csv[] = { + [AGGR_CORE] = "core,cpus,", + [AGGR_DIE] = "die,cpus,", + [AGGR_SOCKET] = "socket,cpus,", + [AGGR_NONE] = "cpu,", + [AGGR_THREAD] = "comm-pid,", + [AGGR_NODE] = "node,", + [AGGR_GLOBAL] = "" +}; + +static const char *aggr_header_std[] = { + [AGGR_CORE] = "core", + [AGGR_DIE] = "die", + [AGGR_SOCKET] = "socket", + [AGGR_NONE] = "cpu", + [AGGR_THREAD] = "comm-pid", + [AGGR_NODE] = "node", + [AGGR_GLOBAL] = "" +}; + +static void print_running_std(struct perf_stat_config *config, u64 run, u64 ena) { + if (run != ena) + fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); +} +static void print_running_csv(struct perf_stat_config *config, u64 run, u64 ena) +{ double enabled_percent = 100; if (run != ena) enabled_percent = 100 * run / ena; - if (config->json_output) - fprintf(config->output, - "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : 
%.2f, ", - run, enabled_percent); - else if (config->csv_output) - fprintf(config->output, - "%s%" PRIu64 "%s%.2f", config->csv_sep, - run, config->csv_sep, enabled_percent); - else if (run != ena) - fprintf(config->output, " (%.2f%%)", 100.0 * run / ena); + fprintf(config->output, "%s%" PRIu64 "%s%.2f", + config->csv_sep, run, config->csv_sep, enabled_percent); +} + +static void print_running_json(struct perf_stat_config *config, u64 run, u64 ena) +{ + double enabled_percent = 100; + + if (run != ena) + enabled_percent = 100 * run / ena; + fprintf(config->output, "\"event-runtime\" : %" PRIu64 ", \"pcnt-running\" : %.2f, ", + run, enabled_percent); +} + +static void print_running(struct perf_stat_config *config, + u64 run, u64 ena, bool before_metric) +{ + if (config->json_output) { + if (before_metric) + print_running_json(config, run, ena); + } else if (config->csv_output) { + if (before_metric) + print_running_csv(config, run, ena); + } else { + if (!before_metric) + print_running_std(config, run, ena); + } +} + +static void print_noise_pct_std(struct perf_stat_config *config, + double pct) +{ + if (pct) + fprintf(config->output, " ( +-%6.2f%% )", pct); +} + +static void print_noise_pct_csv(struct perf_stat_config *config, + double pct) +{ + fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); +} + +static void print_noise_pct_json(struct perf_stat_config *config, + double pct) +{ + fprintf(config->output, "\"variance\" : %.2f, ", pct); } static void print_noise_pct(struct perf_stat_config *config, - double total, double avg) + double total, double avg, bool before_metric) { double pct = rel_stddev_stats(total, avg); - if (config->json_output) - fprintf(config->output, "\"variance\" : %.2f, ", pct); - else if (config->csv_output) - fprintf(config->output, "%s%.2f%%", config->csv_sep, pct); - else if (pct) - fprintf(config->output, " ( +-%6.2f%% )", pct); + if (config->json_output) { + if (before_metric) + print_noise_pct_json(config, pct); + } else if 
(config->csv_output) { + if (before_metric) + print_noise_pct_csv(config, pct); + } else { + if (!before_metric) + print_noise_pct_std(config, pct); + } } static void print_noise(struct perf_stat_config *config, - struct evsel *evsel, double avg) + struct evsel *evsel, double avg, bool before_metric) { struct perf_stat_evsel *ps; @@ -67,139 +150,169 @@ static void print_noise(struct perf_stat_config *config, return; ps = evsel->stats; - print_noise_pct(config, stddev_stats(&ps->res_stats), avg); + print_noise_pct(config, stddev_stats(&ps->res_stats), avg, before_metric); } -static void print_cgroup(struct perf_stat_config *config, struct evsel *evsel) +static void print_cgroup_std(struct perf_stat_config *config, const char *cgrp_name) { - if (nr_cgroups) { - const char *cgrp_name = evsel->cgrp ? evsel->cgrp->name : ""; + fprintf(config->output, " %-*s", CGROUP_LEN, cgrp_name); +} + +static void print_cgroup_csv(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); +} + +static void print_cgroup_json(struct perf_stat_config *config, const char *cgrp_name) +{ + fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); +} + +static void print_cgroup(struct perf_stat_config *config, struct cgroup *cgrp) +{ + if (nr_cgroups || config->cgroup_list) { + const char *cgrp_name = cgrp ? 
cgrp->name : ""; if (config->json_output) - fprintf(config->output, "\"cgroup\" : \"%s\", ", cgrp_name); + print_cgroup_json(config, cgrp_name); + if (config->csv_output) + print_cgroup_csv(config, cgrp_name); else - fprintf(config->output, "%s%s", config->csv_sep, cgrp_name); + print_cgroup_std(config, cgrp_name); } } - -static void aggr_printout(struct perf_stat_config *config, - struct evsel *evsel, struct aggr_cpu_id id, int nr) +static void print_aggr_id_std(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) { + FILE *output = config->output; + int idx = config->aggr_mode; + char buf[128]; + switch (config->aggr_mode) { + case AGGR_CORE: + snprintf(buf, sizeof(buf), "S%d-D%d-C%d", id.socket, id.die, id.core); + break; + case AGGR_DIE: + snprintf(buf, sizeof(buf), "S%d-D%d", id.socket, id.die); + break; + case AGGR_SOCKET: + snprintf(buf, sizeof(buf), "S%d", id.socket); + break; + case AGGR_NODE: + snprintf(buf, sizeof(buf), "N%d", id.node); + break; + case AGGR_NONE: + if (evsel->percore && !config->percore_show_thread) { + snprintf(buf, sizeof(buf), "S%d-D%d-C%d ", + id.socket, id.die, id.core); + fprintf(output, "%-*s ", + aggr_header_lens[AGGR_CORE], buf); + } else if (id.cpu.cpu > -1) { + fprintf(output, "CPU%-*d ", + aggr_header_lens[AGGR_NONE] - 3, id.cpu.cpu); + } + return; + case AGGR_THREAD: + fprintf(output, "%*s-%-*d ", + COMM_LEN, perf_thread_map__comm(evsel->core.threads, id.thread_idx), + PID_LEN, perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + return; + case AGGR_GLOBAL: + case AGGR_UNSET: + case AGGR_MAX: + default: + return; + } - if (config->json_output && !config->interval) - fprintf(config->output, "{"); + fprintf(output, "%-*s %*d ", aggr_header_lens[idx], buf, 4, nr); +} + +static void print_aggr_id_csv(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + FILE *output = config->output; + const char *sep = config->csv_sep; switch (config->aggr_mode) { 
case AGGR_CORE: - if (config->json_output) { - fprintf(config->output, - "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", - id.socket, - id.die, - id.core, - nr); - } else { - fprintf(config->output, "S%d-D%d-C%*d%s%*d%s", - id.socket, - id.die, - config->csv_output ? 0 : -8, - id.core, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d-D%d-C%d%s%d%s", + id.socket, id.die, id.core, sep, nr, sep); break; case AGGR_DIE: - if (config->json_output) { - fprintf(config->output, - "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", - id.socket, - id.die, - nr); - } else { - fprintf(config->output, "S%d-D%*d%s%*d%s", - id.socket, - config->csv_output ? 0 : -8, - id.die, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d-D%d%s%d%s", + id.socket, id.die, sep, nr, sep); break; case AGGR_SOCKET: - if (config->json_output) { - fprintf(config->output, - "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", - id.socket, - nr); - } else { - fprintf(config->output, "S%*d%s%*d%s", - config->csv_output ? 0 : -5, - id.socket, - config->csv_sep, - config->csv_output ? 0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "S%d%s%d%s", + id.socket, sep, nr, sep); break; case AGGR_NODE: - if (config->json_output) { - fprintf(config->output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", - id.node, - nr); - } else { - fprintf(config->output, "N%*d%s%*d%s", - config->csv_output ? 0 : -5, - id.node, - config->csv_sep, - config->csv_output ? 
0 : 4, - nr, - config->csv_sep); - } + fprintf(output, "N%d%s%d%s", + id.node, sep, nr, sep); break; case AGGR_NONE: - if (config->json_output) { - if (evsel->percore && !config->percore_show_thread) { - fprintf(config->output, "\"core\" : \"S%d-D%d-C%d\"", - id.socket, - id.die, - id.core); - } else if (id.cpu.cpu > -1) { - fprintf(config->output, "\"cpu\" : \"%d\", ", - id.cpu.cpu); - } - } else { - if (evsel->percore && !config->percore_show_thread) { - fprintf(config->output, "S%d-D%d-C%*d%s", - id.socket, - id.die, - config->csv_output ? 0 : -3, - id.core, config->csv_sep); - } else if (id.cpu.cpu > -1) { - fprintf(config->output, "CPU%*d%s", - config->csv_output ? 0 : -7, - id.cpu.cpu, config->csv_sep); - } + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "S%d-D%d-C%d%s", + id.socket, id.die, id.core, sep); + } else if (id.cpu.cpu > -1) { + fprintf(output, "CPU%d%s", + id.cpu.cpu, sep); } break; case AGGR_THREAD: - if (config->json_output) { - fprintf(config->output, "\"thread\" : \"%s-%d\", ", - perf_thread_map__comm(evsel->core.threads, id.thread_idx), - perf_thread_map__pid(evsel->core.threads, id.thread_idx)); - } else { - fprintf(config->output, "%*s-%*d%s", - config->csv_output ? 0 : 16, - perf_thread_map__comm(evsel->core.threads, id.thread_idx), - config->csv_output ? 
0 : -8, - perf_thread_map__pid(evsel->core.threads, id.thread_idx), - config->csv_sep); + fprintf(output, "%s-%d%s", + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + perf_thread_map__pid(evsel->core.threads, id.thread_idx), + sep); + break; + case AGGR_GLOBAL: + case AGGR_UNSET: + case AGGR_MAX: + default: + break; + } +} + +static void print_aggr_id_json(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + FILE *output = config->output; + + if (!config->interval) + fputc('{', output); + + switch (config->aggr_mode) { + case AGGR_CORE: + fprintf(output, "\"core\" : \"S%d-D%d-C%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, id.core, nr); + break; + case AGGR_DIE: + fprintf(output, "\"die\" : \"S%d-D%d\", \"aggregate-number\" : %d, ", + id.socket, id.die, nr); + break; + case AGGR_SOCKET: + fprintf(output, "\"socket\" : \"S%d\", \"aggregate-number\" : %d, ", + id.socket, nr); + break; + case AGGR_NODE: + fprintf(output, "\"node\" : \"N%d\", \"aggregate-number\" : %d, ", + id.node, nr); + break; + case AGGR_NONE: + if (evsel->percore && !config->percore_show_thread) { + fprintf(output, "\"core\" : \"S%d-D%d-C%d\"", + id.socket, id.die, id.core); + } else if (id.cpu.cpu > -1) { + fprintf(output, "\"cpu\" : \"%d\", ", + id.cpu.cpu); } break; + case AGGR_THREAD: + fprintf(output, "\"thread\" : \"%s-%d\", ", + perf_thread_map__comm(evsel->core.threads, id.thread_idx), + perf_thread_map__pid(evsel->core.threads, id.thread_idx)); + break; case AGGR_GLOBAL: case AGGR_UNSET: case AGGR_MAX: @@ -208,6 +321,17 @@ static void aggr_printout(struct perf_stat_config *config, } } +static void aggr_printout(struct perf_stat_config *config, + struct evsel *evsel, struct aggr_cpu_id id, int nr) +{ + if (config->json_output) + print_aggr_id_json(config, evsel, id, nr); + else if (config->csv_output) + print_aggr_id_csv(config, evsel, id, nr); + else + print_aggr_id_std(config, evsel, id, nr); +} + struct outstate { FILE *fh; 
bool newline; @@ -216,10 +340,9 @@ struct outstate { int nr; struct aggr_cpu_id id; struct evsel *evsel; + struct cgroup *cgrp; }; -#define METRIC_LEN 35 - static void new_line_std(struct perf_stat_config *config __maybe_unused, void *ctx) { @@ -430,84 +553,100 @@ static void print_metric_header(struct perf_stat_config *config, os->evsel->priv != os->evsel->evlist->selected->priv) return; - if (!valid_only_metric(unit) && !config->json_output) + if (os->evsel->cgrp != os->cgrp) + return; + + if (!valid_only_metric(unit)) return; unit = fixunit(tbuf, os->evsel, unit); if (config->json_output) - fprintf(os->fh, "\"unit\" : \"%s\"", unit); + fprintf(os->fh, "{\"unit\" : \"%s\"}", unit); else if (config->csv_output) fprintf(os->fh, "%s%s", unit, config->csv_sep); else fprintf(os->fh, "%*s ", config->metric_only_len, unit); } -static int first_shadow_map_idx(struct perf_stat_config *config, - struct evsel *evsel, const struct aggr_cpu_id *id) +static void print_counter_value_std(struct perf_stat_config *config, + struct evsel *evsel, double avg, bool ok) { - struct perf_cpu_map *cpus = evsel__cpus(evsel); - struct perf_cpu cpu; - int idx; - - if (config->aggr_mode == AGGR_NONE) - return perf_cpu_map__idx(cpus, id->cpu); + FILE *output = config->output; + double sc = evsel->scale; + const char *fmt; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; - if (config->aggr_mode == AGGR_THREAD) - return id->thread_idx; + if (config->big_num) + fmt = floor(sc) != sc ? "%'*.2f " : "%'*.0f "; + else + fmt = floor(sc) != sc ? 
"%*.2f " : "%*.0f "; - if (!config->aggr_get_id) - return 0; + if (ok) + fprintf(output, fmt, COUNTS_LEN, avg); + else + fprintf(output, "%*s ", COUNTS_LEN, bad_count); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct aggr_cpu_id cpu_id = config->aggr_get_id(config, cpu); + if (evsel->unit) + fprintf(output, "%-*s ", config->unit_width, evsel->unit); - if (aggr_cpu_id__equal(&cpu_id, id)) - return idx; - } - return 0; + fprintf(output, "%-*s", EVNAME_LEN, evsel__name(evsel)); } -static void abs_printout(struct perf_stat_config *config, - struct aggr_cpu_id id, int nr, struct evsel *evsel, double avg) +static void print_counter_value_csv(struct perf_stat_config *config, + struct evsel *evsel, double avg, bool ok) { FILE *output = config->output; double sc = evsel->scale; - const char *fmt; + const char *sep = config->csv_sep; + const char *fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; + const char *bad_count = evsel->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; - if (config->csv_output) { - fmt = floor(sc) != sc ? "%.2f%s" : "%.0f%s"; - } else { - if (config->big_num) - fmt = floor(sc) != sc ? "%'18.2f%s" : "%'18.0f%s"; - else - fmt = floor(sc) != sc ? "%18.2f%s" : "%18.0f%s"; - } + if (ok) + fprintf(output, fmt, avg, sep); + else + fprintf(output, "%s%s", bad_count, sep); - aggr_printout(config, evsel, id, nr); + if (evsel->unit) + fprintf(output, "%s%s", evsel->unit, sep); - if (config->json_output) + fprintf(output, "%s", evsel__name(evsel)); +} + +static void print_counter_value_json(struct perf_stat_config *config, + struct evsel *evsel, double avg, bool ok) +{ + FILE *output = config->output; + const char *bad_count = evsel->supported ? 
CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED; + + if (ok) fprintf(output, "\"counter-value\" : \"%f\", ", avg); else - fprintf(output, fmt, avg, config->csv_sep); + fprintf(output, "\"counter-value\" : \"%s\", ", bad_count); - if (config->json_output) { - if (evsel->unit) { - fprintf(output, "\"unit\" : \"%s\", ", - evsel->unit); - } - } else { - if (evsel->unit) - fprintf(output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - evsel->unit, config->csv_sep); - } + if (evsel->unit) + fprintf(output, "\"unit\" : \"%s\", ", evsel->unit); + fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); +} + +static void print_counter_value(struct perf_stat_config *config, + struct evsel *evsel, double avg, bool ok) +{ if (config->json_output) - fprintf(output, "\"event\" : \"%s\", ", evsel__name(evsel)); + print_counter_value_json(config, evsel, avg, ok); + else if (config->csv_output) + print_counter_value_csv(config, evsel, avg, ok); else - fprintf(output, "%-*s", config->csv_output ? 0 : 32, evsel__name(evsel)); + print_counter_value_std(config, evsel, avg, ok); +} - print_cgroup(config, evsel); +static void abs_printout(struct perf_stat_config *config, + struct aggr_cpu_id id, int nr, + struct evsel *evsel, double avg, bool ok) +{ + aggr_printout(config, evsel, id, nr); + print_counter_value(config, evsel, avg, ok); + print_cgroup(config, evsel->cgrp); } static bool is_mixed_hw_group(struct evsel *counter) @@ -537,7 +676,7 @@ static bool is_mixed_hw_group(struct evsel *counter) static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int nr, struct evsel *counter, double uval, char *prefix, u64 run, u64 ena, double noise, - struct runtime_stat *st) + struct runtime_stat *st, int map_idx) { struct perf_stat_output_ctx out; struct outstate os = { @@ -549,6 +688,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int }; print_metric_t pm; new_line_t nl; + bool ok = true; if (config->csv_output) { static const int 
aggr_fields[AGGR_MAX] = { @@ -574,7 +714,7 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int } if (!config->no_csv_summary && config->csv_output && - config->summary && !config->interval) { + config->summary && !config->interval && !config->metric_only) { fprintf(config->output, "%16s%s", "summary", config->csv_sep); } @@ -583,17 +723,8 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int pm(config, &os, NULL, "", "", 0); return; } - aggr_printout(config, counter, id, nr); - if (config->json_output) { - fprintf(config->output, "\"counter-value\" : \"%s\", ", - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED); - } else { - fprintf(config->output, "%*s%s", - config->csv_output ? 0 : 18, - counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, - config->csv_sep); - } + ok = false; if (counter->supported) { if (!evlist__has_hybrid(counter->evlist)) { @@ -602,86 +733,30 @@ static void printout(struct perf_stat_config *config, struct aggr_cpu_id id, int config->print_mixed_hw_group_error = 1; } } - - if (config->json_output) { - fprintf(config->output, "\"unit\" : \"%s\", ", counter->unit); - } else { - fprintf(config->output, "%-*s%s", - config->csv_output ? 0 : config->unit_width, - counter->unit, config->csv_sep); - } - - if (config->json_output) { - fprintf(config->output, "\"event\" : \"%s\", ", - evsel__name(counter)); - } else { - fprintf(config->output, "%*s", - config->csv_output ? 
0 : -25, evsel__name(counter)); - } - - print_cgroup(config, counter); - - if (!config->csv_output && !config->json_output) - pm(config, &os, NULL, NULL, "", 0); - print_noise(config, counter, noise); - print_running(config, run, ena); - if (config->csv_output) - pm(config, &os, NULL, NULL, "", 0); - else if (config->json_output) - pm(config, &os, NULL, NULL, "", 0); - return; } - if (!config->metric_only) - abs_printout(config, id, nr, counter, uval); - out.print_metric = pm; out.new_line = nl; out.ctx = &os; out.force_header = false; - if (config->csv_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); - } else if (config->json_output && !config->metric_only) { - print_noise(config, counter, noise); - print_running(config, run, ena); - } + if (!config->metric_only) { + abs_printout(config, id, nr, counter, uval, ok); - perf_stat__print_shadow_stats(config, counter, uval, - first_shadow_map_idx(config, counter, &id), - &out, &config->metric_events, st); - if (!config->csv_output && !config->metric_only && !config->json_output) { - print_noise(config, counter, noise); - print_running(config, run, ena); + print_noise(config, counter, noise, /*before_metric=*/true); + print_running(config, run, ena, /*before_metric=*/true); } -} -static void aggr_update_shadow(struct perf_stat_config *config, - struct evlist *evlist) -{ - int idx, s; - struct perf_cpu cpu; - struct aggr_cpu_id s2, id; - u64 val; - struct evsel *counter; - struct perf_cpu_map *cpus; + if (ok) { + perf_stat__print_shadow_stats(config, counter, uval, map_idx, + &out, &config->metric_events, st); + } else { + pm(config, &os, /*color=*/NULL, /*format=*/NULL, /*unit=*/"", /*val=*/0); + } - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - evlist__for_each_entry(evlist, counter) { - cpus = evsel__cpus(counter); - val = 0; - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - if 
(!aggr_cpu_id__equal(&s2, &id)) - continue; - val += perf_counts(counter->counts, idx, 0)->val; - } - perf_stat__update_shadow_stats(counter, val, - first_shadow_map_idx(config, counter, &id), - &rt_stat); - } + if (!config->metric_only) { + print_noise(config, counter, noise, /*before_metric=*/false); + print_running(config, run, ena, /*before_metric=*/false); } } @@ -704,7 +779,7 @@ static void uniquify_event_name(struct evsel *counter) counter->name = new_name; } } else { - if (perf_pmu__has_hybrid()) { + if (evsel__is_hybrid(counter)) { ret = asprintf(&new_name, "%s/%s/", counter->pmu_name, counter->name); } else { @@ -721,360 +796,162 @@ static void uniquify_event_name(struct evsel *counter) counter->uniquified_name = true; } -static void collect_all_aliases(struct perf_stat_config *config, struct evsel *counter, - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, - bool first), - void *data) +static bool hybrid_uniquify(struct evsel *evsel, struct perf_stat_config *config) { - struct evlist *evlist = counter->evlist; - struct evsel *alias; - - alias = list_prepare_entry(counter, &(evlist->core.entries), core.node); - list_for_each_entry_continue (alias, &evlist->core.entries, core.node) { - /* Merge events with the same name, etc. but on different PMUs. 
*/ - if (!strcmp(evsel__name(alias), evsel__name(counter)) && - alias->scale == counter->scale && - alias->cgrp == counter->cgrp && - !strcmp(alias->unit, counter->unit) && - evsel__is_clock(alias) == evsel__is_clock(counter) && - strcmp(alias->pmu_name, counter->pmu_name)) { - alias->merged_stat = true; - cb(config, alias, data, false); - } - } -} - -static bool is_uncore(struct evsel *evsel) -{ - struct perf_pmu *pmu = evsel__find_pmu(evsel); - - return pmu && pmu->is_uncore; + return evsel__is_hybrid(evsel) && !config->hybrid_merge; } -static bool hybrid_uniquify(struct evsel *evsel) +static void uniquify_counter(struct perf_stat_config *config, struct evsel *counter) { - return perf_pmu__has_hybrid() && !is_uncore(evsel); -} - -static bool hybrid_merge(struct evsel *counter, struct perf_stat_config *config, - bool check) -{ - if (hybrid_uniquify(counter)) { - if (check) - return config && config->hybrid_merge; - else - return config && !config->hybrid_merge; - } - - return false; -} - -static bool collect_data(struct perf_stat_config *config, struct evsel *counter, - void (*cb)(struct perf_stat_config *config, struct evsel *counter, void *data, - bool first), - void *data) -{ - if (counter->merged_stat) - return false; - cb(config, counter, data, true); - if (config->no_merge || hybrid_merge(counter, config, false)) + if (config->no_merge || hybrid_uniquify(counter, config)) uniquify_event_name(counter); - else if (counter->auto_merge_stats || hybrid_merge(counter, config, true)) - collect_all_aliases(config, counter, cb, data); - return true; -} - -struct aggr_data { - u64 ena, run, val; - struct aggr_cpu_id id; - int nr; - int cpu_map_idx; -}; - -static void aggr_cb(struct perf_stat_config *config, - struct evsel *counter, void *data, bool first) -{ - struct aggr_data *ad = data; - int idx; - struct perf_cpu cpu; - struct perf_cpu_map *cpus; - struct aggr_cpu_id s2; - - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - struct 
perf_counts_values *counts; - - s2 = config->aggr_get_id(config, cpu); - if (!aggr_cpu_id__equal(&s2, &ad->id)) - continue; - if (first) - ad->nr++; - counts = perf_counts(counter->counts, idx, 0); - /* - * When any result is bad, make them all to give - * consistent output in interval mode. - */ - if (counts->ena == 0 || counts->run == 0 || - counter->counts->scaled == -1) { - ad->ena = 0; - ad->run = 0; - break; - } - ad->val += counts->val; - ad->ena += counts->ena; - ad->run += counts->run; - } } static void print_counter_aggrdata(struct perf_stat_config *config, struct evsel *counter, int s, - char *prefix, bool metric_only, - bool *first, struct perf_cpu cpu) + char *prefix, bool metric_only) { - struct aggr_data ad; FILE *output = config->output; u64 ena, run, val; - int nr; - struct aggr_cpu_id id; double uval; + struct perf_stat_evsel *ps = counter->stats; + struct perf_stat_aggr *aggr = &ps->aggr[s]; + struct aggr_cpu_id id = config->aggr_map->map[s]; + double avg = aggr->counts.val; - ad.id = id = config->aggr_map->map[s]; - ad.val = ad.ena = ad.run = 0; - ad.nr = 0; - if (!collect_data(config, counter, aggr_cb, &ad)) + if (counter->supported && aggr->nr == 0) return; - if (perf_pmu__has_hybrid() && ad.ena == 0) - return; + uniquify_counter(config, counter); + + val = aggr->counts.val; + ena = aggr->counts.ena; + run = aggr->counts.run; - nr = ad.nr; - ena = ad.ena; - run = ad.run; - val = ad.val; - if (*first && metric_only) { - *first = false; - aggr_printout(config, counter, id, nr); - } if (prefix && !metric_only) fprintf(output, "%s", prefix); uval = val * counter->scale; - if (cpu.cpu != -1) - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - printout(config, id, nr, counter, uval, - prefix, run, ena, 1.0, &rt_stat); + printout(config, id, aggr->nr, counter, uval, + prefix, run, ena, avg, &rt_stat, s); + if (!metric_only) fputc('\n', output); } +static void print_metric_begin(struct perf_stat_config *config, + struct evlist *evlist, + char *prefix, int 
aggr_idx, + struct cgroup *cgrp) +{ + struct perf_stat_aggr *aggr; + struct aggr_cpu_id id; + struct evsel *evsel; + + if (!config->metric_only) + return; + + if (prefix) + fprintf(config->output, "%s", prefix); + + evsel = evlist__first(evlist); + id = config->aggr_map->map[aggr_idx]; + aggr = &evsel->stats->aggr[aggr_idx]; + aggr_printout(config, evsel, id, aggr->nr); + + print_cgroup(config, cgrp); +} + +static void print_metric_end(struct perf_stat_config *config) +{ + if (!config->metric_only) + return; + + fputc('\n', config->output); +} + static void print_aggr(struct perf_stat_config *config, struct evlist *evlist, char *prefix) { bool metric_only = config->metric_only; - FILE *output = config->output; struct evsel *counter; int s; - bool first; if (!config->aggr_map || !config->aggr_get_id) return; - aggr_update_shadow(config, evlist); - /* * With metric_only everything is on a single line. * Without each counter has its own line. */ for (s = 0; s < config->aggr_map->nr; s++) { - if (prefix && metric_only) - fprintf(output, "%s", prefix); + print_metric_begin(config, evlist, prefix, s, /*cgrp=*/NULL); - first = true; evlist__for_each_entry(evlist, counter) { - print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); - } - if (metric_only) - fputc('\n', output); - } -} - -static int cmp_val(const void *a, const void *b) -{ - return ((struct perf_aggr_thread_value *)b)->val - - ((struct perf_aggr_thread_value *)a)->val; -} - -static struct perf_aggr_thread_value *sort_aggr_thread( - struct evsel *counter, - int *ret, - struct target *_target) -{ - int nthreads = perf_thread_map__nr(counter->core.threads); - int i = 0; - double uval; - struct perf_aggr_thread_value *buf; - - buf = calloc(nthreads, sizeof(struct perf_aggr_thread_value)); - if (!buf) - return NULL; - - for (int thread = 0; thread < nthreads; thread++) { - int idx; - u64 ena = 0, run = 0, val = 0; - - perf_cpu_map__for_each_idx(idx, 
evsel__cpus(counter)) { - struct perf_counts_values *counts = - perf_counts(counter->counts, idx, thread); + if (counter->merged_stat) + continue; - val += counts->val; - ena += counts->ena; - run += counts->run; + print_counter_aggrdata(config, counter, s, prefix, + metric_only); } - - uval = val * counter->scale; - - /* - * Skip value 0 when enabling --per-thread globally, - * otherwise too many 0 output. - */ - if (uval == 0.0 && target__has_per_thread(_target)) - continue; - - buf[i].counter = counter; - buf[i].id = aggr_cpu_id__empty(); - buf[i].id.thread_idx = thread; - buf[i].uval = uval; - buf[i].val = val; - buf[i].run = run; - buf[i].ena = ena; - i++; + print_metric_end(config); } - - qsort(buf, i, sizeof(struct perf_aggr_thread_value), cmp_val); - - if (ret) - *ret = i; - - return buf; } -static void print_aggr_thread(struct perf_stat_config *config, - struct target *_target, - struct evsel *counter, char *prefix) +static void print_aggr_cgroup(struct perf_stat_config *config, + struct evlist *evlist, + char *prefix) { - FILE *output = config->output; - int thread, sorted_threads; - struct aggr_cpu_id id; - struct perf_aggr_thread_value *buf; + bool metric_only = config->metric_only; + struct evsel *counter, *evsel; + struct cgroup *cgrp = NULL; + int s; - buf = sort_aggr_thread(counter, &sorted_threads, _target); - if (!buf) { - perror("cannot sort aggr thread"); + if (!config->aggr_map || !config->aggr_get_id) return; - } - - for (thread = 0; thread < sorted_threads; thread++) { - if (prefix) - fprintf(output, "%s", prefix); - - id = buf[thread].id; - printout(config, id, 0, buf[thread].counter, buf[thread].uval, - prefix, buf[thread].run, buf[thread].ena, 1.0, - &rt_stat); - fputc('\n', output); - } - - free(buf); -} -struct caggr_data { - double avg, avg_enabled, avg_running; -}; - -static void counter_aggr_cb(struct perf_stat_config *config __maybe_unused, - struct evsel *counter, void *data, - bool first __maybe_unused) -{ - struct caggr_data *cd = 
data; - struct perf_counts_values *aggr = &counter->counts->aggr; - - cd->avg += aggr->val; - cd->avg_enabled += aggr->ena; - cd->avg_running += aggr->run; -} - -/* - * Print out the results of a single counter: - * aggregated counts in system-wide mode - */ -static void print_counter_aggr(struct perf_stat_config *config, - struct evsel *counter, char *prefix) -{ - bool metric_only = config->metric_only; - FILE *output = config->output; - double uval; - struct caggr_data cd = { .avg = 0.0 }; + evlist__for_each_entry(evlist, evsel) { + if (cgrp == evsel->cgrp) + continue; - if (!collect_data(config, counter, counter_aggr_cb, &cd)) - return; + cgrp = evsel->cgrp; - if (prefix && !metric_only) - fprintf(output, "%s", prefix); + for (s = 0; s < config->aggr_map->nr; s++) { + print_metric_begin(config, evlist, prefix, s, cgrp); - uval = cd.avg * counter->scale; - printout(config, aggr_cpu_id__empty(), 0, counter, uval, prefix, cd.avg_running, - cd.avg_enabled, cd.avg, &rt_stat); - if (!metric_only) - fprintf(output, "\n"); -} + evlist__for_each_entry(evlist, counter) { + if (counter->merged_stat) + continue; -static void counter_cb(struct perf_stat_config *config __maybe_unused, - struct evsel *counter, void *data, - bool first __maybe_unused) -{ - struct aggr_data *ad = data; + if (counter->cgrp != cgrp) + continue; - ad->val += perf_counts(counter->counts, ad->cpu_map_idx, 0)->val; - ad->ena += perf_counts(counter->counts, ad->cpu_map_idx, 0)->ena; - ad->run += perf_counts(counter->counts, ad->cpu_map_idx, 0)->run; + print_counter_aggrdata(config, counter, s, prefix, + metric_only); + } + print_metric_end(config); + } + } } -/* - * Print out the results of a single counter: - * does not use aggregated count in system-wide - */ static void print_counter(struct perf_stat_config *config, struct evsel *counter, char *prefix) { - FILE *output = config->output; - u64 ena, run, val; - double uval; - int idx; - struct perf_cpu cpu; - struct aggr_cpu_id id; - - 
perf_cpu_map__for_each_cpu(cpu, idx, evsel__cpus(counter)) { - struct aggr_data ad = { .cpu_map_idx = idx }; - - if (!collect_data(config, counter, counter_cb, &ad)) - return; - val = ad.val; - ena = ad.ena; - run = ad.run; + bool metric_only = config->metric_only; + int s; - if (prefix) - fprintf(output, "%s", prefix); + /* AGGR_THREAD doesn't have config->aggr_get_id */ + if (!config->aggr_map) + return; - uval = val * counter->scale; - id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); - printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + if (counter->merged_stat) + return; - fputc('\n', output); + for (s = 0; s < config->aggr_map->nr; s++) { + print_counter_aggrdata(config, counter, s, prefix, + metric_only); } } @@ -1093,6 +970,7 @@ static void print_no_aggr_metric(struct perf_stat_config *config, u64 ena, run, val; double uval; struct aggr_cpu_id id; + struct perf_stat_evsel *ps = counter->stats; int counter_idx = perf_cpu_map__idx(evsel__cpus(counter), cpu); if (counter_idx < 0) @@ -1100,206 +978,240 @@ static void print_no_aggr_metric(struct perf_stat_config *config, id = aggr_cpu_id__cpu(cpu, /*data=*/NULL); if (first) { - if (prefix) - fputs(prefix, config->output); - aggr_printout(config, counter, id, 0); + print_metric_begin(config, evlist, prefix, + counter_idx, /*cgrp=*/NULL); first = false; } - val = perf_counts(counter->counts, counter_idx, 0)->val; - ena = perf_counts(counter->counts, counter_idx, 0)->ena; - run = perf_counts(counter->counts, counter_idx, 0)->run; + val = ps->aggr[counter_idx].counts.val; + ena = ps->aggr[counter_idx].counts.ena; + run = ps->aggr[counter_idx].counts.run; uval = val * counter->scale; printout(config, id, 0, counter, uval, prefix, - run, ena, 1.0, &rt_stat); + run, ena, 1.0, &rt_stat, counter_idx); } if (!first) - fputc('\n', config->output); + print_metric_end(config); } } -static int aggr_header_lens[] = { - [AGGR_CORE] = 24, - [AGGR_DIE] = 18, - [AGGR_SOCKET] = 12, - [AGGR_NONE] = 6, - 
[AGGR_THREAD] = 24, - [AGGR_NODE] = 6, - [AGGR_GLOBAL] = 0, -}; +static void print_metric_headers_std(struct perf_stat_config *config, + const char *prefix, bool no_indent) +{ + if (prefix) + fprintf(config->output, "%s", prefix); -static const char *aggr_header_csv[] = { - [AGGR_CORE] = "core,cpus,", - [AGGR_DIE] = "die,cpus", - [AGGR_SOCKET] = "socket,cpus", - [AGGR_NONE] = "cpu,", - [AGGR_THREAD] = "comm-pid,", - [AGGR_NODE] = "node,", - [AGGR_GLOBAL] = "" -}; + if (!no_indent) { + int len = aggr_header_lens[config->aggr_mode]; + + if (nr_cgroups || config->cgroup_list) + len += CGROUP_LEN + 1; + + fprintf(config->output, "%*s", len, ""); + } +} + +static void print_metric_headers_csv(struct perf_stat_config *config, + const char *prefix, + bool no_indent __maybe_unused) +{ + if (prefix) + fprintf(config->output, "%s", prefix); + if (config->interval) + fputs("time,", config->output); + if (!config->iostat_run) + fputs(aggr_header_csv[config->aggr_mode], config->output); +} + +static void print_metric_headers_json(struct perf_stat_config *config, + const char *prefix __maybe_unused, + bool no_indent __maybe_unused) +{ + if (config->interval) + fputs("{\"unit\" : \"sec\"}", config->output); +} static void print_metric_headers(struct perf_stat_config *config, struct evlist *evlist, const char *prefix, bool no_indent) { - struct perf_stat_output_ctx out; struct evsel *counter; struct outstate os = { .fh = config->output }; - bool first = true; - - if (config->json_output && !config->interval) - fprintf(config->output, "{"); + struct perf_stat_output_ctx out = { + .ctx = &os, + .print_metric = print_metric_header, + .new_line = new_line_metric, + .force_header = true, + }; - if (prefix && !config->json_output) - fprintf(config->output, "%s", prefix); + if (config->json_output) + print_metric_headers_json(config, prefix, no_indent); + else if (config->csv_output) + print_metric_headers_csv(config, prefix, no_indent); + else + print_metric_headers_std(config, prefix, 
no_indent); - if (!config->csv_output && !no_indent) - fprintf(config->output, "%*s", - aggr_header_lens[config->aggr_mode], ""); - if (config->csv_output) { - if (config->interval) - fputs("time,", config->output); - if (!config->iostat_run) - fputs(aggr_header_csv[config->aggr_mode], config->output); - } if (config->iostat_run) iostat_print_header_prefix(config); + if (config->cgroup_list) + os.cgrp = evlist__first(evlist)->cgrp; + /* Print metrics headers only */ evlist__for_each_entry(evlist, counter) { os.evsel = counter; - out.ctx = &os; - out.print_metric = print_metric_header; - if (!first && config->json_output) - fprintf(config->output, ", "); - first = false; - out.new_line = new_line_metric; - out.force_header = true; + perf_stat__print_shadow_stats(config, counter, 0, 0, &out, &config->metric_events, &rt_stat); } - if (config->json_output) - fprintf(config->output, "}"); fputc('\n', config->output); } -static void print_interval(struct perf_stat_config *config, - struct evlist *evlist, - char *prefix, struct timespec *ts) +static void prepare_interval(struct perf_stat_config *config, + char *prefix, struct timespec *ts) { - bool metric_only = config->metric_only; - unsigned int unit_width = config->unit_width; - FILE *output = config->output; - static int num_print_interval; - - if (config->interval_clear) - puts(CONSOLE_CLEAR); + if (config->iostat_run) + return; - if (!config->iostat_run && !config->json_output) + if (!config->json_output) sprintf(prefix, "%6lu.%09lu%s", (unsigned long) ts->tv_sec, ts->tv_nsec, config->csv_sep); - if (!config->iostat_run && config->json_output && !config->metric_only) + else if (!config->metric_only) sprintf(prefix, "{\"interval\" : %lu.%09lu, ", (unsigned long) ts->tv_sec, ts->tv_nsec); - if (!config->iostat_run && config->json_output && config->metric_only) + else sprintf(prefix, "{\"interval\" : %lu.%09lu}", (unsigned long) ts->tv_sec, ts->tv_nsec); +} - if ((num_print_interval == 0 && !config->csv_output && 
!config->json_output) - || config->interval_clear) { - switch (config->aggr_mode) { - case AGGR_NODE: - fprintf(output, "# time node cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_SOCKET: - fprintf(output, "# time socket cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_DIE: - fprintf(output, "# time die cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU "); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_THREAD: - fprintf(output, "# time comm-pid"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - if (!config->iostat_run) { - fprintf(output, "# time"); - if (!metric_only) - fprintf(output, " counts %*s events\n", unit_width, "unit"); - } - case AGGR_UNSET: - case AGGR_MAX: - break; - } +static void print_header_interval_std(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + FILE *output = config->output; + + switch (config->aggr_mode) { + case AGGR_NODE: + case AGGR_SOCKET: + case AGGR_DIE: + case AGGR_CORE: + fprintf(output, "#%*s %-*s cpus", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); + break; + case AGGR_NONE: + fprintf(output, "#%*s %-*s", + INTERVAL_LEN - 1, "time", + aggr_header_lens[config->aggr_mode], + aggr_header_std[config->aggr_mode]); + break; + case AGGR_THREAD: + fprintf(output, "#%*s %*s-%-*s", + INTERVAL_LEN - 1, "time", + COMM_LEN, "comm", PID_LEN, "pid"); + break; + case 
AGGR_GLOBAL: + default: + if (!config->iostat_run) + fprintf(output, "#%*s", + INTERVAL_LEN - 1, "time"); + case AGGR_UNSET: + case AGGR_MAX: + break; } - if ((num_print_interval == 0 || config->interval_clear) - && metric_only && !config->json_output) + if (config->metric_only) + print_metric_headers(config, evlist, " ", true); + else + fprintf(output, " %*s %*s events\n", + COUNTS_LEN, "counts", config->unit_width, "unit"); +} + +static void print_header_std(struct perf_stat_config *config, + struct target *_target, struct evlist *evlist, + int argc, const char **argv) +{ + FILE *output = config->output; + int i; + + fprintf(output, "\n"); + fprintf(output, " Performance counter stats for "); + if (_target->bpf_str) + fprintf(output, "\'BPF program(s) %s", _target->bpf_str); + else if (_target->system_wide) + fprintf(output, "\'system wide"); + else if (_target->cpu_list) + fprintf(output, "\'CPU(s) %s", _target->cpu_list); + else if (!target__has_task(_target)) { + fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); + for (i = 1; argv && (i < argc); i++) + fprintf(output, " %s", argv[i]); + } else if (_target->pid) + fprintf(output, "process id \'%s", _target->pid); + else + fprintf(output, "thread id \'%s", _target->tid); + + fprintf(output, "\'"); + if (config->run_count > 1) + fprintf(output, " (%d runs)", config->run_count); + fprintf(output, ":\n\n"); + + if (config->metric_only) + print_metric_headers(config, evlist, " ", false); +} + +static void print_header_csv(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + if (config->metric_only) print_metric_headers(config, evlist, " ", true); - if ((num_print_interval == 0 || config->interval_clear) - && metric_only && config->json_output) { - fprintf(output, "{"); +} +static void print_header_json(struct perf_stat_config *config, + struct target *_target __maybe_unused, + struct evlist *evlist, + int argc __maybe_unused, + const char **argv __maybe_unused) +{ + if (config->metric_only) print_metric_headers(config, evlist, " ", true); - } - if (++num_print_interval == 25) - num_print_interval = 0; } static void print_header(struct perf_stat_config *config, struct target *_target, + struct evlist *evlist, int argc, const char **argv) { - FILE *output = config->output; - int i; + static int num_print_iv; fflush(stdout); - if (!config->csv_output && !config->json_output) { - fprintf(output, "\n"); - fprintf(output, " Performance counter stats for "); - if (_target->bpf_str) - fprintf(output, "\'BPF program(s) %s", _target->bpf_str); - else if (_target->system_wide) - fprintf(output, "\'system wide"); - else if (_target->cpu_list) - fprintf(output, "\'CPU(s) %s", _target->cpu_list); - else if (!target__has_task(_target)) { - fprintf(output, "\'%s", argv ? 
argv[0] : "pipe"); - for (i = 1; argv && (i < argc); i++) - fprintf(output, " %s", argv[i]); - } else if (_target->pid) - fprintf(output, "process id \'%s", _target->pid); - else - fprintf(output, "thread id \'%s", _target->tid); + if (config->interval_clear) + puts(CONSOLE_CLEAR); - fprintf(output, "\'"); - if (config->run_count > 1) - fprintf(output, " (%d runs)", config->run_count); - fprintf(output, ":\n\n"); + if (num_print_iv == 0 || config->interval_clear) { + if (config->json_output) + print_header_json(config, _target, evlist, argc, argv); + else if (config->csv_output) + print_header_csv(config, _target, evlist, argc, argv); + else if (config->interval) + print_header_interval_std(config, _target, evlist, argc, argv); + else + print_header_std(config, _target, evlist, argc, argv); } + + if (num_print_iv++ == 25) + num_print_iv = 0; } static int get_precision(double num) @@ -1348,6 +1260,9 @@ static void print_footer(struct perf_stat_config *config) double avg = avg_stats(config->walltime_nsecs_stats) / NSEC_PER_SEC; FILE *output = config->output; + if (config->interval || config->csv_output || config->json_output) + return; + if (!config->null_run) fprintf(output, "\n"); @@ -1376,7 +1291,7 @@ static void print_footer(struct perf_stat_config *config) fprintf(output, " %17.*f +- %.*f seconds time elapsed", precision, avg, precision, sd); - print_noise_pct(config, sd, avg); + print_noise_pct(config, sd, avg, /*before_metric=*/false); } fprintf(output, "\n\n"); @@ -1393,58 +1308,72 @@ static void print_footer(struct perf_stat_config *config) "the same PMU. 
Try reorganizing the group.\n"); } -static void print_percore_thread(struct perf_stat_config *config, - struct evsel *counter, char *prefix) -{ - int s; - struct aggr_cpu_id s2, id; - struct perf_cpu_map *cpus; - bool first = true; - int idx; - struct perf_cpu cpu; - - cpus = evsel__cpus(counter); - perf_cpu_map__for_each_cpu(cpu, idx, cpus) { - s2 = config->aggr_get_id(config, cpu); - for (s = 0; s < config->aggr_map->nr; s++) { - id = config->aggr_map->map[s]; - if (aggr_cpu_id__equal(&s2, &id)) - break; - } - - print_counter_aggrdata(config, counter, s, - prefix, false, - &first, cpu); - } -} - static void print_percore(struct perf_stat_config *config, struct evsel *counter, char *prefix) { bool metric_only = config->metric_only; FILE *output = config->output; - int s; - bool first = true; + struct cpu_aggr_map *core_map; + int s, c, i; if (!config->aggr_map || !config->aggr_get_id) return; if (config->percore_show_thread) - return print_percore_thread(config, counter, prefix); + return print_counter(config, counter, prefix); - for (s = 0; s < config->aggr_map->nr; s++) { - if (prefix && metric_only) - fprintf(output, "%s", prefix); + core_map = cpu_aggr_map__empty_new(config->aggr_map->nr); + if (core_map == NULL) { + fprintf(output, "Cannot allocate per-core aggr map for display\n"); + return; + } - print_counter_aggrdata(config, counter, s, - prefix, metric_only, - &first, (struct perf_cpu){ .cpu = -1 }); + for (s = 0, c = 0; s < config->aggr_map->nr; s++) { + struct perf_cpu curr_cpu = config->aggr_map->map[s].cpu; + struct aggr_cpu_id core_id = aggr_cpu_id__core(curr_cpu, NULL); + bool found = false; + + for (i = 0; i < c; i++) { + if (aggr_cpu_id__equal(&core_map->map[i], &core_id)) { + found = true; + break; + } + } + if (found) + continue; + + print_counter_aggrdata(config, counter, s, prefix, metric_only); + + core_map->map[c++] = core_id; } + free(core_map); if (metric_only) fputc('\n', output); } +static void print_cgroup_counter(struct 
perf_stat_config *config, struct evlist *evlist, + char *prefix) +{ + struct cgroup *cgrp = NULL; + struct evsel *counter; + + evlist__for_each_entry(evlist, counter) { + if (cgrp != counter->cgrp) { + if (cgrp != NULL) + print_metric_end(config); + + cgrp = counter->cgrp; + print_metric_begin(config, evlist, prefix, + /*aggr_idx=*/0, cgrp); + } + + print_counter(config, counter, prefix); + } + if (cgrp) + print_metric_end(config); +} + void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *config, struct target *_target, struct timespec *ts, int argc, const char **argv) { @@ -1456,47 +1385,37 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf if (config->iostat_run) evlist->selected = evlist__first(evlist); - if (interval) - print_interval(config, evlist, prefix = buf, ts); - else - print_header(config, _target, argc, argv); - - if (metric_only) { - static int num_print_iv; - - if (num_print_iv == 0 && !interval) - print_metric_headers(config, evlist, prefix, false); - if (num_print_iv++ == 25) - num_print_iv = 0; - if (config->aggr_mode == AGGR_GLOBAL && prefix && !config->iostat_run) - fprintf(config->output, "%s", prefix); - - if (config->json_output && !config->metric_only) - fprintf(config->output, "}"); + if (interval) { + prefix = buf; + prepare_interval(config, prefix, ts); } + print_header(config, _target, evlist, argc, argv); + switch (config->aggr_mode) { case AGGR_CORE: case AGGR_DIE: case AGGR_SOCKET: case AGGR_NODE: - print_aggr(config, evlist, prefix); + if (config->cgroup_list) + print_aggr_cgroup(config, evlist, prefix); + else + print_aggr(config, evlist, prefix); break; case AGGR_THREAD: - evlist__for_each_entry(evlist, counter) { - print_aggr_thread(config, _target, counter, prefix); - } - break; case AGGR_GLOBAL: - if (config->iostat_run) + if (config->iostat_run) { iostat_print_counters(evlist, config, ts, prefix = buf, - print_counter_aggr); - else { + print_counter); + } else if 
(config->cgroup_list) { + print_cgroup_counter(config, evlist, prefix); + } else { + print_metric_begin(config, evlist, prefix, + /*aggr_idx=*/0, /*cgrp=*/NULL); evlist__for_each_entry(evlist, counter) { - print_counter_aggr(config, counter, prefix); + print_counter(config, counter, prefix); } - if (metric_only) - fputc('\n', config->output); + print_metric_end(config); } break; case AGGR_NONE: @@ -1517,8 +1436,7 @@ void evlist__print_counters(struct evlist *evlist, struct perf_stat_config *conf break; } - if (!interval && !config->csv_output && !config->json_output) - print_footer(config); + print_footer(config); fflush(config->output); } diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c index 07b29fe272c7..9bde9224a97c 100644 --- a/tools/perf/util/stat-shadow.c +++ b/tools/perf/util/stat-shadow.c @@ -14,6 +14,7 @@ #include "units.h" #include <linux/zalloc.h> #include "iostat.h" +#include "util/hashmap.h" /* * AGGR_GLOBAL: Use CPU 0 diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 8ec8bb4a9912..673f017a211f 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -14,11 +14,7 @@ #include "evlist.h" #include "evsel.h" #include "thread_map.h" -#ifdef HAVE_LIBBPF_SUPPORT -#include <bpf/hashmap.h> -#else #include "util/hashmap.h" -#endif #include <linux/zalloc.h> void update_stats(struct stats *stats, u64 val) @@ -130,18 +126,65 @@ static void perf_stat_evsel_id_init(struct evsel *evsel) } } +static void evsel__reset_aggr_stats(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct perf_stat_aggr *aggr = ps->aggr; + + if (aggr) + memset(aggr, 0, sizeof(*aggr) * ps->nr_aggr); +} + static void evsel__reset_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; init_stats(&ps->res_stats); + evsel__reset_aggr_stats(evsel); +} + +static int evsel__alloc_aggr_stats(struct evsel *evsel, int nr_aggr) +{ + struct perf_stat_evsel *ps = evsel->stats; + + if (ps == NULL) + return 0; 
+ + ps->nr_aggr = nr_aggr; + ps->aggr = calloc(nr_aggr, sizeof(*ps->aggr)); + if (ps->aggr == NULL) + return -ENOMEM; + + return 0; +} + +int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) { + if (evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) + return -1; + } + return 0; } -static int evsel__alloc_stat_priv(struct evsel *evsel) +static int evsel__alloc_stat_priv(struct evsel *evsel, int nr_aggr) { - evsel->stats = zalloc(sizeof(struct perf_stat_evsel)); - if (evsel->stats == NULL) + struct perf_stat_evsel *ps; + + ps = zalloc(sizeof(*ps)); + if (ps == NULL) return -ENOMEM; + + evsel->stats = ps; + + if (nr_aggr && evsel__alloc_aggr_stats(evsel, nr_aggr) < 0) { + evsel->stats = NULL; + free(ps); + return -ENOMEM; + } + perf_stat_evsel_id_init(evsel); evsel__reset_stat_priv(evsel); return 0; @@ -151,8 +194,10 @@ static void evsel__free_stat_priv(struct evsel *evsel) { struct perf_stat_evsel *ps = evsel->stats; - if (ps) + if (ps) { + zfree(&ps->aggr); zfree(&ps->group_data); + } zfree(&evsel->stats); } @@ -181,9 +226,9 @@ static void evsel__reset_prev_raw_counts(struct evsel *evsel) perf_counts__reset(evsel->prev_raw_counts); } -static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) +static int evsel__alloc_stats(struct evsel *evsel, int nr_aggr, bool alloc_raw) { - if (evsel__alloc_stat_priv(evsel) < 0 || + if (evsel__alloc_stat_priv(evsel, nr_aggr) < 0 || evsel__alloc_counts(evsel) < 0 || (alloc_raw && evsel__alloc_prev_raw_counts(evsel) < 0)) return -ENOMEM; @@ -191,12 +236,17 @@ static int evsel__alloc_stats(struct evsel *evsel, bool alloc_raw) return 0; } -int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw) +int evlist__alloc_stats(struct perf_stat_config *config, + struct evlist *evlist, bool alloc_raw) { struct evsel *evsel; + int nr_aggr = 0; + + if (config && config->aggr_map) + nr_aggr = config->aggr_map->nr; evlist__for_each_entry(evlist, evsel) { - if 
(evsel__alloc_stats(evsel, alloc_raw)) + if (evsel__alloc_stats(evsel, nr_aggr, alloc_raw)) goto out_free; } @@ -228,6 +278,14 @@ void evlist__reset_stats(struct evlist *evlist) } } +void evlist__reset_aggr_stats(struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__reset_aggr_stats(evsel); +} + void evlist__reset_prev_raw_counts(struct evlist *evlist) { struct evsel *evsel; @@ -246,8 +304,6 @@ static void evsel__copy_prev_raw_counts(struct evsel *evsel) *perf_counts(evsel->prev_raw_counts, idx, thread); } } - - evsel->counts->aggr = evsel->prev_raw_counts->aggr; } void evlist__copy_prev_raw_counts(struct evlist *evlist) @@ -258,26 +314,6 @@ void evlist__copy_prev_raw_counts(struct evlist *evlist) evsel__copy_prev_raw_counts(evsel); } -void evlist__save_aggr_prev_raw_counts(struct evlist *evlist) -{ - struct evsel *evsel; - - /* - * To collect the overall statistics for interval mode, - * we copy the counts from evsel->prev_raw_counts to - * evsel->counts. The perf_stat_process_counter creates - * aggr values from per cpu values, but the per cpu values - * are 0 for AGGR_GLOBAL. So we use a trick that saves the - * previous aggr value to the first member of perf_counts, - * then aggr calculation in process_counter_values can work - * correctly. 
- */ - evlist__for_each_entry(evlist, evsel) { - *perf_counts(evsel->prev_raw_counts, 0, 0) = - evsel->prev_raw_counts->aggr; - } -} - static size_t pkg_id_hash(const void *__key, void *ctx __maybe_unused) { uint64_t *key = (uint64_t *) __key; @@ -356,12 +392,31 @@ static int check_per_pkg(struct evsel *counter, struct perf_counts_values *vals, return ret; } +static bool evsel__count_has_error(struct evsel *evsel, + struct perf_counts_values *count, + struct perf_stat_config *config) +{ + /* the evsel was failed already */ + if (evsel->err || evsel->counts->scaled == -1) + return true; + + /* this is meaningful for CPU aggregation modes only */ + if (config->aggr_mode == AGGR_GLOBAL) + return false; + + /* it's considered ok when it actually ran */ + if (count->ena != 0 && count->run != 0) + return false; + + return true; +} + static int process_counter_values(struct perf_stat_config *config, struct evsel *evsel, int cpu_map_idx, int thread, struct perf_counts_values *count) { - struct perf_counts_values *aggr = &evsel->counts->aggr; + struct perf_stat_evsel *ps = evsel->stats; static struct perf_counts_values zero; bool skip = false; @@ -373,34 +428,60 @@ process_counter_values(struct perf_stat_config *config, struct evsel *evsel, if (skip) count = &zero; - switch (config->aggr_mode) { - case AGGR_THREAD: - case AGGR_CORE: - case AGGR_DIE: - case AGGR_SOCKET: - case AGGR_NODE: - case AGGR_NONE: - if (!evsel->snapshot) - evsel__compute_deltas(evsel, cpu_map_idx, thread, count); - perf_counts_values__scale(count, config->scale, NULL); - if ((config->aggr_mode == AGGR_NONE) && (!evsel->percore)) { - perf_stat__update_shadow_stats(evsel, count->val, - cpu_map_idx, &rt_stat); - } + if (!evsel->snapshot) + evsel__compute_deltas(evsel, cpu_map_idx, thread, count); + perf_counts_values__scale(count, config->scale, NULL); + + if (config->aggr_mode == AGGR_THREAD) { + struct perf_counts_values *aggr_counts = &ps->aggr[thread].counts; + + /* + * Skip value 0 when enabling 
--per-thread globally, + * otherwise too many 0 output. + */ + if (count->val == 0 && config->system_wide) + return 0; + + ps->aggr[thread].nr++; + + aggr_counts->val += count->val; + aggr_counts->ena += count->ena; + aggr_counts->run += count->run; + return 0; + } - if (config->aggr_mode == AGGR_THREAD) { - perf_stat__update_shadow_stats(evsel, count->val, - thread, &rt_stat); + if (ps->aggr) { + struct perf_cpu cpu = perf_cpu_map__cpu(evsel->core.cpus, cpu_map_idx); + struct aggr_cpu_id aggr_id = config->aggr_get_id(config, cpu); + struct perf_stat_aggr *ps_aggr; + int i; + + for (i = 0; i < ps->nr_aggr; i++) { + if (!aggr_cpu_id__equal(&aggr_id, &config->aggr_map->map[i])) + continue; + + ps_aggr = &ps->aggr[i]; + ps_aggr->nr++; + + /* + * When any result is bad, make them all to give consistent output + * in interval mode. But per-task counters can have 0 enabled time + * when some tasks are idle. + */ + if (evsel__count_has_error(evsel, count, config) && !ps_aggr->failed) { + ps_aggr->counts.val = 0; + ps_aggr->counts.ena = 0; + ps_aggr->counts.run = 0; + ps_aggr->failed = true; + } + + if (!ps_aggr->failed) { + ps_aggr->counts.val += count->val; + ps_aggr->counts.ena += count->ena; + ps_aggr->counts.run += count->run; + } + break; } - break; - case AGGR_GLOBAL: - aggr->val += count->val; - aggr->ena += count->ena; - aggr->run += count->run; - case AGGR_UNSET: - case AGGR_MAX: - default: - break; } return 0; @@ -427,13 +508,10 @@ static int process_counter_maps(struct perf_stat_config *config, int perf_stat_process_counter(struct perf_stat_config *config, struct evsel *counter) { - struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat_evsel *ps = counter->stats; - u64 *count = counter->counts->aggr.values; + u64 *count; int ret; - aggr->val = aggr->ena = aggr->run = 0; - if (counter->per_pkg) evsel__zero_per_pkg(counter); @@ -444,10 +522,11 @@ int perf_stat_process_counter(struct perf_stat_config *config, if (config->aggr_mode != 
AGGR_GLOBAL) return 0; - if (!counter->snapshot) - evsel__compute_deltas(counter, -1, -1, aggr); - perf_counts_values__scale(aggr, config->scale, &counter->counts->scaled); - + /* + * GLOBAL aggregation mode only has a single aggr counts, + * so we can use ps->aggr[0] as the actual output. + */ + count = ps->aggr[0].counts.values; update_stats(&ps->res_stats, *count); if (verbose > 0) { @@ -455,13 +534,194 @@ int perf_stat_process_counter(struct perf_stat_config *config, evsel__name(counter), count[0], count[1], count[2]); } - /* - * Save the full runtime - to allow normalization during printout: - */ - perf_stat__update_shadow_stats(counter, *count, 0, &rt_stat); + return 0; +} + +static int evsel__merge_aggr_counters(struct evsel *evsel, struct evsel *alias) +{ + struct perf_stat_evsel *ps_a = evsel->stats; + struct perf_stat_evsel *ps_b = alias->stats; + int i; + + if (ps_a->aggr == NULL && ps_b->aggr == NULL) + return 0; + + if (ps_a->nr_aggr != ps_b->nr_aggr) { + pr_err("Unmatched aggregation mode between aliases\n"); + return -1; + } + + for (i = 0; i < ps_a->nr_aggr; i++) { + struct perf_counts_values *aggr_counts_a = &ps_a->aggr[i].counts; + struct perf_counts_values *aggr_counts_b = &ps_b->aggr[i].counts; + + /* NB: don't increase aggr.nr for aliases */ + + aggr_counts_a->val += aggr_counts_b->val; + aggr_counts_a->ena += aggr_counts_b->ena; + aggr_counts_a->run += aggr_counts_b->run; + } return 0; } +/* events should have the same name, scale, unit, cgroup but on different PMUs */ +static bool evsel__is_alias(struct evsel *evsel_a, struct evsel *evsel_b) +{ + if (strcmp(evsel__name(evsel_a), evsel__name(evsel_b))) + return false; + + if (evsel_a->scale != evsel_b->scale) + return false; + + if (evsel_a->cgrp != evsel_b->cgrp) + return false; + + if (strcmp(evsel_a->unit, evsel_b->unit)) + return false; + + if (evsel__is_clock(evsel_a) != evsel__is_clock(evsel_b)) + return false; + + return !!strcmp(evsel_a->pmu_name, evsel_b->pmu_name); +} + +static void 
evsel__merge_aliases(struct evsel *evsel) +{ + struct evlist *evlist = evsel->evlist; + struct evsel *alias; + + alias = list_prepare_entry(evsel, &(evlist->core.entries), core.node); + list_for_each_entry_continue(alias, &evlist->core.entries, core.node) { + /* Merge the same events on different PMUs. */ + if (evsel__is_alias(evsel, alias)) { + evsel__merge_aggr_counters(evsel, alias); + alias->merged_stat = true; + } + } +} + +static bool evsel__should_merge_hybrid(const struct evsel *evsel, + const struct perf_stat_config *config) +{ + return config->hybrid_merge && evsel__is_hybrid(evsel); +} + +static void evsel__merge_stats(struct evsel *evsel, struct perf_stat_config *config) +{ + /* this evsel is already merged */ + if (evsel->merged_stat) + return; + + if (evsel->auto_merge_stats || evsel__should_merge_hybrid(evsel, config)) + evsel__merge_aliases(evsel); +} + +/* merge the same uncore and hybrid events if requested */ +void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->no_merge) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__merge_stats(evsel, config); +} + +static void evsel__update_percore_stats(struct evsel *evsel, struct aggr_cpu_id *core_id) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct perf_counts_values counts = { 0, }; + struct aggr_cpu_id id; + struct perf_cpu cpu; + int idx; + + /* collect per-core counts */ + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + id = aggr_cpu_id__core(cpu, NULL); + if (!aggr_cpu_id__equal(core_id, &id)) + continue; + + counts.val += aggr->counts.val; + counts.ena += aggr->counts.ena; + counts.run += aggr->counts.run; + } + + /* update aggregated per-core counts for each CPU */ + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + id = aggr_cpu_id__core(cpu, NULL); + if (!aggr_cpu_id__equal(core_id, 
&id)) + continue; + + aggr->counts.val = counts.val; + aggr->counts.ena = counts.ena; + aggr->counts.run = counts.run; + + aggr->used = true; + } +} + +/* we have an aggr_map for cpu, but want to aggregate the counters per-core */ +static void evsel__process_percore(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + struct aggr_cpu_id core_id; + struct perf_cpu cpu; + int idx; + + if (!evsel->percore) + return; + + perf_cpu_map__for_each_cpu(cpu, idx, evsel->core.cpus) { + struct perf_stat_aggr *aggr = &ps->aggr[idx]; + + if (aggr->used) + continue; + + core_id = aggr_cpu_id__core(cpu, NULL); + evsel__update_percore_stats(evsel, &core_id); + } +} + +/* process cpu stats on per-core events */ +void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist) +{ + struct evsel *evsel; + + if (config->aggr_mode != AGGR_NONE) + return; + + evlist__for_each_entry(evlist, evsel) + evsel__process_percore(evsel); +} + +static void evsel__update_shadow_stats(struct evsel *evsel) +{ + struct perf_stat_evsel *ps = evsel->stats; + int i; + + if (ps->aggr == NULL) + return; + + for (i = 0; i < ps->nr_aggr; i++) { + struct perf_counts_values *aggr_counts = &ps->aggr[i].counts; + + perf_stat__update_shadow_stats(evsel, aggr_counts->val, i, &rt_stat); + } +} + +void perf_stat_process_shadow_stats(struct perf_stat_config *config __maybe_unused, + struct evlist *evlist) +{ + struct evsel *evsel; + + evlist__for_each_entry(evlist, evsel) + evsel__update_shadow_stats(evsel); +} int perf_event__process_stat_event(struct perf_session *session, union perf_event *event) diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index b0899c6e002f..499c3bf81333 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -8,6 +8,7 @@ #include <sys/resource.h> #include "cpumap.h" #include "rblist.h" +#include "counts.h" struct perf_cpu_map; struct perf_stat_config; @@ -42,9 +43,29 @@ enum perf_stat_evsel_id { PERF_STAT_EVSEL_ID__MAX, }; +/* 
hold aggregated event info */ +struct perf_stat_aggr { + /* aggregated values */ + struct perf_counts_values counts; + /* number of entries (CPUs) aggregated */ + int nr; + /* whether any entry has failed to read/process event */ + bool failed; + /* to mark this data is processed already */ + bool used; +}; + +/* per-evsel event stats */ struct perf_stat_evsel { + /* used for repeated runs */ struct stats res_stats; + /* evsel id for quick check */ enum perf_stat_evsel_id id; + /* number of allocated 'aggr' */ + int nr_aggr; + /* aggregated event values */ + struct perf_stat_aggr *aggr; + /* used for group read */ u64 *group_data; }; @@ -139,7 +160,6 @@ struct perf_stat_config { bool metric_no_group; bool metric_no_merge; bool stop_read_counter; - bool quiet; bool iostat_run; char *user_requested_cpu_list; bool system_wide; @@ -203,15 +223,6 @@ static inline void update_rusage_stats(struct rusage_stats *ru_stats, struct rus struct evsel; struct evlist; -struct perf_aggr_thread_value { - struct evsel *counter; - struct aggr_cpu_id id; - double uval; - u64 val; - u64 run; - u64 ena; -}; - bool __perf_stat_evsel__is(struct evsel *evsel, enum perf_stat_evsel_id id); #define perf_stat_evsel__is(evsel, id) \ @@ -248,15 +259,23 @@ void perf_stat__print_shadow_stats(struct perf_stat_config *config, struct runtime_stat *st); void perf_stat__collect_metric_expr(struct evlist *); -int evlist__alloc_stats(struct evlist *evlist, bool alloc_raw); +int evlist__alloc_stats(struct perf_stat_config *config, + struct evlist *evlist, bool alloc_raw); void evlist__free_stats(struct evlist *evlist); void evlist__reset_stats(struct evlist *evlist); void evlist__reset_prev_raw_counts(struct evlist *evlist); void evlist__copy_prev_raw_counts(struct evlist *evlist); void evlist__save_aggr_prev_raw_counts(struct evlist *evlist); +int evlist__alloc_aggr_stats(struct evlist *evlist, int nr_aggr); +void evlist__reset_aggr_stats(struct evlist *evlist); + int perf_stat_process_counter(struct 
perf_stat_config *config, struct evsel *counter); +void perf_stat_merge_counters(struct perf_stat_config *config, struct evlist *evlist); +void perf_stat_process_percore(struct perf_stat_config *config, struct evlist *evlist); +void perf_stat_process_shadow_stats(struct perf_stat_config *config, struct evlist *evlist); + struct perf_tool; union perf_event; struct perf_session; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 0b893dcc8ea6..e297de14184c 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -132,6 +132,8 @@ struct addr_location { s32 socket; }; +void addr_location__put(struct addr_location *al); + int dso__load(struct dso *dso, struct map *map); int dso__load_vmlinux(struct dso *dso, struct map *map, const char *vmlinux, bool vmlinux_allocated); diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 241f300d7d6e..395c626699a9 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -158,4 +158,7 @@ static inline bool thread__is_filtered(struct thread *thread) void thread__free_stitch_list(struct thread *thread); +void thread__resolve(struct thread *thread, struct addr_location *al, + struct perf_sample *sample); + #endif /* __PERF_THREAD_H */ diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index c9bfe4696943..e848579e61a8 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -18,6 +18,7 @@ #include "thread_map.h" #include "debug.h" #include "event.h" +#include <internal/threadmap.h> /* Skip "." and ".." 
directories */ static int filter(const struct dirent *dir) diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 3bb860a32b8e..00ec05fc1656 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -4,8 +4,6 @@ #include <sys/types.h> #include <stdio.h> -#include <linux/refcount.h> -#include <internal/threadmap.h> #include <perf/threadmap.h> struct perf_record_thread_map; diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 7172ca05265f..636a010d929b 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -12,9 +12,9 @@ #include "debug.h" #include "trace-event.h" -#include "event.h" #include "evsel.h" #include <linux/zalloc.h> +#include "util/sample.h" struct scripting_context *scripting_context; |