Diffstat (limited to 'tools'): 544 files changed, 34556 insertions, 6158 deletions
diff --git a/tools/Makefile b/tools/Makefile index 9a617adc6675..d6f307dfb1a3 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,3 +1,8 @@ +# Some of the tools (perf) use same make variables +# as in kernel build. +export srctree= +export objtree= + include scripts/Makefile.include help: @@ -8,6 +13,7 @@ help: @echo ' cpupower - a tool for all things x86 CPU power' @echo ' firewire - the userspace part of nosy, an IEEE-1394 traffic sniffer' @echo ' hv - tools used when in Hyper-V clients' + @echo ' iio - IIO tools' @echo ' lguest - a minimal 32-bit x86 hypervisor' @echo ' perf - Linux performance measurement and analysis tool' @echo ' selftests - various kernel selftests' @@ -18,6 +24,7 @@ help: @echo ' vm - misc vm tools' @echo ' x86_energy_perf_policy - Intel energy policy tool' @echo ' tmon - thermal monitoring and tuning tool' + @echo ' freefall - laptop accelerometer program for disk protection' @echo '' @echo 'You can do:' @echo ' $$ make -C tools/ <tool>_install' @@ -41,17 +48,22 @@ acpi: FORCE cpupower: FORCE $(call descend,power/$@) -cgroup firewire hv guest usb virtio vm net: FORCE +cgroup firewire hv guest usb virtio vm net iio: FORCE $(call descend,$@) liblockdep: FORCE $(call descend,lib/lockdep) -libapikfs: FORCE +libapi: FORCE $(call descend,lib/api) -perf: libapikfs FORCE - $(call descend,$@) +# The perf build does not follow the descend function setup, +# invoking it via it's own make rule. +PERF_O = $(if $(O),$(O)/tools/perf,) + +perf: FORCE + $(Q)mkdir -p $(PERF_O) . + $(Q)$(MAKE) --no-print-directory -C perf O=$(PERF_O) subdir= selftests: FORCE $(call descend,testing/$@) @@ -62,6 +74,9 @@ turbostat x86_energy_perf_policy: FORCE tmon: FORCE $(call descend,thermal/$@) +freefall: FORCE + $(call descend,laptop/$@) + acpi_install: $(call descend,power/$(@:_install=),install) @@ -80,10 +95,13 @@ turbostat_install x86_energy_perf_policy_install: tmon_install: $(call descend,thermal/$(@:_install=),install) +freefall_install: + $(call descend,laptop/$(@:_install=),install) + install: acpi_install cgroup_install cpupower_install hv_install firewire_install lguest_install \ perf_install selftests_install turbostat_install usb_install \ virtio_install vm_install net_install x86_energy_perf_policy_install \ - tmon + tmon freefall_install acpi_clean: $(call descend,power/acpi,clean) @@ -91,16 +109,16 @@ acpi_clean: cpupower_clean: $(call descend,power/cpupower,clean) -cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean: +cgroup_clean hv_clean firewire_clean lguest_clean usb_clean virtio_clean vm_clean net_clean iio_clean: $(call descend,$(@:_clean=),clean) liblockdep_clean: $(call descend,lib/lockdep,clean) -libapikfs_clean: +libapi_clean: $(call descend,lib/api,clean) -perf_clean: libapikfs_clean +perf_clean: $(call descend,$(@:_clean=),clean) selftests_clean: @@ -112,8 +130,12 @@ turbostat_clean x86_energy_perf_policy_clean: tmon_clean: $(call descend,thermal/tmon,clean) +freefall_clean: + $(call descend,laptop/freefall,clean) + clean: acpi_clean cgroup_clean cpupower_clean hv_clean firewire_clean lguest_clean \ perf_clean selftests_clean turbostat_clean usb_clean virtio_clean \ - vm_clean net_clean x86_energy_perf_policy_clean tmon_clean + vm_clean net_clean iio_clean x86_energy_perf_policy_clean tmon_clean \ + freefall_clean .PHONY: FORCE diff --git a/tools/arch/alpha/include/asm/barrier.h b/tools/arch/alpha/include/asm/barrier.h new file mode 100644 index 000000000000..95df19c95482 --- /dev/null +++ 
b/tools/arch/alpha/include/asm/barrier.h @@ -0,0 +1,8 @@ +#ifndef __TOOLS_LINUX_ASM_ALPHA_BARRIER_H +#define __TOOLS_LINUX_ASM_ALPHA_BARRIER_H + +#define mb() __asm__ __volatile__("mb": : :"memory") +#define rmb() __asm__ __volatile__("mb": : :"memory") +#define wmb() __asm__ __volatile__("wmb": : :"memory") + +#endif /* __TOOLS_LINUX_ASM_ALPHA_BARRIER_H */ diff --git a/tools/arch/arm/include/asm/barrier.h b/tools/arch/arm/include/asm/barrier.h new file mode 100644 index 000000000000..005c618a0ab0 --- /dev/null +++ b/tools/arch/arm/include/asm/barrier.h @@ -0,0 +1,12 @@ +#ifndef _TOOLS_LINUX_ASM_ARM_BARRIER_H +#define _TOOLS_LINUX_ASM_ARM_BARRIER_H + +/* + * Use the __kuser_memory_barrier helper in the CPU helper page. See + * arch/arm/kernel/entry-armv.S in the kernel source for details. + */ +#define mb() ((void(*)(void))0xffff0fa0)() +#define wmb() ((void(*)(void))0xffff0fa0)() +#define rmb() ((void(*)(void))0xffff0fa0)() + +#endif /* _TOOLS_LINUX_ASM_ARM_BARRIER_H */ diff --git a/tools/arch/arm64/include/asm/barrier.h b/tools/arch/arm64/include/asm/barrier.h new file mode 100644 index 000000000000..a0483c8e0142 --- /dev/null +++ b/tools/arch/arm64/include/asm/barrier.h @@ -0,0 +1,16 @@ +#ifndef _TOOLS_LINUX_ASM_AARCH64_BARRIER_H +#define _TOOLS_LINUX_ASM_AARCH64_BARRIER_H + +/* + * From tools/perf/perf-sys.h, last modified in: + * f428ebd184c82a7914b2aa7e9f868918aaf7ea78 perf tools: Fix AAAAARGH64 memory barriers + * + * XXX: arch/arm64/include/asm/barrier.h in the kernel sources use dsb, is this + * a case like for arm32 where we do things differently in userspace? + */ + +#define mb() asm volatile("dmb ish" ::: "memory") +#define wmb() asm volatile("dmb ishst" ::: "memory") +#define rmb() asm volatile("dmb ishld" ::: "memory") + +#endif /* _TOOLS_LINUX_ASM_AARCH64_BARRIER_H */ diff --git a/tools/arch/ia64/include/asm/barrier.h b/tools/arch/ia64/include/asm/barrier.h new file mode 100644 index 000000000000..e4422b4b634e --- /dev/null +++ b/tools/arch/ia64/include/asm/barrier.h @@ -0,0 +1,48 @@ +/* + * Copied from the kernel sources to tools/: + * + * Memory barrier definitions. This is based on information published + * in the Processor Abstraction Layer and the System Abstraction Layer + * manual. + * + * Copyright (C) 1998-2003 Hewlett-Packard Co + * David Mosberger-Tang <davidm@hpl.hp.com> + * Copyright (C) 1999 Asit Mallick <asit.k.mallick@intel.com> + * Copyright (C) 1999 Don Dugger <don.dugger@intel.com> + */ +#ifndef _TOOLS_LINUX_ASM_IA64_BARRIER_H +#define _TOOLS_LINUX_ASM_IA64_BARRIER_H + +#include <linux/compiler.h> + +/* + * Macros to force memory ordering. In these descriptions, "previous" + * and "subsequent" refer to program order; "visible" means that all + * architecturally visible effects of a memory access have occurred + * (at a minimum, this means the memory has been read or written). + * + * wmb(): Guarantees that all preceding stores to memory- + * like regions are visible before any subsequent + * stores and that all following stores will be + * visible only after all previous stores. + * rmb(): Like wmb(), but for reads. + * mb(): wmb()/rmb() combo, i.e., all previous memory + * accesses are visible before all subsequent + * accesses and vice versa. This is also known as + * a "fence." + * + * Note: "mb()" and its variants cannot be used as a fence to order + * accesses to memory mapped I/O registers. For that, mf.a needs to + * be used. 
However, we don't want to always use mf.a because (a) + * it's (presumably) much slower than mf and (b) mf.a is supported for + * sequential memory pages only. + */ + +/* XXX From arch/ia64/include/uapi/asm/gcc_intrin.h */ +#define ia64_mf() asm volatile ("mf" ::: "memory") + +#define mb() ia64_mf() +#define rmb() mb() +#define wmb() mb() + +#endif /* _TOOLS_LINUX_ASM_IA64_BARRIER_H */ diff --git a/tools/arch/mips/include/asm/barrier.h b/tools/arch/mips/include/asm/barrier.h new file mode 100644 index 000000000000..80f96f7556e3 --- /dev/null +++ b/tools/arch/mips/include/asm/barrier.h @@ -0,0 +1,20 @@ +#ifndef _TOOLS_LINUX_ASM_MIPS_BARRIER_H +#define _TOOLS_LINUX_ASM_MIPS_BARRIER_H +/* + * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced + * in c1e028ef40b8d6943b767028ba17d4f2ba020edb, more work needed to make it + * more closely follow the Linux kernel arch/mips/include/asm/barrier.h file. + * Probably when we continue work on tools/ Kconfig support to have all the + * CONFIG_ needed for properly doing that. + */ +#define mb() asm volatile( \ + ".set mips2\n\t" \ + "sync\n\t" \ + ".set mips0" \ + : /* no output */ \ + : /* no input */ \ + : "memory") +#define wmb() mb() +#define rmb() mb() + +#endif /* _TOOLS_LINUX_ASM_MIPS_BARRIER_H */ diff --git a/tools/arch/powerpc/include/asm/barrier.h b/tools/arch/powerpc/include/asm/barrier.h new file mode 100644 index 000000000000..b23aee8e6d90 --- /dev/null +++ b/tools/arch/powerpc/include/asm/barrier.h @@ -0,0 +1,29 @@ +/* + * Copied from the kernel sources: + * + * Copyright (C) 1999 Cort Dougan <cort@cs.nmt.edu> + */ +#ifndef _TOOLS_LINUX_ASM_POWERPC_BARRIER_H +#define _TOOLS_LINUX_ASM_POWERPC_BARRIER_H + +/* + * Memory barrier. + * The sync instruction guarantees that all memory accesses initiated + * by this processor have been performed (with respect to all other + * mechanisms that access memory). The eieio instruction is a barrier + * providing an ordering (separately) for (a) cacheable stores and (b) + * loads and stores to non-cacheable memory (e.g. I/O devices). + * + * mb() prevents loads and stores being reordered across this point. + * rmb() prevents loads being reordered across this point. + * wmb() prevents stores being reordered across this point. + * + * *mb() variants without smp_ prefix must order all types of memory + * operations with one another. sync is the only instruction sufficient + * to do this. + */ +#define mb() __asm__ __volatile__ ("sync" : : : "memory") +#define rmb() __asm__ __volatile__ ("sync" : : : "memory") +#define wmb() __asm__ __volatile__ ("sync" : : : "memory") + +#endif /* _TOOLS_LINUX_ASM_POWERPC_BARRIER_H */ diff --git a/tools/arch/s390/include/asm/barrier.h b/tools/arch/s390/include/asm/barrier.h new file mode 100644 index 000000000000..f85141266b92 --- /dev/null +++ b/tools/arch/s390/include/asm/barrier.h @@ -0,0 +1,30 @@ +/* + * Copied from the kernel sources: + * + * Copyright IBM Corp. 1999, 2009 + * + * Author(s): Martin Schwidefsky <schwidefsky@de.ibm.com> + */ + +#ifndef __TOOLS_LINUX_ASM_BARRIER_H +#define __TOOLS_LINUX_ASM_BARRIER_H + +/* + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. 
+ */ + +#ifdef CONFIG_HAVE_MARCH_Z196_FEATURES +/* Fast-BCR without checkpoint synchronization */ +#define __ASM_BARRIER "bcr 14,0\n" +#else +#define __ASM_BARRIER "bcr 15,0\n" +#endif + +#define mb() do { asm volatile(__ASM_BARRIER : : : "memory"); } while (0) + +#define rmb() mb() +#define wmb() mb() + +#endif /* __TOOLS_LIB_ASM_BARRIER_H */ diff --git a/tools/arch/sh/include/asm/barrier.h b/tools/arch/sh/include/asm/barrier.h new file mode 100644 index 000000000000..c18fd7599b97 --- /dev/null +++ b/tools/arch/sh/include/asm/barrier.h @@ -0,0 +1,32 @@ +/* + * Copied from the kernel sources: + * + * Copyright (C) 1999, 2000 Niibe Yutaka & Kaz Kojima + * Copyright (C) 2002 Paul Mundt + */ +#ifndef __TOOLS_LINUX_ASM_SH_BARRIER_H +#define __TOOLS_LINUX_ASM_SH_BARRIER_H + +/* + * A brief note on ctrl_barrier(), the control register write barrier. + * + * Legacy SH cores typically require a sequence of 8 nops after + * modification of a control register in order for the changes to take + * effect. On newer cores (like the sh4a and sh5) this is accomplished + * with icbi. + * + * Also note that on sh4a in the icbi case we can forego a synco for the + * write barrier, as it's not necessary for control registers. + * + * Historically we have only done this type of barrier for the MMUCR, but + * it's also necessary for the CCR, so we make it generic here instead. + */ +#if defined(__SH4A__) || defined(__SH5__) +#define mb() __asm__ __volatile__ ("synco": : :"memory") +#define rmb() mb() +#define wmb() mb() +#endif + +#include <asm-generic/barrier.h> + +#endif /* __TOOLS_LINUX_ASM_SH_BARRIER_H */ diff --git a/tools/arch/sparc/include/asm/barrier.h b/tools/arch/sparc/include/asm/barrier.h new file mode 100644 index 000000000000..8c017b3b1391 --- /dev/null +++ b/tools/arch/sparc/include/asm/barrier.h @@ -0,0 +1,8 @@ +#ifndef ___TOOLS_LINUX_ASM_SPARC_BARRIER_H +#define ___TOOLS_LINUX_ASM_SPARC_BARRIER_H +#if defined(__sparc__) && defined(__arch64__) +#include "barrier_64.h" +#else +#include "barrier_32.h" +#endif +#endif diff --git a/tools/arch/sparc/include/asm/barrier_32.h b/tools/arch/sparc/include/asm/barrier_32.h new file mode 100644 index 000000000000..c5eadd0a7233 --- /dev/null +++ b/tools/arch/sparc/include/asm/barrier_32.h @@ -0,0 +1,6 @@ +#ifndef __TOOLS_PERF_SPARC_BARRIER_H +#define __TOOLS_PERF_SPARC_BARRIER_H + +#include <asm-generic/barrier.h> + +#endif /* !(__TOOLS_PERF_SPARC_BARRIER_H) */ diff --git a/tools/arch/sparc/include/asm/barrier_64.h b/tools/arch/sparc/include/asm/barrier_64.h new file mode 100644 index 000000000000..9a7d7322c3f7 --- /dev/null +++ b/tools/arch/sparc/include/asm/barrier_64.h @@ -0,0 +1,42 @@ +#ifndef __TOOLS_LINUX_SPARC64_BARRIER_H +#define __TOOLS_LINUX_SPARC64_BARRIER_H + +/* Copied from the kernel sources to tools/: + * + * These are here in an effort to more fully work around Spitfire Errata + * #51. Essentially, if a memory barrier occurs soon after a mispredicted + * branch, the chip can stop executing instructions until a trap occurs. + * Therefore, if interrupts are disabled, the chip can hang forever. + * + * It used to be believed that the memory barrier had to be right in the + * delay slot, but a case has been traced recently wherein the memory barrier + * was one instruction after the branch delay slot and the chip still hung. 
+ * The offending sequence was the following in sym_wakeup_done() of the + * sym53c8xx_2 driver: + * + * call sym_ccb_from_dsa, 0 + * movge %icc, 0, %l0 + * brz,pn %o0, .LL1303 + * mov %o0, %l2 + * membar #LoadLoad + * + * The branch has to be mispredicted for the bug to occur. Therefore, we put + * the memory barrier explicitly into a "branch always, predicted taken" + * delay slot to avoid the problem case. + */ +#define membar_safe(type) \ +do { __asm__ __volatile__("ba,pt %%xcc, 1f\n\t" \ + " membar " type "\n" \ + "1:\n" \ + : : : "memory"); \ +} while (0) + +/* The kernel always executes in TSO memory model these days, + * and furthermore most sparc64 chips implement more stringent + * memory ordering than required by the specifications. + */ +#define mb() membar_safe("#StoreLoad") +#define rmb() __asm__ __volatile__("":::"memory") +#define wmb() __asm__ __volatile__("":::"memory") + +#endif /* !(__TOOLS_LINUX_SPARC64_BARRIER_H) */ diff --git a/tools/arch/tile/include/asm/barrier.h b/tools/arch/tile/include/asm/barrier.h new file mode 100644 index 000000000000..7d3692c3d4ac --- /dev/null +++ b/tools/arch/tile/include/asm/barrier.h @@ -0,0 +1,15 @@ +#ifndef _TOOLS_LINUX_ASM_TILE_BARRIER_H +#define _TOOLS_LINUX_ASM_TILE_BARRIER_H +/* + * FIXME: This came from tools/perf/perf-sys.h, where it was first introduced + * in 620830b6954913647b7c7f68920cf48eddf6ad92, more work needed to make it + * more closely follow the Linux kernel arch/tile/include/asm/barrier.h file. + * Probably when we continue work on tools/ Kconfig support to have all the + * CONFIG_ needed for properly doing that. + */ + +#define mb() asm volatile ("mf" ::: "memory") +#define wmb() mb() +#define rmb() mb() + +#endif /* _TOOLS_LINUX_ASM_TILE_BARRIER_H */ diff --git a/tools/arch/x86/include/asm/atomic.h b/tools/arch/x86/include/asm/atomic.h new file mode 100644 index 000000000000..059e33e94260 --- /dev/null +++ b/tools/arch/x86/include/asm/atomic.h @@ -0,0 +1,65 @@ +#ifndef _TOOLS_LINUX_ASM_X86_ATOMIC_H +#define _TOOLS_LINUX_ASM_X86_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> +#include "rmwcc.h" + +#define LOCK_PREFIX "\n\tlock; " + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + */ + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return ACCESS_ONCE((v)->counter); +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. + */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + asm volatile(LOCK_PREFIX "incl %0" + : "+m" (v->counter)); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. 
+ */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + GEN_UNARY_RMWcc(LOCK_PREFIX "decl", v->counter, "%0", "e"); +} + +#endif /* _TOOLS_LINUX_ASM_X86_ATOMIC_H */ diff --git a/tools/arch/x86/include/asm/barrier.h b/tools/arch/x86/include/asm/barrier.h new file mode 100644 index 000000000000..f366d8e550e4 --- /dev/null +++ b/tools/arch/x86/include/asm/barrier.h @@ -0,0 +1,28 @@ +#ifndef _TOOLS_LINUX_ASM_X86_BARRIER_H +#define _TOOLS_LINUX_ASM_X86_BARRIER_H + +/* + * Copied from the Linux kernel sources, and also moving code + * out from tools/perf/perf-sys.h so as to make it be located + * in a place similar as in the kernel sources. + * + * Force strict CPU ordering. + * And yes, this is required on UP too when we're talking + * to devices. + */ + +#if defined(__i386__) +/* + * Some non-Intel clones support out of order store. wmb() ceases to be a + * nop for these. + */ +#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") +#elif defined(__x86_64__) +#define mb() asm volatile("mfence":::"memory") +#define rmb() asm volatile("lfence":::"memory") +#define wmb() asm volatile("sfence" ::: "memory") +#endif + +#endif /* _TOOLS_LINUX_ASM_X86_BARRIER_H */ diff --git a/tools/arch/x86/include/asm/rmwcc.h b/tools/arch/x86/include/asm/rmwcc.h new file mode 100644 index 000000000000..a6669bc06939 --- /dev/null +++ b/tools/arch/x86/include/asm/rmwcc.h @@ -0,0 +1,41 @@ +#ifndef _TOOLS_LINUX_ASM_X86_RMWcc +#define _TOOLS_LINUX_ASM_X86_RMWcc + +#ifdef CC_HAVE_ASM_GOTO + +#define __GEN_RMWcc(fullop, var, cc, ...) \ +do { \ + asm_volatile_goto (fullop "; j" cc " %l[cc_label]" \ + : : "m" (var), ## __VA_ARGS__ \ + : "memory" : cc_label); \ + return 0; \ +cc_label: \ + return 1; \ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op " %1, " arg0, var, cc, vcon (val)) + +#else /* !CC_HAVE_ASM_GOTO */ + +#define __GEN_RMWcc(fullop, var, cc, ...) \ +do { \ + char c; \ + asm volatile (fullop "; set" cc " %1" \ + : "+m" (var), "=qm" (c) \ + : __VA_ARGS__ : "memory"); \ + return c != 0; \ +} while (0) + +#define GEN_UNARY_RMWcc(op, var, arg0, cc) \ + __GEN_RMWcc(op " " arg0, var, cc) + +#define GEN_BINARY_RMWcc(op, var, vcon, val, arg0, cc) \ + __GEN_RMWcc(op " %2, " arg0, var, cc, vcon (val)) + +#endif /* CC_HAVE_ASM_GOTO */ + +#endif /* _TOOLS_LINUX_ASM_X86_RMWcc */ diff --git a/tools/arch/xtensa/include/asm/barrier.h b/tools/arch/xtensa/include/asm/barrier.h new file mode 100644 index 000000000000..583800bd7259 --- /dev/null +++ b/tools/arch/xtensa/include/asm/barrier.h @@ -0,0 +1,18 @@ +/* + * Copied from the kernel sources to tools/: + * + * This file is subject to the terms and conditions of the GNU General Public + * License. See the file "COPYING" in the main directory of this archive + * for more details. + * + * Copyright (C) 2001 - 2012 Tensilica Inc. 
+ */ + +#ifndef _TOOLS_LINUX_XTENSA_SYSTEM_H +#define _TOOLS_LINUX_XTENSA_SYSTEM_H + +#define mb() ({ __asm__ __volatile__("memw" : : : "memory"); }) +#define rmb() barrier() +#define wmb() mb() + +#endif /* _TOOLS_LINUX_XTENSA_SYSTEM_H */ diff --git a/tools/build/Build.include b/tools/build/Build.include new file mode 100644 index 000000000000..4c8daaccb82a --- /dev/null +++ b/tools/build/Build.include @@ -0,0 +1,81 @@ +### +# build: Generic definitions +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +### +# Convenient variables +comma := , +squote := ' + +### +# Name of target with a '.' as filename prefix. foo/bar.o => foo/.bar.o +dot-target = $(dir $@).$(notdir $@) + +### +# filename of target with directory and extension stripped +basetarget = $(basename $(notdir $@)) + +### +# The temporary file to save gcc -MD generated dependencies must not +# contain a comma +depfile = $(subst $(comma),_,$(dot-target).d) + +### +# Check if both arguments has same arguments. Result is empty string if equal. +arg-check = $(strip $(filter-out $(cmd_$(1)), $(cmd_$@)) \ + $(filter-out $(cmd_$@), $(cmd_$(1))) ) + +### +# Escape single quote for use in echo statements +escsq = $(subst $(squote),'\$(squote)',$1) + +# Echo command +# Short version is used, if $(quiet) equals `quiet_', otherwise full one. +echo-cmd = $(if $($(quiet)cmd_$(1)),\ + echo ' $(call escsq,$($(quiet)cmd_$(1)))';) + +### +# Replace >$< with >$$< to preserve $ when reloading the .cmd file +# (needed for make) +# Replace >#< with >\#< to avoid starting a comment in the .cmd file +# (needed for make) +# Replace >'< with >'\''< to be able to enclose the whole string in '...' +# (needed for the shell) +make-cmd = $(call escsq,$(subst \#,\\\#,$(subst $$,$$$$,$(cmd_$(1))))) + +### +# Find any prerequisites that is newer than target or that does not exist. +# PHONY targets skipped in both cases. +any-prereq = $(filter-out $(PHONY),$?) $(filter-out $(PHONY) $(wildcard $^),$^) + +### +# if_changed_dep - execute command if any prerequisite is newer than +# target, or command line has changed and update +# dependencies in the cmd file +if_changed_dep = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + cat $(depfile) > $(dot-target).cmd; \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' >> $(dot-target).cmd) + +# if_changed - execute command if any prerequisite is newer than +# target, or command line has changed +if_changed = $(if $(strip $(any-prereq) $(arg-check)), \ + @set -e; \ + $(echo-cmd) $(cmd_$(1)); \ + printf '%s\n' 'cmd_$@ := $(make-cmd)' > $(dot-target).cmd) + +### +# C flags to be used in rule definitions, includes: +# - depfile generation +# - global $(CFLAGS) +# - per target C flags +# - per object C flags +# - BUILD_STR macro to allow '-D"$(variable)"' constructs +c_flags = -Wp,-MD,$(depfile),-MT,$@ $(CFLAGS) -D"BUILD_STR(s)=\#s" $(CFLAGS_$(basetarget).o) $(CFLAGS_$(obj)) diff --git a/tools/build/Documentation/Build.txt b/tools/build/Documentation/Build.txt new file mode 100644 index 000000000000..00ad2d608727 --- /dev/null +++ b/tools/build/Documentation/Build.txt @@ -0,0 +1,139 @@ +Build Framework +=============== + +The perf build framework was adopted from the kernel build system, hence the +idea and the way how objects are built is the same. 
+ +Basically the user provides set of 'Build' files that list objects and +directories to nest for specific target to be build. + +Unlike the kernel we don't have a single build object 'obj-y' list that where +we setup source objects, but we support more. This allows one 'Build' file to +carry a sources list for multiple build objects. + +a) Build framework makefiles +---------------------------- + +The build framework consists of 2 Makefiles: + + Build.include + Makefile.build + +While the 'Build.include' file contains just some generic definitions, the +'Makefile.build' file is the makefile used from the outside. It's +interface/usage is following: + + $ make -f tools/build/Makefile srctree=$(KSRC) dir=$(DIR) obj=$(OBJECT) + +where: + + KSRC - is the path to kernel sources + DIR - is the path to the project to be built + OBJECT - is the name of the build object + +When succefully finished the $(DIR) directory contains the final object file +called $(OBJECT)-in.o: + + $ ls $(DIR)/$(OBJECT)-in.o + +which includes all compiled sources described in 'Build' makefiles. + +a) Build makefiles +------------------ + +The user supplies 'Build' makefiles that contains a objects list, and connects +the build to nested directories. + +Assume we have the following project structure: + + ex/a.c + /b.c + /c.c + /d.c + /arch/e.c + /arch/f.c + +Out of which you build the 'ex' binary ' and the 'libex.a' library: + + 'ex' - consists of 'a.o', 'b.o' and libex.a + 'libex.a' - consists of 'c.o', 'd.o', 'e.o' and 'f.o' + +The build framework does not create the 'ex' and 'libex.a' binaries for you, it +only prepares proper objects to be compiled and grouped together. + +To follow the above example, the user provides following 'Build' files: + + ex/Build: + ex-y += a.o + ex-y += b.o + + libex-y += c.o + libex-y += d.o + libex-y += arch/ + + ex/arch/Build: + libex-y += e.o + libex-y += f.o + +and runs: + + $ make -f tools/build/Makefile.build dir=. obj=ex + $ make -f tools/build/Makefile.build dir=. obj=libex + +which creates the following objects: + + ex/ex-in.o + ex/libex-in.o + +that contain request objects names in Build files. + +It's only a matter of 2 single commands to create the final binaries: + + $ ar rcs libex.a libex-in.o + $ gcc -o ex ex-in.o libex.a + +You can check the 'ex' example in 'tools/build/tests/ex' for more details. + +b) Rules +-------- + +The build framework provides standard compilation rules to handle .S and .c +compilation. + +It's possible to include special rule if needed (like we do for flex or bison +code generation). + +c) CFLAGS +--------- + +It's possible to alter the standard object C flags in the following way: + + CFLAGS_perf.o += '...' - alters CFLAGS for perf.o object + CFLAGS_gtk += '...' - alters CFLAGS for gtk build object + +This C flags changes has the scope of the Build makefile they are defined in. + + +d) Dependencies +--------------- + +For each built object file 'a.o' the '.a.cmd' is created and holds: + + - Command line used to built that object + (for each object) + + - Dependency rules generated by 'gcc -Wp,-MD,...' + (for compiled object) + +All existing '.cmd' files are included in the Build process to follow properly +the dependencies and trigger a rebuild when necessary. 
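For illustration, assuming gcc as $(CC), the 'a.o' object from the example above built with dir=. and no $(OUTPUT), and with the global $(CFLAGS) and the BUILD_STR define left out for brevity, the generated '.a.o.cmd' file roughly holds the 'gcc -MD' dependency rule followed by the saved command line:

    a.o: a.c

    cmd_a.o := gcc -Wp,-MD,./.a.o.d,-MT,a.o -c -o a.o a.c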
+ + +e) Single rules +--------------- + +It's possible to build single object file by choice, like: + + $ make util/map.o # objects + $ make util/map.i # preprocessor + $ make util/map.s # assembly diff --git a/tools/build/Makefile.build b/tools/build/Makefile.build new file mode 100644 index 000000000000..faca2bf6a430 --- /dev/null +++ b/tools/build/Makefile.build @@ -0,0 +1,130 @@ +### +# Main build makefile. +# +# Lots of this code have been borrowed or heavily inspired from parts +# of kbuild code, which is not credited, but mostly developed by: +# +# Copyright (C) Sam Ravnborg <sam@mars.ravnborg.org>, 2015 +# Copyright (C) Linus Torvalds <torvalds@linux-foundation.org>, 2015 +# + +PHONY := __build +__build: + +ifeq ($(V),1) + quiet = + Q = +else + quiet=quiet_ + Q=@ +endif + +build-dir := $(srctree)/tools/build + +# Generic definitions +include $(build-dir)/Build.include + +# do not force detected configuration +-include $(OUTPUT).config-detected + +# Init all relevant variables used in build files so +# 1) they have correct type +# 2) they do not inherit any value from the environment +subdir-y := +obj-y := +subdir-y := +subdir-obj-y := + +# Build definitions +build-file := $(dir)/Build +-include $(build-file) + +quiet_cmd_flex = FLEX $@ +quiet_cmd_bison = BISON $@ + +# Create directory unless it exists +quiet_cmd_mkdir = MKDIR $(dir $@) + cmd_mkdir = mkdir -p $(dir $@) + rule_mkdir = $(if $(wildcard $(dir $@)),,@$(call echo-cmd,mkdir) $(cmd_mkdir)) + +# Compile command +quiet_cmd_cc_o_c = CC $@ + cmd_cc_o_c = $(CC) $(c_flags) -c -o $@ $< + +quiet_cmd_cc_i_c = CPP $@ + cmd_cc_i_c = $(CC) $(c_flags) -E -o $@ $< + +quiet_cmd_cc_s_c = AS $@ + cmd_cc_s_c = $(CC) $(c_flags) -S -o $@ $< + +# Link agregate command +# If there's nothing to link, create empty $@ object. 
+quiet_cmd_ld_multi = LD $@ + cmd_ld_multi = $(if $(strip $(obj-y)),\ + $(LD) -r -o $@ $(obj-y),rm -f $@; $(AR) rcs $@) + +# Build rules +$(OUTPUT)%.o: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.o: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)%.i: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.i: %.S FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_i_c) + +$(OUTPUT)%.s: %.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_s_c) + +# Gather build data: +# obj-y - list of build objects +# subdir-y - list of directories to nest +# subdir-obj-y - list of directories objects 'dir/$(obj)-in.o' +obj-y := $($(obj)-y) +subdir-y := $(patsubst %/,%,$(filter %/, $(obj-y))) +obj-y := $(patsubst %/, %/$(obj)-in.o, $(obj-y)) +subdir-obj-y := $(filter %/$(obj)-in.o, $(obj-y)) + +# '$(OUTPUT)/dir' prefix to all objects +objprefix := $(subst ./,,$(OUTPUT)$(dir)/) +obj-y := $(addprefix $(objprefix),$(obj-y)) +subdir-obj-y := $(addprefix $(objprefix),$(subdir-obj-y)) + +# Final '$(obj)-in.o' object +in-target := $(objprefix)$(obj)-in.o + +PHONY += $(subdir-y) + +$(subdir-y): + $(Q)$(MAKE) -f $(build-dir)/Makefile.build dir=$(dir)/$@ obj=$(obj) + +$(sort $(subdir-obj-y)): $(subdir-y) ; + +$(in-target): $(obj-y) FORCE + $(call rule_mkdir) + $(call if_changed,ld_multi) + +__build: $(in-target) + @: + +PHONY += FORCE +FORCE: + +# Include all cmd files to get all the dependency rules +# for all objects included +targets := $(wildcard $(sort $(obj-y) $(in-target) $(MAKECMDGOALS))) +cmd_files := $(wildcard $(foreach f,$(targets),$(dir $(f)).$(notdir $(f)).cmd)) + +ifneq ($(cmd_files),) + include $(cmd_files) +endif + +.PHONY: $(PHONY) diff --git a/tools/build/Makefile.feature b/tools/build/Makefile.feature new file mode 100644 index 000000000000..2975632d51e2 --- /dev/null +++ b/tools/build/Makefile.feature @@ -0,0 +1,171 @@ +feature_dir := $(srctree)/tools/build/feature + +ifneq ($(OUTPUT),) + OUTPUT_FEATURES = $(OUTPUT)feature/ + $(shell mkdir -p $(OUTPUT_FEATURES)) +endif + +feature_check = $(eval $(feature_check_code)) +define feature_check_code + feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C $(feature_dir) test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) +endef + +feature_set = $(eval $(feature_set_code)) +define feature_set_code + feature-$(1) := 1 +endef + +# +# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: +# + +# +# Note that this is not a complete list of all feature tests, just +# those that are typically built on a fully configured system. +# +# [ Feature tests not mentioned here have to be built explicitly in +# the rule that uses them - an example for that is the 'bionic' +# feature check. 
] +# +FEATURE_TESTS ?= \ + backtrace \ + dwarf \ + fortify-source \ + sync-compare-and-swap \ + glibc \ + gtk2 \ + gtk2-infobar \ + libaudit \ + libbfd \ + libelf \ + libelf-getphdrnum \ + libelf-mmap \ + libnuma \ + libperl \ + libpython \ + libpython-version \ + libslang \ + libunwind \ + pthread-attr-setaffinity-np \ + stackprotector-all \ + timerfd \ + libdw-dwarf-unwind \ + zlib \ + lzma + +FEATURE_DISPLAY ?= \ + dwarf \ + glibc \ + gtk2 \ + libaudit \ + libbfd \ + libelf \ + libnuma \ + libperl \ + libpython \ + libslang \ + libunwind \ + libdw-dwarf-unwind \ + zlib \ + lzma + +# Set FEATURE_CHECK_(C|LD)FLAGS-all for all FEATURE_TESTS features. +# If in the future we need per-feature checks/flags for features not +# mentioned in this list we need to refactor this ;-). +set_test_all_flags = $(eval $(set_test_all_flags_code)) +define set_test_all_flags_code + FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1)) + FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) +endef + +$(foreach feat,$(FEATURE_TESTS),$(call set_test_all_flags,$(feat))) + +# +# Special fast-path for the 'all features are available' case: +# +$(call feature_check,all,$(MSG)) + +# +# Just in case the build freshly failed, make sure we print the +# feature matrix: +# +ifeq ($(feature-all), 1) + # + # test-all.c passed - just set all the core feature flags to 1: + # + $(foreach feat,$(FEATURE_TESTS),$(call feature_set,$(feat))) +else + $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C $(feature_dir) $(addsuffix .bin,$(FEATURE_TESTS)) >/dev/null 2>&1) + $(foreach feat,$(FEATURE_TESTS),$(call feature_check,$(feat))) +endif + +# +# Print the result of the feature test: +# +feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) + +define feature_print_status_code + ifeq ($(feature-$(1)), 1) + MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) + else + MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) + endif +endef + +feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) +define feature_print_text_code + MSG = $(shell printf '...%30s: %s' $(1) $(2)) +endef + +FEATURE_DUMP := $(foreach feat,$(FEATURE_DISPLAY),feature-$(feat)($(feature-$(feat)))) +FEATURE_DUMP_FILE := $(shell touch $(OUTPUT)FEATURE-DUMP; cat $(OUTPUT)FEATURE-DUMP) + +ifeq ($(dwarf-post-unwind),1) + FEATURE_DUMP += dwarf-post-unwind($(dwarf-post-unwind-text)) +endif + +# The $(feature_display) controls the default detection message +# output. 
It's set if: +# - detected features differes from stored features from +# last build (in FEATURE-DUMP file) +# - one of the $(FEATURE_DISPLAY) is not detected +# - VF is enabled + +ifneq ("$(FEATURE_DUMP)","$(FEATURE_DUMP_FILE)") + $(shell echo "$(FEATURE_DUMP)" > $(OUTPUT)FEATURE-DUMP) + feature_display := 1 +endif + +feature_display_check = $(eval $(feature_check_code)) +define feature_display_check_code + ifneq ($(feature-$(1)), 1) + feature_display := 1 + endif +endef + +$(foreach feat,$(FEATURE_DISPLAY),$(call feature_display_check,$(feat))) + +ifeq ($(VF),1) + feature_display := 1 + feature_verbose := 1 +endif + +ifeq ($(feature_display),1) + $(info ) + $(info Auto-detecting system features:) + $(foreach feat,$(FEATURE_DISPLAY),$(call feature_print_status,$(feat),)) + + ifeq ($(dwarf-post-unwind),1) + $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) + endif + + ifneq ($(feature_verbose),1) + $(info ) + endif +endif + +ifeq ($(feature_verbose),1) + TMP := $(filter-out $(FEATURE_DISPLAY),$(FEATURE_TESTS)) + $(foreach feat,$(TMP),$(call feature_print_status,$(feat),)) + $(info ) +endif diff --git a/tools/perf/config/feature-checks/.gitignore b/tools/build/feature/.gitignore index 80f3da0c3515..09b335b98842 100644 --- a/tools/perf/config/feature-checks/.gitignore +++ b/tools/build/feature/.gitignore @@ -1,2 +1,3 @@ *.d *.bin +*.output diff --git a/tools/perf/config/feature-checks/Makefile b/tools/build/feature/Makefile index b32ff3372514..463ed8f2a267 100644 --- a/tools/perf/config/feature-checks/Makefile +++ b/tools/build/feature/Makefile @@ -29,33 +29,36 @@ FILES= \ test-stackprotector-all.bin \ test-timerfd.bin \ test-libdw-dwarf-unwind.bin \ + test-libbabeltrace.bin \ test-compile-32.bin \ test-compile-x32.bin \ - test-zlib.bin + test-zlib.bin \ + test-lzma.bin CC := $(CROSS_COMPILE)gcc -MD PKG_CONFIG := $(CROSS_COMPILE)pkg-config all: $(FILES) -BUILD = $(CC) $(CFLAGS) -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) +__BUILD = $(CC) $(CFLAGS) -Wall -Werror -o $(OUTPUT)$@ $(patsubst %.bin,%.c,$@) $(LDFLAGS) + BUILD = $(__BUILD) > $(OUTPUT)$(@:.bin=.make.output) 2>&1 ############################### test-all.bin: - $(BUILD) -Werror -fstack-protector-all -O2 -Werror -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz + $(BUILD) -fstack-protector-all -O2 -D_FORTIFY_SOURCE=2 -ldw -lelf -lnuma -lelf -laudit -I/usr/include/slang -lslang $(shell $(PKG_CONFIG) --libs --cflags gtk+-2.0 2>/dev/null) $(FLAGS_PERL_EMBED) $(FLAGS_PYTHON_EMBED) -DPACKAGE='"perf"' -lbfd -ldl -lz -llzma test-hello.bin: $(BUILD) test-pthread-attr-setaffinity-np.bin: - $(BUILD) -D_GNU_SOURCE -Werror -lpthread + $(BUILD) -D_GNU_SOURCE -lpthread test-stackprotector-all.bin: - $(BUILD) -Werror -fstack-protector-all + $(BUILD) -fstack-protector-all test-fortify-source.bin: - $(BUILD) -O2 -Werror -D_FORTIFY_SOURCE=2 + $(BUILD) -O2 -D_FORTIFY_SOURCE=2 test-bionic.bin: $(BUILD) @@ -118,10 +121,10 @@ test-libbfd.bin: $(BUILD) -DPACKAGE='"perf"' -lbfd -lz -liberty -ldl test-liberty.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty test-liberty-z.bin: - $(CC) -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl -liberty -lz + $(CC) -Wall -Werror -o $(OUTPUT)$@ test-libbfd.c -DPACKAGE='"perf"' -lbfd -ldl 
-liberty -lz test-cplus-demangle.bin: $(BUILD) -liberty @@ -133,10 +136,13 @@ test-timerfd.bin: $(BUILD) test-libdw-dwarf-unwind.bin: - $(BUILD) + $(BUILD) # -ldw provided by $(FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind) + +test-libbabeltrace.bin: + $(BUILD) # -lbabeltrace provided by $(FEATURE_CHECK_LDFLAGS-libbabeltrace) test-sync-compare-and-swap.bin: - $(BUILD) -Werror + $(BUILD) test-compile-32.bin: $(CC) -m32 -o $(OUTPUT)$@ test-compile.c @@ -147,9 +153,12 @@ test-compile-x32.bin: test-zlib.bin: $(BUILD) -lz +test-lzma.bin: + $(BUILD) -llzma + -include *.d ############################### clean: - rm -f $(FILES) *.d + rm -f $(FILES) *.d $(FILES:.bin=.make.output) diff --git a/tools/perf/config/feature-checks/test-all.c b/tools/build/feature/test-all.c index 6d4d09323922..84689a67814a 100644 --- a/tools/perf/config/feature-checks/test-all.c +++ b/tools/build/feature/test-all.c @@ -98,7 +98,23 @@ #undef main #define main main_test_pthread_attr_setaffinity_np -# include "test-pthread_attr_setaffinity_np.c" +# include "test-pthread-attr-setaffinity-np.c" +#undef main + +# if 0 +/* + * Disable libbabeltrace check for test-all, because the requested + * library version is not released yet in most distributions. Will + * reenable later. + */ + +#define main main_test_libbabeltrace +# include "test-libbabeltrace.c" +#undef main +#endif + +#define main main_test_lzma +# include "test-lzma.c" #undef main int main(int argc, char *argv[]) @@ -126,6 +142,7 @@ int main(int argc, char *argv[]) main_test_sync_compare_and_swap(argc, argv); main_test_zlib(); main_test_pthread_attr_setaffinity_np(); + main_test_lzma(); return 0; } diff --git a/tools/perf/config/feature-checks/test-backtrace.c b/tools/build/feature/test-backtrace.c index 7124aa1dc8fb..7124aa1dc8fb 100644 --- a/tools/perf/config/feature-checks/test-backtrace.c +++ b/tools/build/feature/test-backtrace.c diff --git a/tools/perf/config/feature-checks/test-bionic.c b/tools/build/feature/test-bionic.c index eac24e9513eb..eac24e9513eb 100644 --- a/tools/perf/config/feature-checks/test-bionic.c +++ b/tools/build/feature/test-bionic.c diff --git a/tools/perf/config/feature-checks/test-compile.c b/tools/build/feature/test-compile.c index 31dbf45bf99c..31dbf45bf99c 100644 --- a/tools/perf/config/feature-checks/test-compile.c +++ b/tools/build/feature/test-compile.c diff --git a/tools/perf/config/feature-checks/test-cplus-demangle.c b/tools/build/feature/test-cplus-demangle.c index 610c686e0009..610c686e0009 100644 --- a/tools/perf/config/feature-checks/test-cplus-demangle.c +++ b/tools/build/feature/test-cplus-demangle.c diff --git a/tools/perf/config/feature-checks/test-dwarf.c b/tools/build/feature/test-dwarf.c index 3fc1801ce4a9..3fc1801ce4a9 100644 --- a/tools/perf/config/feature-checks/test-dwarf.c +++ b/tools/build/feature/test-dwarf.c diff --git a/tools/perf/config/feature-checks/test-fortify-source.c b/tools/build/feature/test-fortify-source.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-fortify-source.c +++ b/tools/build/feature/test-fortify-source.c diff --git a/tools/perf/config/feature-checks/test-glibc.c b/tools/build/feature/test-glibc.c index b0820345cd98..b0820345cd98 100644 --- a/tools/perf/config/feature-checks/test-glibc.c +++ b/tools/build/feature/test-glibc.c diff --git a/tools/perf/config/feature-checks/test-gtk2-infobar.c b/tools/build/feature/test-gtk2-infobar.c index 397b4646d066..397b4646d066 100644 --- a/tools/perf/config/feature-checks/test-gtk2-infobar.c +++ 
b/tools/build/feature/test-gtk2-infobar.c diff --git a/tools/perf/config/feature-checks/test-gtk2.c b/tools/build/feature/test-gtk2.c index 6bd80e509439..6bd80e509439 100644 --- a/tools/perf/config/feature-checks/test-gtk2.c +++ b/tools/build/feature/test-gtk2.c diff --git a/tools/perf/config/feature-checks/test-hello.c b/tools/build/feature/test-hello.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-hello.c +++ b/tools/build/feature/test-hello.c diff --git a/tools/perf/config/feature-checks/test-libaudit.c b/tools/build/feature/test-libaudit.c index afc019f08641..afc019f08641 100644 --- a/tools/perf/config/feature-checks/test-libaudit.c +++ b/tools/build/feature/test-libaudit.c diff --git a/tools/build/feature/test-libbabeltrace.c b/tools/build/feature/test-libbabeltrace.c new file mode 100644 index 000000000000..9cf802a04885 --- /dev/null +++ b/tools/build/feature/test-libbabeltrace.c @@ -0,0 +1,9 @@ + +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-ir/stream-class.h> + +int main(void) +{ + bt_ctf_stream_class_get_packet_context_type((void *) 0); + return 0; +} diff --git a/tools/perf/config/feature-checks/test-libbfd.c b/tools/build/feature/test-libbfd.c index 24059907e990..24059907e990 100644 --- a/tools/perf/config/feature-checks/test-libbfd.c +++ b/tools/build/feature/test-libbfd.c diff --git a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c b/tools/build/feature/test-libdw-dwarf-unwind.c index f676a3ff442a..f676a3ff442a 100644 --- a/tools/perf/config/feature-checks/test-libdw-dwarf-unwind.c +++ b/tools/build/feature/test-libdw-dwarf-unwind.c diff --git a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c b/tools/build/feature/test-libelf-getphdrnum.c index d710459306c3..d710459306c3 100644 --- a/tools/perf/config/feature-checks/test-libelf-getphdrnum.c +++ b/tools/build/feature/test-libelf-getphdrnum.c diff --git a/tools/perf/config/feature-checks/test-libelf-mmap.c b/tools/build/feature/test-libelf-mmap.c index 564427d7ef18..564427d7ef18 100644 --- a/tools/perf/config/feature-checks/test-libelf-mmap.c +++ b/tools/build/feature/test-libelf-mmap.c diff --git a/tools/perf/config/feature-checks/test-libelf.c b/tools/build/feature/test-libelf.c index 08db322d8957..08db322d8957 100644 --- a/tools/perf/config/feature-checks/test-libelf.c +++ b/tools/build/feature/test-libelf.c diff --git a/tools/perf/config/feature-checks/test-libnuma.c b/tools/build/feature/test-libnuma.c index 4763d9cd587d..4763d9cd587d 100644 --- a/tools/perf/config/feature-checks/test-libnuma.c +++ b/tools/build/feature/test-libnuma.c diff --git a/tools/perf/config/feature-checks/test-libperl.c b/tools/build/feature/test-libperl.c index 8871f6a0fdb4..8871f6a0fdb4 100644 --- a/tools/perf/config/feature-checks/test-libperl.c +++ b/tools/build/feature/test-libperl.c diff --git a/tools/perf/config/feature-checks/test-libpython-version.c b/tools/build/feature/test-libpython-version.c index facea122d812..facea122d812 100644 --- a/tools/perf/config/feature-checks/test-libpython-version.c +++ b/tools/build/feature/test-libpython-version.c diff --git a/tools/perf/config/feature-checks/test-libpython.c b/tools/build/feature/test-libpython.c index b24b28ad6324..b24b28ad6324 100644 --- a/tools/perf/config/feature-checks/test-libpython.c +++ b/tools/build/feature/test-libpython.c diff --git a/tools/perf/config/feature-checks/test-libslang.c b/tools/build/feature/test-libslang.c index 22ff22ed94d1..22ff22ed94d1 100644 --- 
a/tools/perf/config/feature-checks/test-libslang.c +++ b/tools/build/feature/test-libslang.c diff --git a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c b/tools/build/feature/test-libunwind-debug-frame.c index 0ef8087a104a..0ef8087a104a 100644 --- a/tools/perf/config/feature-checks/test-libunwind-debug-frame.c +++ b/tools/build/feature/test-libunwind-debug-frame.c diff --git a/tools/perf/config/feature-checks/test-libunwind.c b/tools/build/feature/test-libunwind.c index 43b9369bcab7..43b9369bcab7 100644 --- a/tools/perf/config/feature-checks/test-libunwind.c +++ b/tools/build/feature/test-libunwind.c diff --git a/tools/build/feature/test-lzma.c b/tools/build/feature/test-lzma.c new file mode 100644 index 000000000000..95adc8ced3dd --- /dev/null +++ b/tools/build/feature/test-lzma.c @@ -0,0 +1,10 @@ +#include <lzma.h> + +int main(void) +{ + lzma_stream strm = LZMA_STREAM_INIT; + int ret; + + ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + return ret ? -1 : 0; +} diff --git a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c b/tools/build/feature/test-pthread-attr-setaffinity-np.c index 2b81b72eca23..fdada5e8d454 100644 --- a/tools/perf/config/feature-checks/test-pthread-attr-setaffinity-np.c +++ b/tools/build/feature/test-pthread-attr-setaffinity-np.c @@ -1,5 +1,6 @@ #include <stdint.h> #include <pthread.h> +#include <sched.h> int main(void) { @@ -8,7 +9,8 @@ int main(void) cpu_set_t cs; pthread_attr_init(&thread_attr); - /* don't care abt exact args, just the API itself in libpthread */ + CPU_ZERO(&cs); + ret = pthread_attr_setaffinity_np(&thread_attr, sizeof(cs), &cs); return ret; diff --git a/tools/perf/config/feature-checks/test-stackprotector-all.c b/tools/build/feature/test-stackprotector-all.c index c9f398d87868..c9f398d87868 100644 --- a/tools/perf/config/feature-checks/test-stackprotector-all.c +++ b/tools/build/feature/test-stackprotector-all.c diff --git a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c b/tools/build/feature/test-sync-compare-and-swap.c index c34d4ca4af56..c34d4ca4af56 100644 --- a/tools/perf/config/feature-checks/test-sync-compare-and-swap.c +++ b/tools/build/feature/test-sync-compare-and-swap.c diff --git a/tools/perf/config/feature-checks/test-timerfd.c b/tools/build/feature/test-timerfd.c index 8c5c083b4d3c..8c5c083b4d3c 100644 --- a/tools/perf/config/feature-checks/test-timerfd.c +++ b/tools/build/feature/test-timerfd.c diff --git a/tools/perf/config/feature-checks/test-zlib.c b/tools/build/feature/test-zlib.c index e111fff6240e..e111fff6240e 100644 --- a/tools/perf/config/feature-checks/test-zlib.c +++ b/tools/build/feature/test-zlib.c diff --git a/tools/build/tests/ex/Build b/tools/build/tests/ex/Build new file mode 100644 index 000000000000..70d876237c57 --- /dev/null +++ b/tools/build/tests/ex/Build @@ -0,0 +1,9 @@ +ex-y += ex.o +ex-y += a.o +ex-y += b.o +ex-y += empty/ +ex-y += empty2/ + +libex-y += c.o +libex-y += d.o +libex-y += arch/ diff --git a/tools/build/tests/ex/Makefile b/tools/build/tests/ex/Makefile new file mode 100644 index 000000000000..52d2476073a3 --- /dev/null +++ b/tools/build/tests/ex/Makefile @@ -0,0 +1,23 @@ +export srctree := ../../../.. +export CC := gcc +export LD := ld +export AR := ar + +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +ex: ex-in.o libex-in.o + gcc -o $@ $^ + +ex.%: FORCE + make -f $(srctree)/tools/build/Makefile.build dir=. $@ + +ex-in.o: FORCE + make $(build)=ex + +libex-in.o: FORCE + make $(build)=libex + +clean: + find . 
-name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + rm -f ex ex.i ex.s + +.PHONY: FORCE diff --git a/tools/build/tests/ex/a.c b/tools/build/tests/ex/a.c new file mode 100644 index 000000000000..851762798c83 --- /dev/null +++ b/tools/build/tests/ex/a.c @@ -0,0 +1,5 @@ + +int a(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/Build b/tools/build/tests/ex/arch/Build new file mode 100644 index 000000000000..55506189efae --- /dev/null +++ b/tools/build/tests/ex/arch/Build @@ -0,0 +1,2 @@ +libex-y += e.o +libex-y += f.o diff --git a/tools/build/tests/ex/arch/e.c b/tools/build/tests/ex/arch/e.c new file mode 100644 index 000000000000..beaa4a1d7ba8 --- /dev/null +++ b/tools/build/tests/ex/arch/e.c @@ -0,0 +1,5 @@ + +int e(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/arch/f.c b/tools/build/tests/ex/arch/f.c new file mode 100644 index 000000000000..7c3e9e9da5b7 --- /dev/null +++ b/tools/build/tests/ex/arch/f.c @@ -0,0 +1,5 @@ + +int f(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/b.c b/tools/build/tests/ex/b.c new file mode 100644 index 000000000000..c24ff9ca9a97 --- /dev/null +++ b/tools/build/tests/ex/b.c @@ -0,0 +1,5 @@ + +int b(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/c.c b/tools/build/tests/ex/c.c new file mode 100644 index 000000000000..e216d0217499 --- /dev/null +++ b/tools/build/tests/ex/c.c @@ -0,0 +1,5 @@ + +int c(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/d.c b/tools/build/tests/ex/d.c new file mode 100644 index 000000000000..80dc0f06151b --- /dev/null +++ b/tools/build/tests/ex/d.c @@ -0,0 +1,5 @@ + +int d(void) +{ + return 0; +} diff --git a/tools/build/tests/ex/empty/Build b/tools/build/tests/ex/empty/Build new file mode 100644 index 000000000000..e69de29bb2d1 --- /dev/null +++ b/tools/build/tests/ex/empty/Build diff --git a/tools/build/tests/ex/empty2/README b/tools/build/tests/ex/empty2/README new file mode 100644 index 000000000000..2107cc5bf5a9 --- /dev/null +++ b/tools/build/tests/ex/empty2/README @@ -0,0 +1,2 @@ +This directory is left intentionally without Build file +to test proper nesting into Build-less directories. diff --git a/tools/build/tests/ex/ex.c b/tools/build/tests/ex/ex.c new file mode 100644 index 000000000000..dc42eb2e1a67 --- /dev/null +++ b/tools/build/tests/ex/ex.c @@ -0,0 +1,19 @@ + +int a(void); +int b(void); +int c(void); +int d(void); +int e(void); +int f(void); + +int main(void) +{ + a(); + b(); + c(); + d(); + e(); + f(); + + return 0; +} diff --git a/tools/build/tests/run.sh b/tools/build/tests/run.sh new file mode 100755 index 000000000000..5494f8ea7567 --- /dev/null +++ b/tools/build/tests/run.sh @@ -0,0 +1,42 @@ +#!/bin/sh + +function test_ex { + make -C ex V=1 clean > ex.out 2>&1 + make -C ex V=1 >> ex.out 2>&1 + + if [ ! -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} + +function test_ex_suffix { + make -C ex V=1 clean > ex.out 2>&1 + + # use -rR to disable make's builtin rules + make -rR -C ex V=1 ex.o >> ex.out 2>&1 + make -rR -C ex V=1 ex.i >> ex.out 2>&1 + make -rR -C ex V=1 ex.s >> ex.out 2>&1 + + if [ -x ./ex/ex ]; then + echo FAILED + exit -1 + fi + + if [ ! -f ./ex/ex.o -o ! -f ./ex/ex.i -o ! -f ./ex/ex.s ]; then + echo FAILED + exit -1 + fi + + make -C ex V=1 clean > /dev/null 2>&1 + rm -f ex.out +} +echo -n Testing.. 
+ +test_ex +test_ex_suffix + +echo OK diff --git a/tools/hv/Makefile b/tools/hv/Makefile index 99ffe61051a7..a8ab79556926 100644 --- a/tools/hv/Makefile +++ b/tools/hv/Makefile @@ -3,7 +3,7 @@ CC = $(CROSS_COMPILE)gcc PTHREAD_LIBS = -lpthread WARNINGS = -Wall -Wextra -CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) +CFLAGS = $(WARNINGS) -g $(PTHREAD_LIBS) $(shell getconf LFS_CFLAGS) all: hv_kvp_daemon hv_vss_daemon hv_fcopy_daemon %: %.c diff --git a/tools/hv/hv_fcopy_daemon.c b/tools/hv/hv_fcopy_daemon.c index 9445d8f264a4..5480e4e424eb 100644 --- a/tools/hv/hv_fcopy_daemon.c +++ b/tools/hv/hv_fcopy_daemon.c @@ -137,6 +137,8 @@ int main(int argc, char *argv[]) int version = FCOPY_CURRENT_VERSION; char *buffer[4096 * 2]; struct hv_fcopy_hdr *in_msg; + int in_handshake = 1; + __u32 kernel_modver; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -191,6 +193,19 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "pread failed: %s", strerror(errno)); exit(EXIT_FAILURE); } + + if (in_handshake) { + if (len != sizeof(kernel_modver)) { + syslog(LOG_ERR, "invalid version negotiation"); + exit(EXIT_FAILURE); + } + kernel_modver = *(__u32 *)buffer; + in_handshake = 0; + syslog(LOG_INFO, "HV_FCOPY: kernel module version: %d", + kernel_modver); + continue; + } + in_msg = (struct hv_fcopy_hdr *)buffer; switch (in_msg->operation) { diff --git a/tools/hv/hv_kvp_daemon.c b/tools/hv/hv_kvp_daemon.c index 408bb076a234..0d9f48ec42bb 100644 --- a/tools/hv/hv_kvp_daemon.c +++ b/tools/hv/hv_kvp_daemon.c @@ -33,7 +33,6 @@ #include <ctype.h> #include <errno.h> #include <arpa/inet.h> -#include <linux/connector.h> #include <linux/hyperv.h> #include <linux/netlink.h> #include <ifaddrs.h> @@ -79,7 +78,6 @@ enum { DNS }; -static struct sockaddr_nl addr; static int in_hand_shake = 1; static char *os_name = ""; @@ -1387,34 +1385,6 @@ kvp_get_domain_name(char *buffer, int length) freeaddrinfo(info); } -static int -netlink_send(int fd, struct cn_msg *msg) -{ - struct nlmsghdr nlh = { .nlmsg_type = NLMSG_DONE }; - unsigned int size; - struct msghdr message; - struct iovec iov[2]; - - size = sizeof(struct cn_msg) + msg->len; - - nlh.nlmsg_pid = getpid(); - nlh.nlmsg_len = NLMSG_LENGTH(size); - - iov[0].iov_base = &nlh; - iov[0].iov_len = sizeof(nlh); - - iov[1].iov_base = msg; - iov[1].iov_len = size; - - memset(&message, 0, sizeof(message)); - message.msg_name = &addr; - message.msg_namelen = sizeof(addr); - message.msg_iov = iov; - message.msg_iovlen = 2; - - return sendmsg(fd, &message, 0); -} - void print_usage(char *argv[]) { fprintf(stderr, "Usage: %s [options]\n" @@ -1425,22 +1395,17 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int fd, len, nl_group; + int kvp_fd, len; int error; - struct cn_msg *message; struct pollfd pfd; - struct nlmsghdr *incoming_msg; - struct cn_msg *incoming_cn_msg; - struct hv_kvp_msg *hv_msg; - char *p; + char *p; + struct hv_kvp_msg hv_msg[1]; char *key_value; char *key_name; int op; int pool; char *if_name; struct hv_kvp_ipaddr_value *kvp_ip_val; - char *kvp_recv_buffer; - size_t kvp_recv_buffer_len; int daemonize = 1, long_index = 0, opt; static struct option long_options[] = { @@ -1468,12 +1433,14 @@ int main(int argc, char *argv[]) openlog("KVP", 0, LOG_USER); syslog(LOG_INFO, "KVP starting; pid is:%d", getpid()); - kvp_recv_buffer_len = NLMSG_LENGTH(0) + sizeof(struct cn_msg) + sizeof(struct hv_kvp_msg); - kvp_recv_buffer = calloc(1, kvp_recv_buffer_len); - if (!kvp_recv_buffer) { - syslog(LOG_ERR, "Failed to allocate netlink buffer"); + 
kvp_fd = open("/dev/vmbus/hv_kvp", O_RDWR); + + if (kvp_fd < 0) { + syslog(LOG_ERR, "open /dev/vmbus/hv_kvp failed; error: %d %s", + errno, strerror(errno)); exit(EXIT_FAILURE); } + /* * Retrieve OS release information. */ @@ -1489,100 +1456,44 @@ int main(int argc, char *argv[]) exit(EXIT_FAILURE); } - fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); - if (fd < 0) { - syslog(LOG_ERR, "netlink socket creation failed; error: %d %s", errno, - strerror(errno)); - exit(EXIT_FAILURE); - } - addr.nl_family = AF_NETLINK; - addr.nl_pad = 0; - addr.nl_pid = 0; - addr.nl_groups = 0; - - - error = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); - if (error < 0) { - syslog(LOG_ERR, "bind failed; error: %d %s", errno, strerror(errno)); - close(fd); - exit(EXIT_FAILURE); - } - nl_group = CN_KVP_IDX; - - if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)) < 0) { - syslog(LOG_ERR, "setsockopt failed; error: %d %s", errno, strerror(errno)); - close(fd); - exit(EXIT_FAILURE); - } - /* * Register ourselves with the kernel. */ - message = (struct cn_msg *)kvp_recv_buffer; - message->id.idx = CN_KVP_IDX; - message->id.val = CN_KVP_VAL; - - hv_msg = (struct hv_kvp_msg *)message->data; hv_msg->kvp_hdr.operation = KVP_OP_REGISTER1; - message->ack = 0; - message->len = sizeof(struct hv_kvp_msg); - - len = netlink_send(fd, message); - if (len < 0) { - syslog(LOG_ERR, "netlink_send failed; error: %d %s", errno, strerror(errno)); - close(fd); + len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg)); + if (len != sizeof(struct hv_kvp_msg)) { + syslog(LOG_ERR, "registration to kernel failed; error: %d %s", + errno, strerror(errno)); + close(kvp_fd); exit(EXIT_FAILURE); } - pfd.fd = fd; + pfd.fd = kvp_fd; while (1) { - struct sockaddr *addr_p = (struct sockaddr *) &addr; - socklen_t addr_l = sizeof(addr); pfd.events = POLLIN; pfd.revents = 0; if (poll(&pfd, 1, -1) < 0) { syslog(LOG_ERR, "poll failed; error: %d %s", errno, strerror(errno)); if (errno == EINVAL) { - close(fd); + close(kvp_fd); exit(EXIT_FAILURE); } else continue; } - len = recvfrom(fd, kvp_recv_buffer, kvp_recv_buffer_len, 0, - addr_p, &addr_l); - - if (len < 0) { - int saved_errno = errno; - syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", - addr.nl_pid, errno, strerror(errno)); + len = read(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg)); - if (saved_errno == ENOBUFS) { - syslog(LOG_ERR, "receive error: ignored"); - continue; - } + if (len != sizeof(struct hv_kvp_msg)) { + syslog(LOG_ERR, "read failed; error:%d %s", + errno, strerror(errno)); - close(fd); - return -1; + close(kvp_fd); + return EXIT_FAILURE; } - if (addr.nl_pid) { - syslog(LOG_WARNING, "Received packet from untrusted pid:%u", - addr.nl_pid); - continue; - } - - incoming_msg = (struct nlmsghdr *)kvp_recv_buffer; - - if (incoming_msg->nlmsg_type != NLMSG_DONE) - continue; - - incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); - hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; - /* * We will use the KVP header information to pass back * the error from this daemon. 
So, first copy the state @@ -1603,7 +1514,7 @@ int main(int argc, char *argv[]) if (lic_version) { strcpy(lic_version, p); syslog(LOG_INFO, "KVP LIC Version: %s", - lic_version); + lic_version); } else { syslog(LOG_ERR, "malloc failed"); } @@ -1702,7 +1613,6 @@ int main(int argc, char *argv[]) goto kvp_done; } - hv_msg = (struct hv_kvp_msg *)incoming_cn_msg->data; key_name = (char *)hv_msg->body.kvp_enum_data.data.key; key_value = (char *)hv_msg->body.kvp_enum_data.data.value; @@ -1753,31 +1663,17 @@ int main(int argc, char *argv[]) hv_msg->error = HV_S_CONT; break; } - /* - * Send the value back to the kernel. The response is - * already in the receive buffer. Update the cn_msg header to - * reflect the key value that has been added to the message - */ -kvp_done: - - incoming_cn_msg->id.idx = CN_KVP_IDX; - incoming_cn_msg->id.val = CN_KVP_VAL; - incoming_cn_msg->ack = 0; - incoming_cn_msg->len = sizeof(struct hv_kvp_msg); - - len = netlink_send(fd, incoming_cn_msg); - if (len < 0) { - int saved_errno = errno; - syslog(LOG_ERR, "net_link send failed; error: %d %s", errno, - strerror(errno)); - - if (saved_errno == ENOMEM || saved_errno == ENOBUFS) { - syslog(LOG_ERR, "send error: ignored"); - continue; - } + /* Send the value back to the kernel. */ +kvp_done: + len = write(kvp_fd, hv_msg, sizeof(struct hv_kvp_msg)); + if (len != sizeof(struct hv_kvp_msg)) { + syslog(LOG_ERR, "write failed; error: %d %s", errno, + strerror(errno)); exit(EXIT_FAILURE); } } + close(kvp_fd); + exit(0); } diff --git a/tools/hv/hv_vss_daemon.c b/tools/hv/hv_vss_daemon.c index 5e63f70bd956..96234b638249 100644 --- a/tools/hv/hv_vss_daemon.c +++ b/tools/hv/hv_vss_daemon.c @@ -19,7 +19,6 @@ #include <sys/types.h> -#include <sys/socket.h> #include <sys/poll.h> #include <sys/ioctl.h> #include <fcntl.h> @@ -30,21 +29,11 @@ #include <string.h> #include <ctype.h> #include <errno.h> -#include <arpa/inet.h> #include <linux/fs.h> -#include <linux/connector.h> #include <linux/hyperv.h> -#include <linux/netlink.h> #include <syslog.h> #include <getopt.h> -static struct sockaddr_nl addr; - -#ifndef SOL_NETLINK -#define SOL_NETLINK 270 -#endif - - /* Don't use syslog() in the function since that can cause write to disk */ static int vss_do_freeze(char *dir, unsigned int cmd) { @@ -81,6 +70,7 @@ static int vss_operate(int operation) char match[] = "/dev/"; FILE *mounts; struct mntent *ent; + char errdir[1024] = {0}; unsigned int cmd; int error = 0, root_seen = 0, save_errno = 0; @@ -115,6 +105,8 @@ static int vss_operate(int operation) goto err; } + endmntent(mounts); + if (root_seen) { error |= vss_do_freeze("/", cmd); if (error && operation == VSS_OP_FREEZE) @@ -124,46 +116,22 @@ static int vss_operate(int operation) goto out; err: save_errno = errno; + if (ent) { + strncpy(errdir, ent->mnt_dir, sizeof(errdir)-1); + endmntent(mounts); + } vss_operate(VSS_OP_THAW); /* Call syslog after we thaw all filesystems */ if (ent) syslog(LOG_ERR, "FREEZE of %s failed; error:%d %s", - ent->mnt_dir, save_errno, strerror(save_errno)); + errdir, save_errno, strerror(save_errno)); else syslog(LOG_ERR, "FREEZE of / failed; error:%d %s", save_errno, strerror(save_errno)); out: - endmntent(mounts); return error; } -static int netlink_send(int fd, struct cn_msg *msg) -{ - struct nlmsghdr nlh = { .nlmsg_type = NLMSG_DONE }; - unsigned int size; - struct msghdr message; - struct iovec iov[2]; - - size = sizeof(struct cn_msg) + msg->len; - - nlh.nlmsg_pid = getpid(); - nlh.nlmsg_len = NLMSG_LENGTH(size); - - iov[0].iov_base = &nlh; - iov[0].iov_len 
= sizeof(nlh); - - iov[1].iov_base = msg; - iov[1].iov_len = size; - - memset(&message, 0, sizeof(message)); - message.msg_name = &addr; - message.msg_namelen = sizeof(addr); - message.msg_iov = iov; - message.msg_iovlen = 2; - - return sendmsg(fd, &message, 0); -} - void print_usage(char *argv[]) { fprintf(stderr, "Usage: %s [options]\n" @@ -174,17 +142,14 @@ void print_usage(char *argv[]) int main(int argc, char *argv[]) { - int fd, len, nl_group; + int vss_fd, len; int error; - struct cn_msg *message; struct pollfd pfd; - struct nlmsghdr *incoming_msg; - struct cn_msg *incoming_cn_msg; int op; - struct hv_vss_msg *vss_msg; - char *vss_recv_buffer; - size_t vss_recv_buffer_len; + struct hv_vss_msg vss_msg[1]; int daemonize = 1, long_index = 0, opt; + int in_handshake = 1; + __u32 kernel_modver; static struct option long_options[] = { {"help", no_argument, 0, 'h' }, @@ -211,98 +176,62 @@ int main(int argc, char *argv[]) openlog("Hyper-V VSS", 0, LOG_USER); syslog(LOG_INFO, "VSS starting; pid is:%d", getpid()); - vss_recv_buffer_len = NLMSG_LENGTH(0) + sizeof(struct cn_msg) + sizeof(struct hv_vss_msg); - vss_recv_buffer = calloc(1, vss_recv_buffer_len); - if (!vss_recv_buffer) { - syslog(LOG_ERR, "Failed to allocate netlink buffers"); - exit(EXIT_FAILURE); - } - - fd = socket(AF_NETLINK, SOCK_DGRAM, NETLINK_CONNECTOR); - if (fd < 0) { - syslog(LOG_ERR, "netlink socket creation failed; error:%d %s", - errno, strerror(errno)); - exit(EXIT_FAILURE); - } - addr.nl_family = AF_NETLINK; - addr.nl_pad = 0; - addr.nl_pid = 0; - addr.nl_groups = 0; - - - error = bind(fd, (struct sockaddr *)&addr, sizeof(addr)); - if (error < 0) { - syslog(LOG_ERR, "bind failed; error:%d %s", errno, strerror(errno)); - close(fd); - exit(EXIT_FAILURE); - } - nl_group = CN_VSS_IDX; - if (setsockopt(fd, SOL_NETLINK, NETLINK_ADD_MEMBERSHIP, &nl_group, sizeof(nl_group)) < 0) { - syslog(LOG_ERR, "setsockopt failed; error:%d %s", errno, strerror(errno)); - close(fd); + vss_fd = open("/dev/vmbus/hv_vss", O_RDWR); + if (vss_fd < 0) { + syslog(LOG_ERR, "open /dev/vmbus/hv_vss failed; error: %d %s", + errno, strerror(errno)); exit(EXIT_FAILURE); } /* * Register ourselves with the kernel. 
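/*
 * Illustrative sketch (not from this patch): the version-negotiation step
 * the fcopy daemon above and the VSS daemon here perform.  After the
 * registration write, the first read() from the device returns only a
 * __u32 holding the kernel module version; every later read() returns a
 * full message.  Assumes <syslog.h>, <unistd.h> and <linux/types.h>.
 */
static int hv_read_message_sketch(int fd, void *msg, size_t msg_len)
{
	static int in_handshake = 1;
	ssize_t len = read(fd, msg, msg_len);

	if (in_handshake) {
		if (len != sizeof(__u32))
			return -1;		/* invalid version negotiation */
		syslog(LOG_INFO, "kernel module version: %u", *(__u32 *)msg);
		in_handshake = 0;
		return 0;			/* no payload to handle yet */
	}

	return len == (ssize_t)msg_len ? 1 : -1;
}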
*/ - message = (struct cn_msg *)vss_recv_buffer; - message->id.idx = CN_VSS_IDX; - message->id.val = CN_VSS_VAL; - message->ack = 0; - vss_msg = (struct hv_vss_msg *)message->data; - vss_msg->vss_hdr.operation = VSS_OP_REGISTER; - - message->len = sizeof(struct hv_vss_msg); + vss_msg->vss_hdr.operation = VSS_OP_REGISTER1; - len = netlink_send(fd, message); + len = write(vss_fd, vss_msg, sizeof(struct hv_vss_msg)); if (len < 0) { - syslog(LOG_ERR, "netlink_send failed; error:%d %s", errno, strerror(errno)); - close(fd); + syslog(LOG_ERR, "registration to kernel failed; error: %d %s", + errno, strerror(errno)); + close(vss_fd); exit(EXIT_FAILURE); } - pfd.fd = fd; + pfd.fd = vss_fd; while (1) { - struct sockaddr *addr_p = (struct sockaddr *) &addr; - socklen_t addr_l = sizeof(addr); pfd.events = POLLIN; pfd.revents = 0; if (poll(&pfd, 1, -1) < 0) { syslog(LOG_ERR, "poll failed; error:%d %s", errno, strerror(errno)); if (errno == EINVAL) { - close(fd); + close(vss_fd); exit(EXIT_FAILURE); } else continue; } - len = recvfrom(fd, vss_recv_buffer, vss_recv_buffer_len, 0, - addr_p, &addr_l); + len = read(vss_fd, vss_msg, sizeof(struct hv_vss_msg)); - if (len < 0) { - syslog(LOG_ERR, "recvfrom failed; pid:%u error:%d %s", - addr.nl_pid, errno, strerror(errno)); - close(fd); - return -1; - } - - if (addr.nl_pid) { - syslog(LOG_WARNING, - "Received packet from untrusted pid:%u", - addr.nl_pid); + if (in_handshake) { + if (len != sizeof(kernel_modver)) { + syslog(LOG_ERR, "invalid version negotiation"); + exit(EXIT_FAILURE); + } + kernel_modver = *(__u32 *)vss_msg; + in_handshake = 0; + syslog(LOG_INFO, "VSS: kernel module version: %d", + kernel_modver); continue; } - incoming_msg = (struct nlmsghdr *)vss_recv_buffer; - - if (incoming_msg->nlmsg_type != NLMSG_DONE) - continue; + if (len != sizeof(struct hv_vss_msg)) { + syslog(LOG_ERR, "read failed; error:%d %s", + errno, strerror(errno)); + close(vss_fd); + return EXIT_FAILURE; + } - incoming_cn_msg = (struct cn_msg *)NLMSG_DATA(incoming_msg); - vss_msg = (struct hv_vss_msg *)incoming_cn_msg->data; op = vss_msg->vss_hdr.operation; error = HV_S_OK; @@ -325,12 +254,14 @@ int main(int argc, char *argv[]) syslog(LOG_ERR, "Illegal op:%d\n", op); } vss_msg->error = error; - len = netlink_send(fd, incoming_cn_msg); - if (len < 0) { - syslog(LOG_ERR, "net_link send failed; error:%d %s", - errno, strerror(errno)); + len = write(vss_fd, &error, sizeof(struct hv_vss_msg)); + if (len != sizeof(struct hv_vss_msg)) { + syslog(LOG_ERR, "write failed; error: %d %s", errno, + strerror(errno)); exit(EXIT_FAILURE); } } + close(vss_fd); + exit(0); } diff --git a/tools/iio/Makefile b/tools/iio/Makefile new file mode 100644 index 000000000000..3a7a54f59713 --- /dev/null +++ b/tools/iio/Makefile @@ -0,0 +1,16 @@ +CC = $(CROSS_COMPILE)gcc +CFLAGS += -Wall -g -D_GNU_SOURCE + +all: iio_event_monitor lsiio generic_buffer + +iio_event_monitor: iio_event_monitor.o iio_utils.o + +lsiio: lsiio.o iio_utils.o + +generic_buffer: generic_buffer.o iio_utils.o + +%.o: %.c iio_utils.h + +.PHONY: clean +clean: + rm -f *.o iio_event_monitor lsiio generic_buffer diff --git a/tools/iio/generic_buffer.c b/tools/iio/generic_buffer.c new file mode 100644 index 000000000000..4eebb6616e5c --- /dev/null +++ b/tools/iio/generic_buffer.c @@ -0,0 +1,431 @@ +/* Industrialio buffer test code. 
+ * + * Copyright (c) 2008 Jonathan Cameron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is primarily intended as an example application. + * Reads the current buffer setup from sysfs and starts a short capture + * from the specified device, pretty printing the result after appropriate + * conversion. + * + * Command line parameters + * generic_buffer -n <device_name> -t <trigger_name> + * If trigger name is not specified the program assumes you want a dataready + * trigger associated with the device and goes looking for it. + * + */ + +#include <unistd.h> +#include <stdlib.h> +#include <dirent.h> +#include <fcntl.h> +#include <stdio.h> +#include <errno.h> +#include <sys/stat.h> +#include <sys/dir.h> +#include <linux/types.h> +#include <string.h> +#include <poll.h> +#include <endian.h> +#include <getopt.h> +#include <inttypes.h> +#include "iio_utils.h" + +/** + * size_from_channelarray() - calculate the storage size of a scan + * @channels: the channel info array + * @num_channels: number of channels + * + * Has the side effect of filling the channels[i].location values used + * in processing the buffer output. + **/ +int size_from_channelarray(struct iio_channel_info *channels, int num_channels) +{ + int bytes = 0; + int i = 0; + + while (i < num_channels) { + if (bytes % channels[i].bytes == 0) + channels[i].location = bytes; + else + channels[i].location = bytes - bytes%channels[i].bytes + + channels[i].bytes; + bytes = channels[i].location + channels[i].bytes; + i++; + } + return bytes; +} + +void print2byte(uint16_t input, struct iio_channel_info *info) +{ + /* First swap if incorrect endian */ + if (info->be) + input = be16toh(input); + else + input = le16toh(input); + + /* + * Shift before conversion to avoid sign extension + * of left aligned data + */ + input >>= info->shift; + input &= info->mask; + if (info->is_signed) { + int16_t val = (int16_t)(input << (16 - info->bits_used)) >> + (16 - info->bits_used); + printf("%05f ", ((float)val + info->offset) * info->scale); + } else { + printf("%05f ", ((float)input + info->offset) * info->scale); + } +} + +void print4byte(uint32_t input, struct iio_channel_info *info) +{ + /* First swap if incorrect endian */ + if (info->be) + input = be32toh(input); + else + input = le32toh(input); + + /* + * Shift before conversion to avoid sign extension + * of left aligned data + */ + input >>= info->shift; + input &= info->mask; + if (info->is_signed) { + int32_t val = (int32_t)(input << (32 - info->bits_used)) >> + (32 - info->bits_used); + printf("%05f ", ((float)val + info->offset) * info->scale); + } else { + printf("%05f ", ((float)input + info->offset) * info->scale); + } +} + +void print8byte(uint64_t input, struct iio_channel_info *info) +{ + /* First swap if incorrect endian */ + if (info->be) + input = be64toh(input); + else + input = le64toh(input); + + /* + * Shift before conversion to avoid sign extension + * of left aligned data + */ + input >>= info->shift; + input &= info->mask; + if (info->is_signed) { + int64_t val = (int64_t)(input << (64 - info->bits_used)) >> + (64 - info->bits_used); + /* special case for timestamp */ + if (info->scale == 1.0f && info->offset == 0.0f) + printf("%" PRId64 " ", val); + else + printf("%05f ", + ((float)val + info->offset) * info->scale); + } else { + printf("%05f ", ((float)input + info->offset) * info->scale); + } +} + +/** + 
* process_scan() - print out the values in SI units + * @data: pointer to the start of the scan + * @channels: information about the channels. Note + * size_from_channelarray must have been called first to fill the + * location offsets. + * @num_channels: number of channels + **/ +void process_scan(char *data, + struct iio_channel_info *channels, + int num_channels) +{ + int k; + + for (k = 0; k < num_channels; k++) + switch (channels[k].bytes) { + /* only a few cases implemented so far */ + case 2: + print2byte(*(uint16_t *)(data + channels[k].location), + &channels[k]); + break; + case 4: + print4byte(*(uint32_t *)(data + channels[k].location), + &channels[k]); + break; + case 8: + print8byte(*(uint64_t *)(data + channels[k].location), + &channels[k]); + break; + default: + break; + } + printf("\n"); +} + +void print_usage(void) +{ + printf("Usage: generic_buffer [options]...\n" + "Capture, convert and output data from IIO device buffer\n" + " -c <n> Do n conversions\n" + " -e Disable wait for event (new data)\n" + " -g Use trigger-less mode\n" + " -l <n> Set buffer length to n samples\n" + " -n <name> Set device name (mandatory)\n" + " -t <name> Set trigger name\n" + " -w <n> Set delay between reads in us (event-less mode)\n"); +} + +int main(int argc, char **argv) +{ + unsigned long num_loops = 2; + unsigned long timedelay = 1000000; + unsigned long buf_len = 128; + + int ret, c, i, j, toread; + int fp; + + int num_channels; + char *trigger_name = NULL, *device_name = NULL; + char *dev_dir_name, *buf_dir_name; + + int datardytrigger = 1; + char *data; + ssize_t read_size; + int dev_num, trig_num; + char *buffer_access; + int scan_size; + int noevents = 0; + int notrigger = 0; + char *dummy; + + struct iio_channel_info *channels; + + while ((c = getopt(argc, argv, "c:egl:n:t:w:")) != -1) { + switch (c) { + case 'c': + errno = 0; + num_loops = strtoul(optarg, &dummy, 10); + if (errno) + return -errno; + break; + case 'e': + noevents = 1; + break; + case 'g': + notrigger = 1; + break; + case 'l': + errno = 0; + buf_len = strtoul(optarg, &dummy, 10); + if (errno) + return -errno; + break; + case 'n': + device_name = optarg; + break; + case 't': + trigger_name = optarg; + datardytrigger = 0; + break; + case 'w': + errno = 0; + timedelay = strtoul(optarg, &dummy, 10); + if (errno) + return -errno; + break; + case '?': + print_usage(); + return -1; + } + } + + if (device_name == NULL) { + printf("Device name not set\n"); + print_usage(); + return -1; + } + + /* Find the device requested */ + dev_num = find_type_by_name(device_name, "iio:device"); + if (dev_num < 0) { + printf("Failed to find the %s\n", device_name); + return dev_num; + } + printf("iio device number being used is %d\n", dev_num); + + ret = asprintf(&dev_dir_name, "%siio:device%d", iio_dir, dev_num); + if (ret < 0) + return -ENOMEM; + + if (!notrigger) { + if (trigger_name == NULL) { + /* + * Build the trigger name. If it is device associated + * its name is <device_name>_dev[n] where n matches + * the device number found above. 
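/*
 * Illustrative worked example (not from this patch) of the decoding that
 * print2byte() above performs.  Suppose a channel's scan_elements _type
 * file reads "le:s12/16>>4": little endian, signed, 12 valid bits stored
 * in 16 bits, with the valid bits sitting above a 4-bit right shift, so
 * the mask is (1 << 12) - 1 = 0x0fff.  The scale and offset below are
 * made-up values standing in for what sysfs would report.
 */
static void decode_le_s12_16_shift4(uint16_t raw)
{
	const unsigned int bits_used = 12, shift = 4;
	const uint16_t mask = (1 << bits_used) - 1;
	const float scale = 0.001f, offset = 0.0f;	/* assumed values */
	int16_t val;

	raw = le16toh(raw);
	raw >>= shift;
	raw &= mask;
	/* sign-extend the 12-bit field to 16 bits */
	val = (int16_t)(raw << (16 - bits_used)) >> (16 - bits_used);
	printf("%05f\n", ((float)val + offset) * scale);
}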
+ */ + ret = asprintf(&trigger_name, + "%s-dev%d", device_name, dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_dev_dir_name; + } + } + + /* Verify the trigger exists */ + trig_num = find_type_by_name(trigger_name, "trigger"); + if (trig_num < 0) { + printf("Failed to find the trigger %s\n", trigger_name); + ret = trig_num; + goto error_free_triggername; + } + printf("iio trigger number being used is %d\n", trig_num); + } else + printf("trigger-less mode selected\n"); + + /* + * Parse the files in scan_elements to identify what channels are + * present + */ + ret = build_channel_array(dev_dir_name, &channels, &num_channels); + if (ret) { + printf("Problem reading scan element information\n"); + printf("diag %s\n", dev_dir_name); + goto error_free_triggername; + } + + /* + * Construct the directory name for the associated buffer. + * As we know that the lis3l02dq has only one buffer this may + * be built rather than found. + */ + ret = asprintf(&buf_dir_name, + "%siio:device%d/buffer", iio_dir, dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_channels; + } + + if (!notrigger) { + printf("%s %s\n", dev_dir_name, trigger_name); + /* Set the device trigger to be the data ready trigger found + * above */ + ret = write_sysfs_string_and_verify("trigger/current_trigger", + dev_dir_name, + trigger_name); + if (ret < 0) { + printf("Failed to write current_trigger file\n"); + goto error_free_buf_dir_name; + } + } + + /* Setup ring buffer parameters */ + ret = write_sysfs_int("length", buf_dir_name, buf_len); + if (ret < 0) + goto error_free_buf_dir_name; + + /* Enable the buffer */ + ret = write_sysfs_int("enable", buf_dir_name, 1); + if (ret < 0) + goto error_free_buf_dir_name; + scan_size = size_from_channelarray(channels, num_channels); + data = malloc(scan_size*buf_len); + if (!data) { + ret = -ENOMEM; + goto error_free_buf_dir_name; + } + + ret = asprintf(&buffer_access, "/dev/iio:device%d", dev_num); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_data; + } + + /* Attempt to open non blocking the access dev */ + fp = open(buffer_access, O_RDONLY | O_NONBLOCK); + if (fp == -1) { /* If it isn't there make the node */ + ret = -errno; + printf("Failed to open %s\n", buffer_access); + goto error_free_buffer_access; + } + + /* Wait for events 10 times */ + for (j = 0; j < num_loops; j++) { + if (!noevents) { + struct pollfd pfd = { + .fd = fp, + .events = POLLIN, + }; + + ret = poll(&pfd, 1, -1); + if (ret < 0) { + ret = -errno; + goto error_close_buffer_access; + } else if (ret == 0) { + continue; + } + + toread = buf_len; + + } else { + usleep(timedelay); + toread = 64; + } + + read_size = read(fp, + data, + toread*scan_size); + if (read_size < 0) { + if (errno == EAGAIN) { + printf("nothing available\n"); + continue; + } else + break; + } + for (i = 0; i < read_size/scan_size; i++) + process_scan(data + scan_size*i, + channels, + num_channels); + } + + /* Stop the buffer */ + ret = write_sysfs_int("enable", buf_dir_name, 0); + if (ret < 0) + goto error_close_buffer_access; + + if (!notrigger) + /* Disconnect the trigger - just write a dummy name. 
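/*
 * Illustrative sketch (not from this patch): the sysfs and chardev sequence
 * the code above drives, condensed into one helper built on iio_utils.h.
 * All names are parameters supplied by the caller; error handling is
 * omitted and <fcntl.h>, <stdio.h> and <unistd.h> are assumed.
 */
static ssize_t capture_one_scan_sketch(const char *dev_dir, const char *buf_dir,
				       const char *trigger, int dev_num,
				       char *scan, size_t scan_size)
{
	char dev_node[32];
	ssize_t got = -1;
	int fd;

	/* point the device at its trigger, then size and enable the buffer */
	write_sysfs_string_and_verify("trigger/current_trigger", dev_dir, trigger);
	write_sysfs_int("length", buf_dir, 128);
	write_sysfs_int("enable", buf_dir, 1);

	/* raw scans are read from the matching character device */
	snprintf(dev_node, sizeof(dev_node), "/dev/iio:device%d", dev_num);
	fd = open(dev_node, O_RDONLY | O_NONBLOCK);
	if (fd >= 0) {
		got = read(fd, scan, scan_size);  /* -1/EAGAIN if no data yet */
		close(fd);
	}

	/* tear down: disable the buffer and detach the trigger again */
	write_sysfs_int("enable", buf_dir, 0);
	write_sysfs_string("trigger/current_trigger", dev_dir, "NULL");

	return got;
}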
*/ + ret = write_sysfs_string("trigger/current_trigger", + dev_dir_name, "NULL"); + if (ret < 0) + printf("Failed to write to %s\n", dev_dir_name); + +error_close_buffer_access: + if (close(fp) == -1) + perror("Failed to close buffer"); +error_free_buffer_access: + free(buffer_access); +error_free_data: + free(data); +error_free_buf_dir_name: + free(buf_dir_name); +error_free_channels: + for (i = num_channels - 1; i >= 0; i--) { + free(channels[i].name); + free(channels[i].generic_name); + } + free(channels); +error_free_triggername: + if (datardytrigger) + free(trigger_name); +error_free_dev_dir_name: + free(dev_dir_name); + + return ret; +} diff --git a/tools/iio/iio_event_monitor.c b/tools/iio/iio_event_monitor.c new file mode 100644 index 000000000000..016760e769c0 --- /dev/null +++ b/tools/iio/iio_event_monitor.c @@ -0,0 +1,312 @@ +/* Industrialio event test code. + * + * Copyright (c) 2011-2012 Lars-Peter Clausen <lars@metafoo.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + * + * This program is primarily intended as an example application. + * Reads the current buffer setup from sysfs and starts a short capture + * from the specified device, pretty printing the result after appropriate + * conversion. + * + * Usage: + * iio_event_monitor <device_name> + * + */ + +#include <unistd.h> +#include <stdlib.h> +#include <stdbool.h> +#include <stdio.h> +#include <errno.h> +#include <string.h> +#include <poll.h> +#include <fcntl.h> +#include <sys/ioctl.h> +#include "iio_utils.h" +#include <linux/iio/events.h> +#include <linux/iio/types.h> + +static const char * const iio_chan_type_name_spec[] = { + [IIO_VOLTAGE] = "voltage", + [IIO_CURRENT] = "current", + [IIO_POWER] = "power", + [IIO_ACCEL] = "accel", + [IIO_ANGL_VEL] = "anglvel", + [IIO_MAGN] = "magn", + [IIO_LIGHT] = "illuminance", + [IIO_INTENSITY] = "intensity", + [IIO_PROXIMITY] = "proximity", + [IIO_TEMP] = "temp", + [IIO_INCLI] = "incli", + [IIO_ROT] = "rot", + [IIO_ANGL] = "angl", + [IIO_TIMESTAMP] = "timestamp", + [IIO_CAPACITANCE] = "capacitance", + [IIO_ALTVOLTAGE] = "altvoltage", + [IIO_CCT] = "cct", + [IIO_PRESSURE] = "pressure", + [IIO_HUMIDITYRELATIVE] = "humidityrelative", + [IIO_ACTIVITY] = "activity", + [IIO_STEPS] = "steps", +}; + +static const char * const iio_ev_type_text[] = { + [IIO_EV_TYPE_THRESH] = "thresh", + [IIO_EV_TYPE_MAG] = "mag", + [IIO_EV_TYPE_ROC] = "roc", + [IIO_EV_TYPE_THRESH_ADAPTIVE] = "thresh_adaptive", + [IIO_EV_TYPE_MAG_ADAPTIVE] = "mag_adaptive", + [IIO_EV_TYPE_CHANGE] = "change", +}; + +static const char * const iio_ev_dir_text[] = { + [IIO_EV_DIR_EITHER] = "either", + [IIO_EV_DIR_RISING] = "rising", + [IIO_EV_DIR_FALLING] = "falling" +}; + +static const char * const iio_modifier_names[] = { + [IIO_MOD_X] = "x", + [IIO_MOD_Y] = "y", + [IIO_MOD_Z] = "z", + [IIO_MOD_X_AND_Y] = "x&y", + [IIO_MOD_X_AND_Z] = "x&z", + [IIO_MOD_Y_AND_Z] = "y&z", + [IIO_MOD_X_AND_Y_AND_Z] = "x&y&z", + [IIO_MOD_X_OR_Y] = "x|y", + [IIO_MOD_X_OR_Z] = "x|z", + [IIO_MOD_Y_OR_Z] = "y|z", + [IIO_MOD_X_OR_Y_OR_Z] = "x|y|z", + [IIO_MOD_LIGHT_BOTH] = "both", + [IIO_MOD_LIGHT_IR] = "ir", + [IIO_MOD_ROOT_SUM_SQUARED_X_Y] = "sqrt(x^2+y^2)", + [IIO_MOD_SUM_SQUARED_X_Y_Z] = "x^2+y^2+z^2", + [IIO_MOD_LIGHT_CLEAR] = "clear", + [IIO_MOD_LIGHT_RED] = "red", + [IIO_MOD_LIGHT_GREEN] = "green", + [IIO_MOD_LIGHT_BLUE] = "blue", + [IIO_MOD_QUATERNION] = "quaternion", + [IIO_MOD_TEMP_AMBIENT] = 
"ambient", + [IIO_MOD_TEMP_OBJECT] = "object", + [IIO_MOD_NORTH_MAGN] = "from_north_magnetic", + [IIO_MOD_NORTH_TRUE] = "from_north_true", + [IIO_MOD_NORTH_MAGN_TILT_COMP] = "from_north_magnetic_tilt_comp", + [IIO_MOD_NORTH_TRUE_TILT_COMP] = "from_north_true_tilt_comp", + [IIO_MOD_RUNNING] = "running", + [IIO_MOD_JOGGING] = "jogging", + [IIO_MOD_WALKING] = "walking", + [IIO_MOD_STILL] = "still", +}; + +static bool event_is_known(struct iio_event_data *event) +{ + enum iio_chan_type type = IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event->id); + enum iio_modifier mod = IIO_EVENT_CODE_EXTRACT_MODIFIER(event->id); + enum iio_event_type ev_type = IIO_EVENT_CODE_EXTRACT_TYPE(event->id); + enum iio_event_direction dir = IIO_EVENT_CODE_EXTRACT_DIR(event->id); + + switch (type) { + case IIO_VOLTAGE: + case IIO_CURRENT: + case IIO_POWER: + case IIO_ACCEL: + case IIO_ANGL_VEL: + case IIO_MAGN: + case IIO_LIGHT: + case IIO_INTENSITY: + case IIO_PROXIMITY: + case IIO_TEMP: + case IIO_INCLI: + case IIO_ROT: + case IIO_ANGL: + case IIO_TIMESTAMP: + case IIO_CAPACITANCE: + case IIO_ALTVOLTAGE: + case IIO_CCT: + case IIO_PRESSURE: + case IIO_HUMIDITYRELATIVE: + case IIO_ACTIVITY: + case IIO_STEPS: + break; + default: + return false; + } + + switch (mod) { + case IIO_NO_MOD: + case IIO_MOD_X: + case IIO_MOD_Y: + case IIO_MOD_Z: + case IIO_MOD_X_AND_Y: + case IIO_MOD_X_AND_Z: + case IIO_MOD_Y_AND_Z: + case IIO_MOD_X_AND_Y_AND_Z: + case IIO_MOD_X_OR_Y: + case IIO_MOD_X_OR_Z: + case IIO_MOD_Y_OR_Z: + case IIO_MOD_X_OR_Y_OR_Z: + case IIO_MOD_LIGHT_BOTH: + case IIO_MOD_LIGHT_IR: + case IIO_MOD_ROOT_SUM_SQUARED_X_Y: + case IIO_MOD_SUM_SQUARED_X_Y_Z: + case IIO_MOD_LIGHT_CLEAR: + case IIO_MOD_LIGHT_RED: + case IIO_MOD_LIGHT_GREEN: + case IIO_MOD_LIGHT_BLUE: + case IIO_MOD_QUATERNION: + case IIO_MOD_TEMP_AMBIENT: + case IIO_MOD_TEMP_OBJECT: + case IIO_MOD_NORTH_MAGN: + case IIO_MOD_NORTH_TRUE: + case IIO_MOD_NORTH_MAGN_TILT_COMP: + case IIO_MOD_NORTH_TRUE_TILT_COMP: + case IIO_MOD_RUNNING: + case IIO_MOD_JOGGING: + case IIO_MOD_WALKING: + case IIO_MOD_STILL: + break; + default: + return false; + } + + switch (ev_type) { + case IIO_EV_TYPE_THRESH: + case IIO_EV_TYPE_MAG: + case IIO_EV_TYPE_ROC: + case IIO_EV_TYPE_THRESH_ADAPTIVE: + case IIO_EV_TYPE_MAG_ADAPTIVE: + case IIO_EV_TYPE_CHANGE: + break; + default: + return false; + } + + switch (dir) { + case IIO_EV_DIR_EITHER: + case IIO_EV_DIR_RISING: + case IIO_EV_DIR_FALLING: + case IIO_EV_DIR_NONE: + break; + default: + return false; + } + + return true; +} + +static void print_event(struct iio_event_data *event) +{ + enum iio_chan_type type = IIO_EVENT_CODE_EXTRACT_CHAN_TYPE(event->id); + enum iio_modifier mod = IIO_EVENT_CODE_EXTRACT_MODIFIER(event->id); + enum iio_event_type ev_type = IIO_EVENT_CODE_EXTRACT_TYPE(event->id); + enum iio_event_direction dir = IIO_EVENT_CODE_EXTRACT_DIR(event->id); + int chan = IIO_EVENT_CODE_EXTRACT_CHAN(event->id); + int chan2 = IIO_EVENT_CODE_EXTRACT_CHAN2(event->id); + bool diff = IIO_EVENT_CODE_EXTRACT_DIFF(event->id); + + if (!event_is_known(event)) { + printf("Unknown event: time: %lld, id: %llx\n", + event->timestamp, event->id); + return; + } + + printf("Event: time: %lld, type: %s", event->timestamp, + iio_chan_type_name_spec[type]); + + if (mod != IIO_NO_MOD) + printf("(%s)", iio_modifier_names[mod]); + + if (chan >= 0) { + printf(", channel: %d", chan); + if (diff && chan2 >= 0) + printf("-%d", chan2); + } + + printf(", evtype: %s", iio_ev_type_text[ev_type]); + + if (dir != IIO_EV_DIR_NONE) + printf(", direction: %s", 
iio_ev_dir_text[dir]); + printf("\n"); +} + +int main(int argc, char **argv) +{ + struct iio_event_data event; + const char *device_name; + char *chrdev_name; + int ret; + int dev_num; + int fd, event_fd; + + if (argc <= 1) { + printf("Usage: %s <device_name>\n", argv[0]); + return -1; + } + + device_name = argv[1]; + + dev_num = find_type_by_name(device_name, "iio:device"); + if (dev_num >= 0) { + printf("Found IIO device with name %s with device number %d\n", + device_name, dev_num); + ret = asprintf(&chrdev_name, "/dev/iio:device%d", dev_num); + if (ret < 0) { + return -ENOMEM; + } + } else { + /* If we can't find a IIO device by name assume device_name is a + IIO chrdev */ + chrdev_name = strdup(device_name); + if (!chrdev_name) + return -ENOMEM; + } + + fd = open(chrdev_name, 0); + if (fd == -1) { + ret = -errno; + fprintf(stdout, "Failed to open %s\n", chrdev_name); + goto error_free_chrdev_name; + } + + ret = ioctl(fd, IIO_GET_EVENT_FD_IOCTL, &event_fd); + if (ret == -1 || event_fd == -1) { + ret = -errno; + fprintf(stdout, "Failed to retrieve event fd\n"); + if (close(fd) == -1) + perror("Failed to close character device file"); + + goto error_free_chrdev_name; + } + + if (close(fd) == -1) { + ret = -errno; + goto error_free_chrdev_name; + } + + while (true) { + ret = read(event_fd, &event, sizeof(event)); + if (ret == -1) { + if (errno == EAGAIN) { + printf("nothing available\n"); + continue; + } else { + ret = -errno; + perror("Failed to read event from device"); + break; + } + } + + print_event(&event); + } + + if (close(event_fd) == -1) + perror("Failed to close event file"); + +error_free_chrdev_name: + free(chrdev_name); + + return ret; +} diff --git a/tools/iio/iio_utils.c b/tools/iio/iio_utils.c new file mode 100644 index 000000000000..ec9ab7f9ae4c --- /dev/null +++ b/tools/iio/iio_utils.c @@ -0,0 +1,962 @@ +/* IIO - useful set of util functionality + * + * Copyright (c) 2008 Jonathan Cameron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ +#ifndef _IIO_UTILS_H +#define _IIO_UTILS_H + +#include <string.h> +#include <stdlib.h> +#include <stdio.h> +#include <stdint.h> +#include <dirent.h> +#include <errno.h> +#include <ctype.h> +#include "iio_utils.h" + +const char *iio_dir = "/sys/bus/iio/devices/"; + +static char * const iio_direction[] = { + "in", + "out", +}; + +/** + * iioutils_break_up_name() - extract generic name from full channel name + * @full_name: the full channel name + * @generic_name: the output generic channel name + * + * Returns 0 on success, or a negative error code if string extraction failed. + **/ +int iioutils_break_up_name(const char *full_name, + char **generic_name) +{ + char *current; + char *w, *r; + char *working, *prefix = ""; + int i, ret; + + for (i = 0; i < sizeof(iio_direction) / sizeof(iio_direction[0]); i++) + if (!strncmp(full_name, iio_direction[i], + strlen(iio_direction[i]))) { + prefix = iio_direction[i]; + break; + } + + current = strdup(full_name + strlen(prefix) + 1); + if (!current) + return -ENOMEM; + + working = strtok(current, "_\0"); + if (!working) { + free(current); + return -EINVAL; + } + + w = working; + r = working; + + while (*r != '\0') { + if (!isdigit(*r)) { + *w = *r; + w++; + } + r++; + } + *w = '\0'; + ret = asprintf(generic_name, "%s_%s", prefix, working); + free(current); + + return (ret == -1) ? 
-ENOMEM : 0; +} + +/** + * iioutils_get_type() - find and process _type attribute data + * @is_signed: output whether channel is signed + * @bytes: output how many bytes the channel storage occupies + * @bits_used: output number of valid bits of data + * @shift: output amount of bits to shift right data before applying bit mask + * @mask: output a bit mask for the raw data + * @be: output if data in big endian + * @device_dir: the IIO device directory + * @name: the channel name + * @generic_name: the channel type name + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int iioutils_get_type(unsigned *is_signed, + unsigned *bytes, + unsigned *bits_used, + unsigned *shift, + uint64_t *mask, + unsigned *be, + const char *device_dir, + const char *name, + const char *generic_name) +{ + FILE *sysfsfp; + int ret; + DIR *dp; + char *scan_el_dir, *builtname, *builtname_generic, *filename = 0; + char signchar, endianchar; + unsigned padint; + const struct dirent *ent; + + ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir); + if (ret < 0) + return -ENOMEM; + + ret = asprintf(&builtname, FORMAT_TYPE_FILE, name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_scan_el_dir; + } + ret = asprintf(&builtname_generic, FORMAT_TYPE_FILE, generic_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_builtname; + } + + dp = opendir(scan_el_dir); + if (dp == NULL) { + ret = -errno; + goto error_free_builtname_generic; + } + ret = -ENOENT; + while (ent = readdir(dp), ent != NULL) + /* + * Do we allow devices to override a generic name with + * a specific one? + */ + if ((strcmp(builtname, ent->d_name) == 0) || + (strcmp(builtname_generic, ent->d_name) == 0)) { + ret = asprintf(&filename, + "%s/%s", scan_el_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_closedir; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + ret = -errno; + printf("failed to open %s\n", filename); + goto error_free_filename; + } + + ret = fscanf(sysfsfp, + "%ce:%c%u/%u>>%u", + &endianchar, + &signchar, + bits_used, + &padint, shift); + if (ret < 0) { + ret = -errno; + printf("failed to pass scan type description\n"); + goto error_close_sysfsfp; + } else if (ret != 5) { + ret = -EIO; + printf("scan type description didn't match\n"); + goto error_close_sysfsfp; + } + *be = (endianchar == 'b'); + *bytes = padint / 8; + if (*bits_used == 64) + *mask = ~0; + else + *mask = (1 << *bits_used) - 1; + *is_signed = (signchar == 's'); + if (fclose(sysfsfp)) { + ret = -errno; + printf("Failed to close %s\n", filename); + goto error_free_filename; + } + + sysfsfp = 0; + free(filename); + + filename = 0; + } +error_close_sysfsfp: + if (sysfsfp) + if (fclose(sysfsfp)) + perror("iioutils_get_type(): Failed to close file"); + +error_free_filename: + if (filename) + free(filename); +error_closedir: + if (closedir(dp) == -1) + perror("iioutils_get_type(): Failed to close directory"); + +error_free_builtname_generic: + free(builtname_generic); +error_free_builtname: + free(builtname); +error_free_scan_el_dir: + free(scan_el_dir); + + return ret; +} + +/** + * iioutils_get_param_float() - read a float value from a channel parameter + * @output: output the float value + * @param_name: the parameter name to read + * @device_dir: the IIO device directory in sysfs + * @name: the channel name + * @generic_name: the channel type name + * + * Returns a value >= 0 on success, otherwise a negative error code. 
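/*
 * Illustrative usage (not from this patch) of the helper defined just
 * below: read the scale that applies to channel in_voltage0.  The device
 * path and channel names are assumed values; if no in_voltage0_scale file
 * exists, the helper falls back to the shared attribute built from the
 * generic name, i.e. in_voltage_scale.
 */
static void read_scale_example(void)
{
	float scale = 1.0f;

	if (iioutils_get_param_float(&scale, "scale",
				     "/sys/bus/iio/devices/iio:device0",
				     "in_voltage0", "in_voltage") >= 0)
		printf("in_voltage0 scale: %f\n", scale);
}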
+ **/ +int iioutils_get_param_float(float *output, + const char *param_name, + const char *device_dir, + const char *name, + const char *generic_name) +{ + FILE *sysfsfp; + int ret; + DIR *dp; + char *builtname, *builtname_generic; + char *filename = NULL; + const struct dirent *ent; + + ret = asprintf(&builtname, "%s_%s", name, param_name); + if (ret < 0) + return -ENOMEM; + + ret = asprintf(&builtname_generic, + "%s_%s", generic_name, param_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_free_builtname; + } + dp = opendir(device_dir); + if (dp == NULL) { + ret = -errno; + goto error_free_builtname_generic; + } + ret = -ENOENT; + while (ent = readdir(dp), ent != NULL) + if ((strcmp(builtname, ent->d_name) == 0) || + (strcmp(builtname_generic, ent->d_name) == 0)) { + ret = asprintf(&filename, + "%s/%s", device_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_closedir; + } + sysfsfp = fopen(filename, "r"); + if (!sysfsfp) { + ret = -errno; + goto error_free_filename; + } + errno = 0; + if (fscanf(sysfsfp, "%f", output) != 1) + ret = errno ? -errno : -ENODATA; + + break; + } +error_free_filename: + if (filename) + free(filename); +error_closedir: + if (closedir(dp) == -1) + perror("iioutils_get_param_float(): Failed to close directory"); + +error_free_builtname_generic: + free(builtname_generic); +error_free_builtname: + free(builtname); + + return ret; +} + +/** + * bsort_channel_array_by_index() - sort the array in index order + * @ci_array: the iio_channel_info array to be sorted + * @cnt: the amount of array elements + **/ + +void bsort_channel_array_by_index(struct iio_channel_info **ci_array, + int cnt) +{ + + struct iio_channel_info temp; + int x, y; + + for (x = 0; x < cnt; x++) + for (y = 0; y < (cnt - 1); y++) + if ((*ci_array)[y].index > (*ci_array)[y+1].index) { + temp = (*ci_array)[y + 1]; + (*ci_array)[y + 1] = (*ci_array)[y]; + (*ci_array)[y] = temp; + } +} + +/** + * build_channel_array() - function to figure out what channels are present + * @device_dir: the IIO device directory in sysfs + * @ci_array: output the resulting array of iio_channel_info + * @counter: output the amount of array elements + * + * Returns 0 on success, otherwise a negative error code. + **/ +int build_channel_array(const char *device_dir, + struct iio_channel_info **ci_array, + int *counter) +{ + DIR *dp; + FILE *sysfsfp; + int count = 0, i; + struct iio_channel_info *current; + int ret; + const struct dirent *ent; + char *scan_el_dir; + char *filename; + + *counter = 0; + ret = asprintf(&scan_el_dir, FORMAT_SCAN_ELEMENTS_DIR, device_dir); + if (ret < 0) + return -ENOMEM; + + dp = opendir(scan_el_dir); + if (dp == NULL) { + ret = -errno; + goto error_free_name; + } + while (ent = readdir(dp), ent != NULL) + if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), + "_en") == 0) { + ret = asprintf(&filename, + "%s/%s", scan_el_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + goto error_close_dir; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + ret = -errno; + free(filename); + goto error_close_dir; + } + errno = 0; + if (fscanf(sysfsfp, "%i", &ret) != 1) { + ret = errno ? 
-errno : -ENODATA; + if (fclose(sysfsfp)) + perror("build_channel_array(): Failed to close file"); + + free(filename); + goto error_close_dir; + } + + if (ret == 1) + (*counter)++; + if (fclose(sysfsfp)) { + ret = -errno; + free(filename); + goto error_close_dir; + } + + free(filename); + } + *ci_array = malloc(sizeof(**ci_array) * (*counter)); + if (*ci_array == NULL) { + ret = -ENOMEM; + goto error_close_dir; + } + seekdir(dp, 0); + while (ent = readdir(dp), ent != NULL) { + if (strcmp(ent->d_name + strlen(ent->d_name) - strlen("_en"), + "_en") == 0) { + int current_enabled = 0; + + current = &(*ci_array)[count++]; + ret = asprintf(&filename, + "%s/%s", scan_el_dir, ent->d_name); + if (ret < 0) { + ret = -ENOMEM; + /* decrement count to avoid freeing name */ + count--; + goto error_cleanup_array; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + ret = -errno; + free(filename); + count--; + goto error_cleanup_array; + } + errno = 0; + if (fscanf(sysfsfp, "%i", ¤t_enabled) != 1) { + ret = errno ? -errno : -ENODATA; + free(filename); + count--; + goto error_cleanup_array; + } + + if (fclose(sysfsfp)) { + ret = -errno; + free(filename); + count--; + goto error_cleanup_array; + } + + if (!current_enabled) { + free(filename); + count--; + continue; + } + + current->scale = 1.0; + current->offset = 0; + current->name = strndup(ent->d_name, + strlen(ent->d_name) - + strlen("_en")); + if (current->name == NULL) { + free(filename); + ret = -ENOMEM; + count--; + goto error_cleanup_array; + } + /* Get the generic and specific name elements */ + ret = iioutils_break_up_name(current->name, + ¤t->generic_name); + if (ret) { + free(filename); + free(current->name); + count--; + goto error_cleanup_array; + } + ret = asprintf(&filename, + "%s/%s_index", + scan_el_dir, + current->name); + if (ret < 0) { + free(filename); + ret = -ENOMEM; + goto error_cleanup_array; + } + sysfsfp = fopen(filename, "r"); + if (sysfsfp == NULL) { + ret = -errno; + printf("failed to open %s\n", filename); + free(filename); + goto error_cleanup_array; + } + + errno = 0; + if (fscanf(sysfsfp, "%u", ¤t->index) != 1) { + ret = errno ? 
-errno : -ENODATA; + if (fclose(sysfsfp)) + perror("build_channel_array(): Failed to close file"); + + free(filename); + goto error_cleanup_array; + } + + if (fclose(sysfsfp)) { + ret = -errno; + free(filename); + goto error_cleanup_array; + } + + free(filename); + /* Find the scale */ + ret = iioutils_get_param_float(¤t->scale, + "scale", + device_dir, + current->name, + current->generic_name); + if (ret < 0) + goto error_cleanup_array; + ret = iioutils_get_param_float(¤t->offset, + "offset", + device_dir, + current->name, + current->generic_name); + if (ret < 0) + goto error_cleanup_array; + ret = iioutils_get_type(¤t->is_signed, + ¤t->bytes, + ¤t->bits_used, + ¤t->shift, + ¤t->mask, + ¤t->be, + device_dir, + current->name, + current->generic_name); + if (ret < 0) + goto error_cleanup_array; + } + } + + if (closedir(dp) == -1) { + ret = -errno; + goto error_cleanup_array; + } + + free(scan_el_dir); + /* reorder so that the array is in index order */ + bsort_channel_array_by_index(ci_array, *counter); + + return 0; + +error_cleanup_array: + for (i = count - 1; i >= 0; i--) { + free((*ci_array)[i].name); + free((*ci_array)[i].generic_name); + } + free(*ci_array); +error_close_dir: + if (dp) + if (closedir(dp) == -1) + perror("build_channel_array(): Failed to close dir"); + +error_free_name: + free(scan_el_dir); + + return ret; +} + +int calc_digits(int num) +{ + int count = 0; + + while (num != 0) { + num /= 10; + count++; + } + + return count; +} + +/** + * find_type_by_name() - function to match top level types by name + * @name: top level type instance name + * @type: the type of top level instance being searched + * + * Returns the device number of a matched IIO device on success, otherwise a + * negative error code. + * Typical types this is used for are device and trigger. + **/ +int find_type_by_name(const char *name, const char *type) +{ + const struct dirent *ent; + int number, numstrlen, ret; + + FILE *nameFile; + DIR *dp; + char thisname[IIO_MAX_NAME_LENGTH]; + char *filename; + + dp = opendir(iio_dir); + if (dp == NULL) { + printf("No industrialio devices available\n"); + return -ENODEV; + } + + while (ent = readdir(dp), ent != NULL) { + if (strcmp(ent->d_name, ".") != 0 && + strcmp(ent->d_name, "..") != 0 && + strlen(ent->d_name) > strlen(type) && + strncmp(ent->d_name, type, strlen(type)) == 0) { + errno = 0; + ret = sscanf(ent->d_name + strlen(type), "%d", &number); + if (ret < 0) { + ret = -errno; + printf("failed to read element number\n"); + goto error_close_dir; + } else if (ret != 1) { + ret = -EIO; + printf("failed to match element number\n"); + goto error_close_dir; + } + + numstrlen = calc_digits(number); + /* verify the next character is not a colon */ + if (strncmp(ent->d_name + strlen(type) + numstrlen, + ":", + 1) != 0) { + filename = malloc(strlen(iio_dir) + + strlen(type) + + numstrlen + + 6); + if (filename == NULL) { + ret = -ENOMEM; + goto error_close_dir; + } + + ret = sprintf(filename, "%s%s%d/name", iio_dir, + type, number); + if (ret < 0) { + free(filename); + goto error_close_dir; + } + + nameFile = fopen(filename, "r"); + if (!nameFile) { + free(filename); + continue; + } + free(filename); + errno = 0; + if (fscanf(nameFile, "%s", thisname) != 1) { + ret = errno ? 
-errno : -ENODATA; + goto error_close_dir; + } + + if (fclose(nameFile)) { + ret = -errno; + goto error_close_dir; + } + + if (strcmp(name, thisname) == 0) { + if (closedir(dp) == -1) + return -errno; + return number; + } + } + } + } + if (closedir(dp) == -1) + return -errno; + + return -ENODEV; + +error_close_dir: + if (closedir(dp) == -1) + perror("find_type_by_name(): Failed to close directory"); + return ret; +} + +static int _write_sysfs_int(const char *filename, const char *basedir, int val, + int verify) +{ + int ret = 0; + FILE *sysfsfp; + int test; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) + return -ENOMEM; + ret = sprintf(temp, "%s/%s", basedir, filename); + if (ret < 0) + goto error_free; + + sysfsfp = fopen(temp, "w"); + if (sysfsfp == NULL) { + ret = -errno; + printf("failed to open %s\n", temp); + goto error_free; + } + ret = fprintf(sysfsfp, "%d", val); + if (ret < 0) { + if (fclose(sysfsfp)) + perror("_write_sysfs_int(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) { + ret = -errno; + goto error_free; + } + + if (verify) { + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + printf("failed to open %s\n", temp); + goto error_free; + } + if (fscanf(sysfsfp, "%d", &test) != 1) { + ret = errno ? -errno : -ENODATA; + if (fclose(sysfsfp)) + perror("_write_sysfs_int(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) { + ret = -errno; + goto error_free; + } + + if (test != val) { + printf("Possible failure in int write %d to %s%s\n", + val, + basedir, + filename); + ret = -1; + } + } +error_free: + free(temp); + return ret; +} + +/** + * write_sysfs_int() - write an integer value to a sysfs file + * @filename: name of the file to write to + * @basedir: the sysfs directory in which the file is to be found + * @val: integer value to write to file + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int write_sysfs_int(const char *filename, const char *basedir, int val) +{ + return _write_sysfs_int(filename, basedir, val, 0); +} + +/** + * write_sysfs_int_and_verify() - write an integer value to a sysfs file + * and verify + * @filename: name of the file to write to + * @basedir: the sysfs directory in which the file is to be found + * @val: integer value to write to file + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int write_sysfs_int_and_verify(const char *filename, const char *basedir, + int val) +{ + return _write_sysfs_int(filename, basedir, val, 1); +} + +static int _write_sysfs_string(const char *filename, const char *basedir, + const char *val, int verify) +{ + int ret = 0; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed\n"); + return -ENOMEM; + } + ret = sprintf(temp, "%s/%s", basedir, filename); + if (ret < 0) + goto error_free; + + sysfsfp = fopen(temp, "w"); + if (sysfsfp == NULL) { + ret = -errno; + printf("Could not open %s\n", temp); + goto error_free; + } + ret = fprintf(sysfsfp, "%s", val); + if (ret < 0) { + if (fclose(sysfsfp)) + perror("_write_sysfs_string(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) { + ret = -errno; + goto error_free; + } + + if (verify) { + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + printf("could not open file to verify\n"); + goto error_free; + } + if (fscanf(sysfsfp, "%s", temp) != 1) { + ret = errno ? 
-errno : -ENODATA; + if (fclose(sysfsfp)) + perror("_write_sysfs_string(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) { + ret = -errno; + goto error_free; + } + + if (strcmp(temp, val) != 0) { + printf("Possible failure in string write of %s " + "Should be %s " + "written to %s\%s\n", + temp, + val, + basedir, + filename); + ret = -1; + } + } +error_free: + free(temp); + + return ret; +} + +/** + * write_sysfs_string_and_verify() - string write, readback and verify + * @filename: name of file to write to + * @basedir: the sysfs directory in which the file is to be found + * @val: the string to write + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int write_sysfs_string_and_verify(const char *filename, const char *basedir, + const char *val) +{ + return _write_sysfs_string(filename, basedir, val, 1); +} + +/** + * write_sysfs_string() - write string to a sysfs file + * @filename: name of file to write to + * @basedir: the sysfs directory in which the file is to be found + * @val: the string to write + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int write_sysfs_string(const char *filename, const char *basedir, + const char *val) +{ + return _write_sysfs_string(filename, basedir, val, 0); +} + +/** + * read_sysfs_posint() - read an integer value from file + * @filename: name of file to read from + * @basedir: the sysfs directory in which the file is to be found + * + * Returns the read integer value >= 0 on success, otherwise a negative error + * code. + **/ +int read_sysfs_posint(const char *filename, const char *basedir) +{ + int ret; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed"); + return -ENOMEM; + } + ret = sprintf(temp, "%s/%s", basedir, filename); + if (ret < 0) + goto error_free; + + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + goto error_free; + } + errno = 0; + if (fscanf(sysfsfp, "%d\n", &ret) != 1) { + ret = errno ? -errno : -ENODATA; + if (fclose(sysfsfp)) + perror("read_sysfs_posint(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) + ret = -errno; + +error_free: + free(temp); + return ret; +} + +/** + * read_sysfs_float() - read a float value from file + * @filename: name of file to read from + * @basedir: the sysfs directory in which the file is to be found + * @val: output the read float value + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int read_sysfs_float(const char *filename, const char *basedir, float *val) +{ + int ret = 0; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed"); + return -ENOMEM; + } + ret = sprintf(temp, "%s/%s", basedir, filename); + if (ret < 0) + goto error_free; + + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + goto error_free; + } + errno = 0; + if (fscanf(sysfsfp, "%f\n", val) != 1) { + ret = errno ? 
-errno : -ENODATA; + if (fclose(sysfsfp)) + perror("read_sysfs_float(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) + ret = -errno; + +error_free: + free(temp); + return ret; +} + +/** + * read_sysfs_string() - read a string from file + * @filename: name of file to read from + * @basedir: the sysfs directory in which the file is to be found + * @str: output the read string + * + * Returns a value >= 0 on success, otherwise a negative error code. + **/ +int read_sysfs_string(const char *filename, const char *basedir, char *str) +{ + int ret = 0; + FILE *sysfsfp; + char *temp = malloc(strlen(basedir) + strlen(filename) + 2); + + if (temp == NULL) { + printf("Memory allocation failed"); + return -ENOMEM; + } + ret = sprintf(temp, "%s/%s", basedir, filename); + if (ret < 0) + goto error_free; + + sysfsfp = fopen(temp, "r"); + if (sysfsfp == NULL) { + ret = -errno; + goto error_free; + } + errno = 0; + if (fscanf(sysfsfp, "%s\n", str) != 1) { + ret = errno ? -errno : -ENODATA; + if (fclose(sysfsfp)) + perror("read_sysfs_string(): Failed to close dir"); + + goto error_free; + } + + if (fclose(sysfsfp)) + ret = -errno; + +error_free: + free(temp); + return ret; +} + +#endif /* _IIO_UTILS_H */ diff --git a/tools/iio/iio_utils.h b/tools/iio/iio_utils.h new file mode 100644 index 000000000000..379eed9deaea --- /dev/null +++ b/tools/iio/iio_utils.h @@ -0,0 +1,77 @@ +#ifndef _IIO_UTILS_H_ +#define _IIO_UTILS_H_ + +/* IIO - useful set of util functionality + * + * Copyright (c) 2008 Jonathan Cameron + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <stdint.h> + +/* Made up value to limit allocation sizes */ +#define IIO_MAX_NAME_LENGTH 30 + +#define FORMAT_SCAN_ELEMENTS_DIR "%s/scan_elements" +#define FORMAT_TYPE_FILE "%s_type" + +extern const char *iio_dir; + +/** + * struct iio_channel_info - information about a given channel + * @name: channel name + * @generic_name: general name for channel type + * @scale: scale factor to be applied for conversion to si units + * @offset: offset to be applied for conversion to si units + * @index: the channel index in the buffer output + * @bytes: number of bytes occupied in buffer output + * @bits_used: number of valid bits of data + * @shift: amount of bits to shift right data before applying bit mask + * @mask: a bit mask for the raw output + * @be: flag if data is big endian + * @is_signed: is the raw value stored signed + * @location: data offset for this channel inside the buffer (in bytes) + **/ +struct iio_channel_info { + char *name; + char *generic_name; + float scale; + float offset; + unsigned index; + unsigned bytes; + unsigned bits_used; + unsigned shift; + uint64_t mask; + unsigned be; + unsigned is_signed; + unsigned location; +}; + +int iioutils_break_up_name(const char *full_name, char **generic_name); +int iioutils_get_type(unsigned *is_signed, unsigned *bytes, + unsigned *bits_used, unsigned *shift, + uint64_t *mask, unsigned *be, + const char *device_dir, const char *name, + const char *generic_name); +int iioutils_get_param_float(float *output, const char *param_name, + const char *device_dir, const char *name, + const char *generic_name); +void bsort_channel_array_by_index(struct iio_channel_info **ci_array, int cnt); +int build_channel_array(const char *device_dir, + struct iio_channel_info **ci_array, int *counter); +int find_type_by_name(const char 
*name, const char *type); +int write_sysfs_int(const char *filename, const char *basedir, int val); +int write_sysfs_int_and_verify(const char *filename, const char *basedir, + int val); +int write_sysfs_string_and_verify(const char *filename, const char *basedir, + const char *val); +int write_sysfs_string(const char *filename, const char *basedir, + const char *val); +int read_sysfs_posint(const char *filename, const char *basedir); +int read_sysfs_float(const char *filename, const char *basedir, float *val); +int read_sysfs_string(const char *filename, const char *basedir, char *str); + +#endif /* _IIO_UTILS_H_ */ diff --git a/tools/iio/lsiio.c b/tools/iio/lsiio.c new file mode 100644 index 000000000000..b59ee1733924 --- /dev/null +++ b/tools/iio/lsiio.c @@ -0,0 +1,187 @@ +/* + * Industrial I/O utilities - lsiio.c + * + * Copyright (c) 2010 Manuel Stahl <manuel.stahl@iis.fraunhofer.de> + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published by + * the Free Software Foundation. + */ + +#include <string.h> +#include <dirent.h> +#include <stdio.h> +#include <errno.h> +#include <stdint.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/dir.h> +#include "iio_utils.h" + + +static enum verbosity { + VERBLEVEL_DEFAULT, /* 0 gives lspci behaviour */ + VERBLEVEL_SENSORS, /* 1 lists sensors */ +} verblevel = VERBLEVEL_DEFAULT; + +const char *type_device = "iio:device"; +const char *type_trigger = "trigger"; + + +static inline int check_prefix(const char *str, const char *prefix) +{ + return strlen(str) > strlen(prefix) && + strncmp(str, prefix, strlen(prefix)) == 0; +} + +static inline int check_postfix(const char *str, const char *postfix) +{ + return strlen(str) > strlen(postfix) && + strcmp(str + strlen(str) - strlen(postfix), postfix) == 0; +} + +static int dump_channels(const char *dev_dir_name) +{ + DIR *dp; + const struct dirent *ent; + + dp = opendir(dev_dir_name); + if (dp == NULL) + return -errno; + while (ent = readdir(dp), ent != NULL) + if (check_prefix(ent->d_name, "in_") && + check_postfix(ent->d_name, "_raw")) { + printf(" %-10s\n", ent->d_name); + } + + return (closedir(dp) == -1) ? 
-errno : 0; +} + +static int dump_one_device(const char *dev_dir_name) +{ + char name[IIO_MAX_NAME_LENGTH]; + int dev_idx; + int retval; + + retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_device), + "%i", &dev_idx); + if (retval != 1) + return -EINVAL; + retval = read_sysfs_string("name", dev_dir_name, name); + if (retval) + return retval; + + printf("Device %03d: %s\n", dev_idx, name); + + if (verblevel >= VERBLEVEL_SENSORS) + return dump_channels(dev_dir_name); + return 0; +} + +static int dump_one_trigger(const char *dev_dir_name) +{ + char name[IIO_MAX_NAME_LENGTH]; + int dev_idx; + int retval; + + retval = sscanf(dev_dir_name + strlen(iio_dir) + strlen(type_trigger), + "%i", &dev_idx); + if (retval != 1) + return -EINVAL; + retval = read_sysfs_string("name", dev_dir_name, name); + if (retval) + return retval; + + printf("Trigger %03d: %s\n", dev_idx, name); + return 0; +} + +static int dump_devices(void) +{ + const struct dirent *ent; + int ret; + DIR *dp; + + dp = opendir(iio_dir); + if (dp == NULL) { + printf("No industrial I/O devices available\n"); + return -ENODEV; + } + + while (ent = readdir(dp), ent != NULL) { + if (check_prefix(ent->d_name, type_device)) { + char *dev_dir_name; + + if (asprintf(&dev_dir_name, "%s%s", iio_dir, + ent->d_name) < 0) { + ret = -ENOMEM; + goto error_close_dir; + } + + ret = dump_one_device(dev_dir_name); + if (ret) { + free(dev_dir_name); + goto error_close_dir; + } + + free(dev_dir_name); + if (verblevel >= VERBLEVEL_SENSORS) + printf("\n"); + } + } + rewinddir(dp); + while (ent = readdir(dp), ent != NULL) { + if (check_prefix(ent->d_name, type_trigger)) { + char *dev_dir_name; + + if (asprintf(&dev_dir_name, "%s%s", iio_dir, + ent->d_name) < 0) { + ret = -ENOMEM; + goto error_close_dir; + } + + ret = dump_one_trigger(dev_dir_name); + if (ret) { + free(dev_dir_name); + goto error_close_dir; + } + + free(dev_dir_name); + } + } + return (closedir(dp) == -1) ? -errno : 0; + +error_close_dir: + if (closedir(dp) == -1) + perror("dump_devices(): Failed to close directory"); + + return ret; +} + +int main(int argc, char **argv) +{ + int c, err = 0; + + while ((c = getopt(argc, argv, "v")) != EOF) { + switch (c) { + case 'v': + verblevel++; + break; + + case '?': + default: + err++; + break; + } + } + if (err || argc > optind) { + fprintf(stderr, "Usage: lsiio [options]...\n" + "List industrial I/O devices\n" + " -v Increase verbosity (may be given multiple times)\n"); + exit(1); + } + + return dump_devices(); +} diff --git a/tools/include/asm-generic/atomic-gcc.h b/tools/include/asm-generic/atomic-gcc.h new file mode 100644 index 000000000000..2ba78c9f5701 --- /dev/null +++ b/tools/include/asm-generic/atomic-gcc.h @@ -0,0 +1,63 @@ +#ifndef __TOOLS_ASM_GENERIC_ATOMIC_H +#define __TOOLS_ASM_GENERIC_ATOMIC_H + +#include <linux/compiler.h> +#include <linux/types.h> + +/* + * Atomic operations that C can't guarantee us. Useful for + * resource counting etc.. + * + * Excerpts obtained from the Linux kernel sources. + */ + +#define ATOMIC_INIT(i) { (i) } + +/** + * atomic_read - read atomic variable + * @v: pointer of type atomic_t + * + * Atomically reads the value of @v. + */ +static inline int atomic_read(const atomic_t *v) +{ + return ACCESS_ONCE((v)->counter); +} + +/** + * atomic_set - set atomic variable + * @v: pointer of type atomic_t + * @i: required value + * + * Atomically sets the value of @v to @i. 
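/*
 * Illustrative usage (not from this patch): the reference-counting pattern
 * these wrappers are meant for, using atomic_set() below together with
 * atomic_inc() and atomic_dec_and_test() further down.  "struct obj" is a
 * made-up type and <stdlib.h> is assumed for free(); a freshly allocated
 * object would start with atomic_set(&o->refcount, 1).
 */
struct obj {
	atomic_t refcount;
	/* ... payload ... */
};

static inline void obj_get(struct obj *o)
{
	atomic_inc(&o->refcount);
}

static inline void obj_put(struct obj *o)
{
	if (atomic_dec_and_test(&o->refcount))
		free(o);		/* last reference dropped */
}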
+ */ +static inline void atomic_set(atomic_t *v, int i) +{ + v->counter = i; +} + +/** + * atomic_inc - increment atomic variable + * @v: pointer of type atomic_t + * + * Atomically increments @v by 1. + */ +static inline void atomic_inc(atomic_t *v) +{ + __sync_add_and_fetch(&v->counter, 1); +} + +/** + * atomic_dec_and_test - decrement and test + * @v: pointer of type atomic_t + * + * Atomically decrements @v by 1 and + * returns true if the result is 0, or false for all other + * cases. + */ +static inline int atomic_dec_and_test(atomic_t *v) +{ + return __sync_sub_and_fetch(&v->counter, 1) == 0; +} + +#endif /* __TOOLS_ASM_GENERIC_ATOMIC_H */ diff --git a/tools/include/asm-generic/barrier.h b/tools/include/asm-generic/barrier.h new file mode 100644 index 000000000000..47b933903eaf --- /dev/null +++ b/tools/include/asm-generic/barrier.h @@ -0,0 +1,44 @@ +/* + * Copied from the kernel sources to tools/perf/: + * + * Generic barrier definitions, originally based on MN10300 definitions. + * + * It should be possible to use these on really simple architectures, + * but it serves more as a starting point for new ports. + * + * Copyright (C) 2007 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ +#ifndef __TOOLS_LINUX_ASM_GENERIC_BARRIER_H +#define __TOOLS_LINUX_ASM_GENERIC_BARRIER_H + +#ifndef __ASSEMBLY__ + +#include <linux/compiler.h> + +/* + * Force strict CPU ordering. And yes, this is required on UP too when we're + * talking to devices. + * + * Fall back to compiler barriers if nothing better is provided. 
+ */ + +#ifndef mb +#define mb() barrier() +#endif + +#ifndef rmb +#define rmb() mb() +#endif + +#ifndef wmb +#define wmb() mb() +#endif + +#endif /* !__ASSEMBLY__ */ +#endif /* __TOOLS_LINUX_ASM_GENERIC_BARRIER_H */ diff --git a/tools/include/asm/atomic.h b/tools/include/asm/atomic.h new file mode 100644 index 000000000000..70794f538a86 --- /dev/null +++ b/tools/include/asm/atomic.h @@ -0,0 +1,10 @@ +#ifndef __TOOLS_LINUX_ASM_ATOMIC_H +#define __TOOLS_LINUX_ASM_ATOMIC_H + +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/atomic.h" +#else +#include <asm-generic/atomic-gcc.h> +#endif + +#endif /* __TOOLS_LINUX_ASM_ATOMIC_H */ diff --git a/tools/include/asm/barrier.h b/tools/include/asm/barrier.h new file mode 100644 index 000000000000..ac66ac594685 --- /dev/null +++ b/tools/include/asm/barrier.h @@ -0,0 +1,27 @@ +#if defined(__i386__) || defined(__x86_64__) +#include "../../arch/x86/include/asm/barrier.h" +#elif defined(__arm__) +#include "../../arch/arm/include/asm/barrier.h" +#elif defined(__aarch64__) +#include "../../arch/arm64/include/asm/barrier.h" +#elif defined(__powerpc__) +#include "../../arch/powerpc/include/asm/barrier.h" +#elif defined(__s390__) +#include "../../arch/s390/include/asm/barrier.h" +#elif defined(__sh__) +#include "../../arch/sh/include/asm/barrier.h" +#elif defined(__sparc__) +#include "../../arch/sparc/include/asm/barrier.h" +#elif defined(__tile__) +#include "../../arch/tile/include/asm/barrier.h" +#elif defined(__alpha__) +#include "../../arch/alpha/include/asm/barrier.h" +#elif defined(__mips__) +#include "../../arch/mips/include/asm/barrier.h" +#elif defined(__ia64__) +#include "../../arch/ia64/include/asm/barrier.h" +#elif defined(__xtensa__) +#include "../../arch/xtensa/include/asm/barrier.h" +#else +#include <asm-generic/barrier.h> +#endif diff --git a/tools/include/linux/atomic.h b/tools/include/linux/atomic.h new file mode 100644 index 000000000000..4e3d3d18ebab --- /dev/null +++ b/tools/include/linux/atomic.h @@ -0,0 +1,6 @@ +#ifndef __TOOLS_LINUX_ATOMIC_H +#define __TOOLS_LINUX_ATOMIC_H + +#include <asm/atomic.h> + +#endif /* __TOOLS_LINUX_ATOMIC_H */ diff --git a/tools/include/linux/compiler.h b/tools/include/linux/compiler.h index 88461f09cc86..9098083869c8 100644 --- a/tools/include/linux/compiler.h +++ b/tools/include/linux/compiler.h @@ -1,6 +1,10 @@ #ifndef _TOOLS_LINUX_COMPILER_H_ #define _TOOLS_LINUX_COMPILER_H_ +/* Optimization barrier */ +/* The "volatile" is due to gcc bugs */ +#define barrier() __asm__ __volatile__("": : :"memory") + #ifndef __always_inline # define __always_inline inline __attribute__((always_inline)) #endif @@ -37,4 +41,62 @@ #define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) +#include <linux/types.h> + +static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; + default: + barrier(); + __builtin_memcpy((void *)res, (const void *)p, size); + barrier(); + } +} + +static __always_inline void __write_once_size(volatile void *p, void *res, int size) +{ + switch (size) { + case 1: *(volatile __u8 *)p = *(__u8 *)res; break; + case 2: *(volatile __u16 *)p = *(__u16 *)res; break; + case 4: *(volatile __u32 *)p = *(__u32 *)res; break; + case 8: *(volatile __u64 *)p = *(__u64 *)res; break; + default: + barrier(); + 
__builtin_memcpy((void *)p, (const void *)res, size); + barrier(); + } +} + +/* + * Prevent the compiler from merging or refetching reads or writes. The + * compiler is also forbidden from reordering successive instances of + * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the + * compiler is aware of some particular ordering. One way to make the + * compiler aware of ordering is to put the two invocations of READ_ONCE, + * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * + * In contrast to ACCESS_ONCE these two macros will also work on aggregate + * data types like structs or unions. If the size of the accessed data + * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) + * READ_ONCE() and WRITE_ONCE() will fall back to memcpy and print a + * compile-time warning. + * + * Their two major use cases are: (1) Mediating communication between + * process-level code and irq/NMI handlers, all running on the same CPU, + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * mutilate accesses that either do not require ordering or that interact + * with an explicit memory barrier or atomic instruction that provides the + * required ordering. + */ + +#define READ_ONCE(x) \ + ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + +#define WRITE_ONCE(x, val) \ + ({ union { typeof(x) __val; char __c[1]; } __u = { .__val = (val) }; __write_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) + #endif /* _TOOLS_LINUX_COMPILER_H */ diff --git a/tools/include/linux/export.h b/tools/include/linux/export.h deleted file mode 100644 index d07e586b9ba0..000000000000 --- a/tools/include/linux/export.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _TOOLS_LINUX_EXPORT_H_ -#define _TOOLS_LINUX_EXPORT_H_ - -#define EXPORT_SYMBOL(sym) -#define EXPORT_SYMBOL_GPL(sym) -#define EXPORT_SYMBOL_GPL_FUTURE(sym) -#define EXPORT_UNUSED_SYMBOL(sym) -#define EXPORT_UNUSED_SYMBOL_GPL(sym) - -#endif diff --git a/tools/perf/util/include/linux/kernel.h b/tools/include/linux/kernel.h index 09e8e7aea7c6..76df53539c2a 100644 --- a/tools/perf/util/include/linux/kernel.h +++ b/tools/include/linux/kernel.h @@ -1,5 +1,5 @@ -#ifndef PERF_LINUX_KERNEL_H_ -#define PERF_LINUX_KERNEL_H_ +#ifndef __TOOLS_LINUX_KERNEL_H +#define __TOOLS_LINUX_KERNEL_H #include <stdarg.h> #include <stdio.h> diff --git a/tools/perf/util/include/linux/list.h b/tools/include/linux/list.h index 76ddbc726343..76b014c96893 100644 --- a/tools/perf/util/include/linux/list.h +++ b/tools/include/linux/list.h @@ -1,10 +1,10 @@ #include <linux/kernel.h> #include <linux/types.h> -#include "../../../../include/linux/list.h" +#include "../../../include/linux/list.h" -#ifndef PERF_LIST_H -#define PERF_LIST_H +#ifndef TOOLS_LIST_H +#define TOOLS_LIST_H /** * list_del_range - deletes range of entries from list. * @begin: first element in the range to delete from the list. 
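The new tools/include/linux/compiler.h and the atomic headers above give the tools (mainly perf) kernel-style lock-free building blocks without dragging in kernel internals: READ_ONCE()/WRITE_ONCE() keep gcc from caching, folding or tearing the access, and atomic_t is backed either by the x86 asm header or by the generic GCC __sync builtins. The following is a minimal standalone sketch of how these pieces are typically combined; it is illustrative only, with hypothetical names (sketch.c, READ_ONCE_INT, get_object/put_object) and single-size ONCE macros, whereas the macros added in this patch dispatch on sizeof() and fall back to memcpy for aggregates.

/* sketch.c - illustrative only; build with: gcc -pthread sketch.c */
#include <pthread.h>
#include <stdio.h>

/* Simplified, int-only stand-ins for the READ_ONCE/WRITE_ONCE added above. */
#define WRITE_ONCE_INT(x, val)  (*(volatile int *)&(x) = (val))
#define READ_ONCE_INT(x)        (*(volatile int *)&(x))

struct object {
        int refcount;           /* managed like atomic_t via __sync builtins */
};

static struct object obj = { .refcount = 1 };
static int stop;                /* polled through READ_ONCE_INT() */

static void get_object(struct object *o)
{
        /* same idea as atomic_inc() in asm-generic/atomic-gcc.h */
        __sync_add_and_fetch(&o->refcount, 1);
}

static void put_object(struct object *o)
{
        /* same idea as atomic_dec_and_test() */
        if (__sync_sub_and_fetch(&o->refcount, 1) == 0)
                printf("last reference dropped\n");
}

static void *worker(void *arg)
{
        (void)arg;
        /* volatile read forces a reload on every iteration */
        while (!READ_ONCE_INT(stop))
                ;
        put_object(&obj);       /* drop the reference taken for this thread */
        return NULL;
}

int main(void)
{
        pthread_t t;

        get_object(&obj);       /* reference handed to the worker */
        if (pthread_create(&t, NULL, worker, NULL))
                return 1;
        WRITE_ONCE_INT(stop, 1);        /* volatile store, not optimized away */
        pthread_join(t, NULL);
        put_object(&obj);       /* drop the initial reference; prints the message */
        return 0;
}

The refcount manipulation mirrors atomic_inc()/atomic_dec_and_test() from asm-generic/atomic-gcc.h, and reading the flag through a volatile cast is what keeps the compiler from hoisting the load out of the loop, which is exactly the guarantee the real READ_ONCE()/WRITE_ONCE() macros in this patch provide for 1-, 2-, 4- and 8-byte scalars.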
diff --git a/tools/include/linux/poison.h b/tools/include/linux/poison.h new file mode 100644 index 000000000000..0c27bdf14233 --- /dev/null +++ b/tools/include/linux/poison.h @@ -0,0 +1 @@ +#include "../../../include/linux/poison.h" diff --git a/tools/include/linux/rbtree.h b/tools/include/linux/rbtree.h new file mode 100644 index 000000000000..112582253dd0 --- /dev/null +++ b/tools/include/linux/rbtree.h @@ -0,0 +1,104 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/include/linux/rbtree.h + + To use rbtrees you'll have to implement your own insert and search cores. + This will avoid us to use callbacks and to drop drammatically performances. + I know it's not the cleaner way, but in C (not in C++) to get + performances and genericity... + + See Documentation/rbtree.txt for documentation and samples. +*/ + +#ifndef __TOOLS_LINUX_PERF_RBTREE_H +#define __TOOLS_LINUX_PERF_RBTREE_H + +#include <linux/kernel.h> +#include <linux/stddef.h> + +struct rb_node { + unsigned long __rb_parent_color; + struct rb_node *rb_right; + struct rb_node *rb_left; +} __attribute__((aligned(sizeof(long)))); + /* The alignment might seem pointless, but allegedly CRIS needs it */ + +struct rb_root { + struct rb_node *rb_node; +}; + + +#define rb_parent(r) ((struct rb_node *)((r)->__rb_parent_color & ~3)) + +#define RB_ROOT (struct rb_root) { NULL, } +#define rb_entry(ptr, type, member) container_of(ptr, type, member) + +#define RB_EMPTY_ROOT(root) ((root)->rb_node == NULL) + +/* 'empty' nodes are nodes that are known not to be inserted in an rbtree */ +#define RB_EMPTY_NODE(node) \ + ((node)->__rb_parent_color == (unsigned long)(node)) +#define RB_CLEAR_NODE(node) \ + ((node)->__rb_parent_color = (unsigned long)(node)) + + +extern void rb_insert_color(struct rb_node *, struct rb_root *); +extern void rb_erase(struct rb_node *, struct rb_root *); + + +/* Find logical next and previous nodes in a tree */ +extern struct rb_node *rb_next(const struct rb_node *); +extern struct rb_node *rb_prev(const struct rb_node *); +extern struct rb_node *rb_first(const struct rb_root *); +extern struct rb_node *rb_last(const struct rb_root *); + +/* Postorder iteration - always visit the parent after its children */ +extern struct rb_node *rb_first_postorder(const struct rb_root *); +extern struct rb_node *rb_next_postorder(const struct rb_node *); + +/* Fast replacement of a single node without remove/rebalance/add/rebalance */ +extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root); + +static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, + struct rb_node **rb_link) +{ + node->__rb_parent_color = (unsigned long)parent; + node->rb_left = node->rb_right = NULL; + + *rb_link = node; +} + +#define rb_entry_safe(ptr, type, member) \ + ({ typeof(ptr) 
____ptr = (ptr); \ + ____ptr ? rb_entry(____ptr, type, member) : NULL; \ + }) + + +/* + * Handy for checking that we are not deleting an entry that is + * already in a list, found in block/{blk-throttle,cfq-iosched}.c, + * probably should be moved to lib/rbtree.c... + */ +static inline void rb_erase_init(struct rb_node *n, struct rb_root *root) +{ + rb_erase(n, root); + RB_CLEAR_NODE(n); +} +#endif /* __TOOLS_LINUX_PERF_RBTREE_H */ diff --git a/tools/include/linux/rbtree_augmented.h b/tools/include/linux/rbtree_augmented.h new file mode 100644 index 000000000000..43be941db695 --- /dev/null +++ b/tools/include/linux/rbtree_augmented.h @@ -0,0 +1,245 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + (C) 2012 Michel Lespinasse <walken@google.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. + + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + tools/linux/include/linux/rbtree_augmented.h + + Copied from: + linux/include/linux/rbtree_augmented.h +*/ + +#ifndef _TOOLS_LINUX_RBTREE_AUGMENTED_H +#define _TOOLS_LINUX_RBTREE_AUGMENTED_H + +#include <linux/compiler.h> +#include <linux/rbtree.h> + +/* + * Please note - only struct rb_augment_callbacks and the prototypes for + * rb_insert_augmented() and rb_erase_augmented() are intended to be public. + * The rest are implementation details you are not expected to depend on. + * + * See Documentation/rbtree.txt for documentation and samples. + */ + +struct rb_augment_callbacks { + void (*propagate)(struct rb_node *node, struct rb_node *stop); + void (*copy)(struct rb_node *old, struct rb_node *new); + void (*rotate)(struct rb_node *old, struct rb_node *new); +}; + +extern void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); +/* + * Fixup the rbtree and update the augmented information when rebalancing. + * + * On insertion, the user must update the augmented information on the path + * leading to the inserted node, then call rb_link_node() as usual and + * rb_augment_inserted() instead of the usual rb_insert_color() call. + * If rb_augment_inserted() rebalances the rbtree, it will callback into + * a user provided function to update the augmented information on the + * affected subtrees. 
+ */ +static inline void +rb_insert_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + __rb_insert_augmented(node, root, augment->rotate); +} + +#define RB_DECLARE_CALLBACKS(rbstatic, rbname, rbstruct, rbfield, \ + rbtype, rbaugmented, rbcompute) \ +static inline void \ +rbname ## _propagate(struct rb_node *rb, struct rb_node *stop) \ +{ \ + while (rb != stop) { \ + rbstruct *node = rb_entry(rb, rbstruct, rbfield); \ + rbtype augmented = rbcompute(node); \ + if (node->rbaugmented == augmented) \ + break; \ + node->rbaugmented = augmented; \ + rb = rb_parent(&node->rbfield); \ + } \ +} \ +static inline void \ +rbname ## _copy(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ +} \ +static void \ +rbname ## _rotate(struct rb_node *rb_old, struct rb_node *rb_new) \ +{ \ + rbstruct *old = rb_entry(rb_old, rbstruct, rbfield); \ + rbstruct *new = rb_entry(rb_new, rbstruct, rbfield); \ + new->rbaugmented = old->rbaugmented; \ + old->rbaugmented = rbcompute(old); \ +} \ +rbstatic const struct rb_augment_callbacks rbname = { \ + rbname ## _propagate, rbname ## _copy, rbname ## _rotate \ +}; + + +#define RB_RED 0 +#define RB_BLACK 1 + +#define __rb_parent(pc) ((struct rb_node *)(pc & ~3)) + +#define __rb_color(pc) ((pc) & 1) +#define __rb_is_black(pc) __rb_color(pc) +#define __rb_is_red(pc) (!__rb_color(pc)) +#define rb_color(rb) __rb_color((rb)->__rb_parent_color) +#define rb_is_red(rb) __rb_is_red((rb)->__rb_parent_color) +#define rb_is_black(rb) __rb_is_black((rb)->__rb_parent_color) + +static inline void rb_set_parent(struct rb_node *rb, struct rb_node *p) +{ + rb->__rb_parent_color = rb_color(rb) | (unsigned long)p; +} + +static inline void rb_set_parent_color(struct rb_node *rb, + struct rb_node *p, int color) +{ + rb->__rb_parent_color = (unsigned long)p | color; +} + +static inline void +__rb_change_child(struct rb_node *old, struct rb_node *new, + struct rb_node *parent, struct rb_root *root) +{ + if (parent) { + if (parent->rb_left == old) + parent->rb_left = new; + else + parent->rb_right = new; + } else + root->rb_node = new; +} + +extern void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)); + +static __always_inline struct rb_node * +__rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *child = node->rb_right, *tmp = node->rb_left; + struct rb_node *parent, *rebalance; + unsigned long pc; + + if (!tmp) { + /* + * Case 1: node to erase has no more than 1 child (easy!) + * + * Note that if there is one child it must be red due to 5) + * and node must be black due to 4). We adjust colors locally + * so as to bypass __rb_erase_color() later on. + */ + pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, child, parent, root); + if (child) { + child->__rb_parent_color = pc; + rebalance = NULL; + } else + rebalance = __rb_is_black(pc) ? 
parent : NULL; + tmp = parent; + } else if (!child) { + /* Still case 1, but this time the child is node->rb_left */ + tmp->__rb_parent_color = pc = node->__rb_parent_color; + parent = __rb_parent(pc); + __rb_change_child(node, tmp, parent, root); + rebalance = NULL; + tmp = parent; + } else { + struct rb_node *successor = child, *child2; + tmp = child->rb_left; + if (!tmp) { + /* + * Case 2: node's successor is its right child + * + * (n) (s) + * / \ / \ + * (x) (s) -> (x) (c) + * \ + * (c) + */ + parent = successor; + child2 = successor->rb_right; + augment->copy(node, successor); + } else { + /* + * Case 3: node's successor is leftmost under + * node's right child subtree + * + * (n) (s) + * / \ / \ + * (x) (y) -> (x) (y) + * / / + * (p) (p) + * / / + * (s) (c) + * \ + * (c) + */ + do { + parent = successor; + successor = tmp; + tmp = tmp->rb_left; + } while (tmp); + parent->rb_left = child2 = successor->rb_right; + successor->rb_right = child; + rb_set_parent(child, successor); + augment->copy(node, successor); + augment->propagate(parent, successor); + } + + successor->rb_left = tmp = node->rb_left; + rb_set_parent(tmp, successor); + + pc = node->__rb_parent_color; + tmp = __rb_parent(pc); + __rb_change_child(node, successor, tmp, root); + if (child2) { + successor->__rb_parent_color = pc; + rb_set_parent_color(child2, parent, RB_BLACK); + rebalance = NULL; + } else { + unsigned long pc2 = successor->__rb_parent_color; + successor->__rb_parent_color = pc; + rebalance = __rb_is_black(pc2) ? parent : NULL; + } + tmp = successor; + } + + augment->propagate(tmp, NULL); + return rebalance; +} + +static __always_inline void +rb_erase_augmented(struct rb_node *node, struct rb_root *root, + const struct rb_augment_callbacks *augment) +{ + struct rb_node *rebalance = __rb_erase_augmented(node, root, augment); + if (rebalance) + __rb_erase_color(rebalance, root, augment->rotate); +} + +#endif /* _TOOLS_LINUX_RBTREE_AUGMENTED_H */ diff --git a/tools/include/linux/types.h b/tools/include/linux/types.h index b5cf25e05df2..8ebf6278b2ef 100644 --- a/tools/include/linux/types.h +++ b/tools/include/linux/types.h @@ -60,6 +60,14 @@ typedef __u32 __bitwise __be32; typedef __u64 __bitwise __le64; typedef __u64 __bitwise __be64; +typedef struct { + int counter; +} atomic_t; + +#ifndef __aligned_u64 +# define __aligned_u64 __u64 __attribute__((aligned(8))) +#endif + struct list_head { struct list_head *next, *prev; }; diff --git a/tools/laptop/freefall/Makefile b/tools/laptop/freefall/Makefile new file mode 100644 index 000000000000..48c6c9328419 --- /dev/null +++ b/tools/laptop/freefall/Makefile @@ -0,0 +1,17 @@ +PREFIX ?= /usr +SBINDIR ?= sbin +INSTALL ?= install +CC = $(CROSS_COMPILE)gcc + +TARGET = freefall + +all: $(TARGET) + +%: %.c + $(CC) $(CFLAGS) $(LDFLAGS) -o $@ $< + +clean: + $(RM) $(TARGET) + +install: freefall + $(INSTALL) -D -m 755 $(TARGET) $(DESTDIR)$(PREFIX)/$(SBINDIR)/$(TARGET) diff --git a/tools/laptop/freefall/freefall.c b/tools/laptop/freefall/freefall.c new file mode 100644 index 000000000000..5e44b20b1848 --- /dev/null +++ b/tools/laptop/freefall/freefall.c @@ -0,0 +1,174 @@ +/* Disk protection for HP/DELL machines. + * + * Copyright 2008 Eric Piel + * Copyright 2009 Pavel Machek <pavel@ucw.cz> + * Copyright 2012 Sonal Santan + * Copyright 2014 Pali Rohár <pali.rohar@gmail.com> + * + * GPLv2. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <fcntl.h> +#include <sys/stat.h> +#include <sys/types.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> +#include <signal.h> +#include <sys/mman.h> +#include <sched.h> +#include <syslog.h> + +static int noled; +static char unload_heads_path[64]; +static char device_path[32]; +static const char app_name[] = "FREE FALL"; + +static int set_unload_heads_path(char *device) +{ + if (strlen(device) <= 5 || strncmp(device, "/dev/", 5) != 0) + return -EINVAL; + strncpy(device_path, device, sizeof(device_path) - 1); + + snprintf(unload_heads_path, sizeof(unload_heads_path) - 1, + "/sys/block/%s/device/unload_heads", device+5); + return 0; +} + +static int valid_disk(void) +{ + int fd = open(unload_heads_path, O_RDONLY); + + if (fd < 0) { + perror(unload_heads_path); + return 0; + } + + close(fd); + return 1; +} + +static void write_int(char *path, int i) +{ + char buf[1024]; + int fd = open(path, O_RDWR); + + if (fd < 0) { + perror("open"); + exit(1); + } + + sprintf(buf, "%d", i); + + if (write(fd, buf, strlen(buf)) != strlen(buf)) { + perror("write"); + exit(1); + } + + close(fd); +} + +static void set_led(int on) +{ + if (noled) + return; + write_int("/sys/class/leds/hp::hddprotect/brightness", on); +} + +static void protect(int seconds) +{ + const char *str = (seconds == 0) ? "Unparked" : "Parked"; + + write_int(unload_heads_path, seconds*1000); + syslog(LOG_INFO, "%s %s disk head\n", str, device_path); +} + +static int on_ac(void) +{ + /* /sys/class/power_supply/AC0/online */ + return 1; +} + +static int lid_open(void) +{ + /* /proc/acpi/button/lid/LID/state */ + return 1; +} + +static void ignore_me(int signum) +{ + protect(0); + set_led(0); +} + +int main(int argc, char **argv) +{ + int fd, ret; + struct stat st; + struct sched_param param; + + if (argc == 1) + ret = set_unload_heads_path("/dev/sda"); + else if (argc == 2) + ret = set_unload_heads_path(argv[1]); + else + ret = -EINVAL; + + if (ret || !valid_disk()) { + fprintf(stderr, "usage: %s <device> (default: /dev/sda)\n", + argv[0]); + exit(1); + } + + fd = open("/dev/freefall", O_RDONLY); + if (fd < 0) { + perror("/dev/freefall"); + return EXIT_FAILURE; + } + + if (stat("/sys/class/leds/hp::hddprotect/brightness", &st)) + noled = 1; + + if (daemon(0, 0) != 0) { + perror("daemon"); + return EXIT_FAILURE; + } + + openlog(app_name, LOG_CONS | LOG_PID | LOG_NDELAY, LOG_LOCAL1); + + param.sched_priority = sched_get_priority_max(SCHED_FIFO); + sched_setscheduler(0, SCHED_FIFO, ¶m); + mlockall(MCL_CURRENT|MCL_FUTURE); + + signal(SIGALRM, ignore_me); + + for (;;) { + unsigned char count; + + ret = read(fd, &count, sizeof(count)); + alarm(0); + if ((ret == -1) && (errno == EINTR)) { + /* Alarm expired, time to unpark the heads */ + continue; + } + + if (ret != sizeof(count)) { + perror("read"); + break; + } + + protect(21); + set_led(1); + if (1 || on_ac() || lid_open()) + alarm(2); + else + alarm(20); + } + + closelog(); + close(fd); + return EXIT_SUCCESS; +} diff --git a/tools/lib/api/Build b/tools/lib/api/Build new file mode 100644 index 000000000000..3653965cf481 --- /dev/null +++ b/tools/lib/api/Build @@ -0,0 +1,2 @@ +libapi-y += fd/ +libapi-y += fs/ diff --git a/tools/lib/api/Makefile b/tools/lib/api/Makefile index 36c08b1f4afb..fe1b02c2c95b 100644 --- a/tools/lib/api/Makefile +++ b/tools/lib/api/Makefile @@ -1,49 +1,43 @@ include ../../scripts/Makefile.include include ../../perf/config/utilities.mak # QUIET_CLEAN +ifeq ($(srctree),) +srctree := 
$(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) +endif + CC = $(CROSS_COMPILE)gcc AR = $(CROSS_COMPILE)ar -# guard against environment variables -LIB_H= -LIB_OBJS= - -LIB_H += fs/debugfs.h -LIB_H += fs/fs.h -# See comment below about piggybacking... -LIB_H += fd/array.h - -LIB_OBJS += $(OUTPUT)fs/debugfs.o -LIB_OBJS += $(OUTPUT)fs/fs.o -# XXX piggybacking here, need to introduce libapikfd, or rename this -# to plain libapik.a and make it have it all api goodies -LIB_OBJS += $(OUTPUT)fd/array.o +MAKEFLAGS += --no-print-directory -LIBFILE = libapikfs.a +LIBFILE = $(OUTPUT)libapi.a -CFLAGS = -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -D_FORTIFY_SOURCE=2 $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) -fPIC -EXTLIBS = -lelf -lpthread -lrt -lm -ALL_CFLAGS = $(CFLAGS) $(BASIC_CFLAGS) -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -ALL_LDFLAGS = $(LDFLAGS) +CFLAGS := $(EXTRA_WARNINGS) $(EXTRA_CFLAGS) +CFLAGS += -ggdb3 -Wall -Wextra -std=gnu99 -Werror -O6 -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=2 -fPIC +CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 RM = rm -f -$(LIBFILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $(OUTPUT)$@ $(LIB_OBJS) +build := -f $(srctree)/tools/build/Makefile.build dir=. obj +API_IN := $(OUTPUT)libapi-in.o -$(LIB_OBJS): $(LIB_H) +export srctree OUTPUT CC LD CFLAGS V -libapi_dirs: - $(QUIET_MKDIR)mkdir -p $(OUTPUT)fd $(OUTPUT)fs +all: $(LIBFILE) -$(OUTPUT)%.o: %.c libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< -$(OUTPUT)%.s: %.c libapi_dirs - $(QUIET_CC)$(CC) -S $(ALL_CFLAGS) $< -$(OUTPUT)%.o: %.S libapi_dirs - $(QUIET_CC)$(CC) -o $@ -c $(ALL_CFLAGS) $< +$(API_IN): FORCE + @$(MAKE) $(build)=libapi + +$(LIBFILE): $(API_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(API_IN) clean: - $(call QUIET_CLEAN, libapi) $(RM) $(LIB_OBJS) $(LIBFILE) + $(call QUIET_CLEAN, libapi) $(RM) $(LIBFILE); \ + find $(if $(OUTPUT),$(OUTPUT),.) 
-name \*.o -or -name \*.o.cmd -or -name \*.o.d | xargs $(RM) + +FORCE: -.PHONY: clean +.PHONY: clean FORCE diff --git a/tools/lib/api/fd/Build b/tools/lib/api/fd/Build new file mode 100644 index 000000000000..605d99f6d71a --- /dev/null +++ b/tools/lib/api/fd/Build @@ -0,0 +1 @@ +libapi-y += array.o diff --git a/tools/lib/api/fs/Build b/tools/lib/api/fs/Build new file mode 100644 index 000000000000..6de5a4f0b501 --- /dev/null +++ b/tools/lib/api/fs/Build @@ -0,0 +1,4 @@ +libapi-y += fs.o +libapi-y += debugfs.o +libapi-y += findfs.o +libapi-y += tracefs.o diff --git a/tools/lib/api/fs/debugfs.c b/tools/lib/api/fs/debugfs.c index d2b18e887071..8305b3e9d48e 100644 --- a/tools/lib/api/fs/debugfs.c +++ b/tools/lib/api/fs/debugfs.c @@ -3,75 +3,50 @@ #include <stdio.h> #include <stdlib.h> #include <string.h> +#include <unistd.h> #include <stdbool.h> #include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> #include <sys/mount.h> #include <linux/kernel.h> #include "debugfs.h" -char debugfs_mountpoint[PATH_MAX + 1] = "/sys/kernel/debug"; +#ifndef DEBUGFS_DEFAULT_PATH +#define DEBUGFS_DEFAULT_PATH "/sys/kernel/debug" +#endif + +char debugfs_mountpoint[PATH_MAX + 1] = DEBUGFS_DEFAULT_PATH; static const char * const debugfs_known_mountpoints[] = { - "/sys/kernel/debug", + DEBUGFS_DEFAULT_PATH, "/debug", 0, }; static bool debugfs_found; +bool debugfs_configured(void) +{ + return debugfs_find_mountpoint() != NULL; +} + /* find the path to the mounted debugfs */ const char *debugfs_find_mountpoint(void) { - const char * const *ptr; - char type[100]; - FILE *fp; + const char *ret; if (debugfs_found) return (const char *)debugfs_mountpoint; - ptr = debugfs_known_mountpoints; - while (*ptr) { - if (debugfs_valid_mountpoint(*ptr) == 0) { - debugfs_found = true; - strcpy(debugfs_mountpoint, *ptr); - return debugfs_mountpoint; - } - ptr++; - } - - /* give up and parse /proc/mounts */ - fp = fopen("/proc/mounts", "r"); - if (fp == NULL) - return NULL; - - while (fscanf(fp, "%*s %" STR(PATH_MAX) "s %99s %*s %*d %*d\n", - debugfs_mountpoint, type) == 2) { - if (strcmp(type, "debugfs") == 0) - break; - } - fclose(fp); + ret = find_mountpoint("debugfs", (long) DEBUGFS_MAGIC, + debugfs_mountpoint, PATH_MAX + 1, + debugfs_known_mountpoints); + if (ret) + debugfs_found = true; - if (strcmp(type, "debugfs") != 0) - return NULL; - - debugfs_found = true; - - return debugfs_mountpoint; -} - -/* verify that a mountpoint is actually a debugfs instance */ - -int debugfs_valid_mountpoint(const char *debugfs) -{ - struct statfs st_fs; - - if (statfs(debugfs, &st_fs) < 0) - return -ENOENT; - else if ((long)st_fs.f_type != (long)DEBUGFS_MAGIC) - return -ENOENT; - - return 0; + return ret; } /* mount the debugfs somewhere if it's not mounted */ @@ -87,7 +62,7 @@ char *debugfs_mount(const char *mountpoint) mountpoint = getenv(PERF_DEBUGFS_ENVIRONMENT); /* if no environment variable, use default */ if (mountpoint == NULL) - mountpoint = "/sys/kernel/debug"; + mountpoint = DEBUGFS_DEFAULT_PATH; } if (mount(NULL, mountpoint, "debugfs", 0, NULL) < 0) diff --git a/tools/lib/api/fs/debugfs.h b/tools/lib/api/fs/debugfs.h index 0739881a9897..455023698d2b 100644 --- a/tools/lib/api/fs/debugfs.h +++ b/tools/lib/api/fs/debugfs.h @@ -1,16 +1,7 @@ #ifndef __API_DEBUGFS_H__ #define __API_DEBUGFS_H__ -#define _STR(x) #x -#define STR(x) _STR(x) - -/* - * On most systems <limits.h> would have given us this, but not on some systems - * (e.g. GNU/Hurd). 
- */ -#ifndef PATH_MAX -#define PATH_MAX 4096 -#endif +#include "findfs.h" #ifndef DEBUGFS_MAGIC #define DEBUGFS_MAGIC 0x64626720 @@ -20,8 +11,8 @@ #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" #endif +bool debugfs_configured(void); const char *debugfs_find_mountpoint(void); -int debugfs_valid_mountpoint(const char *debugfs); char *debugfs_mount(const char *mountpoint); extern char debugfs_mountpoint[]; diff --git a/tools/lib/api/fs/findfs.c b/tools/lib/api/fs/findfs.c new file mode 100644 index 000000000000..49946cb6d7af --- /dev/null +++ b/tools/lib/api/fs/findfs.c @@ -0,0 +1,63 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <stdbool.h> +#include <sys/vfs.h> + +#include "findfs.h" + +/* verify that a mountpoint is actually the type we want */ + +int valid_mountpoint(const char *mount, long magic) +{ + struct statfs st_fs; + + if (statfs(mount, &st_fs) < 0) + return -ENOENT; + else if ((long)st_fs.f_type != magic) + return -ENOENT; + + return 0; +} + +/* find the path to a mounted file system */ +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints) +{ + const char * const *ptr; + char format[128]; + char type[100]; + FILE *fp; + + if (known_mountpoints) { + ptr = known_mountpoints; + while (*ptr) { + if (valid_mountpoint(*ptr, magic) == 0) { + strncpy(mountpoint, *ptr, len - 1); + mountpoint[len-1] = 0; + return mountpoint; + } + ptr++; + } + } + + /* give up and parse /proc/mounts */ + fp = fopen("/proc/mounts", "r"); + if (fp == NULL) + return NULL; + + snprintf(format, 128, "%%*s %%%ds %%99s %%*s %%*d %%*d\n", len); + + while (fscanf(fp, format, mountpoint, type) == 2) { + if (strcmp(type, fstype) == 0) + break; + } + fclose(fp); + + if (strcmp(type, fstype) != 0) + return NULL; + + return mountpoint; +} diff --git a/tools/lib/api/fs/findfs.h b/tools/lib/api/fs/findfs.h new file mode 100644 index 000000000000..b6f5d05acc42 --- /dev/null +++ b/tools/lib/api/fs/findfs.h @@ -0,0 +1,23 @@ +#ifndef __API_FINDFS_H__ +#define __API_FINDFS_H__ + +#include <stdbool.h> + +#define _STR(x) #x +#define STR(x) _STR(x) + +/* + * On most systems <limits.h> would have given us this, but not on some systems + * (e.g. GNU/Hurd). 
+ */ +#ifndef PATH_MAX +#define PATH_MAX 4096 +#endif + +const char *find_mountpoint(const char *fstype, long magic, + char *mountpoint, int len, + const char * const *known_mountpoints); + +int valid_mountpoint(const char *mount, long magic); + +#endif /* __API_FINDFS_H__ */ diff --git a/tools/lib/api/fs/tracefs.c b/tools/lib/api/fs/tracefs.c new file mode 100644 index 000000000000..e4aa9688b71e --- /dev/null +++ b/tools/lib/api/fs/tracefs.c @@ -0,0 +1,78 @@ +#include <errno.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <unistd.h> +#include <stdbool.h> +#include <sys/vfs.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/mount.h> +#include <linux/kernel.h> + +#include "tracefs.h" + +#ifndef TRACEFS_DEFAULT_PATH +#define TRACEFS_DEFAULT_PATH "/sys/kernel/tracing" +#endif + +char tracefs_mountpoint[PATH_MAX + 1] = TRACEFS_DEFAULT_PATH; + +static const char * const tracefs_known_mountpoints[] = { + TRACEFS_DEFAULT_PATH, + "/sys/kernel/debug/tracing", + "/tracing", + "/trace", + 0, +}; + +static bool tracefs_found; + +bool tracefs_configured(void) +{ + return tracefs_find_mountpoint() != NULL; +} + +/* find the path to the mounted tracefs */ +const char *tracefs_find_mountpoint(void) +{ + const char *ret; + + if (tracefs_found) + return (const char *)tracefs_mountpoint; + + ret = find_mountpoint("tracefs", (long) TRACEFS_MAGIC, + tracefs_mountpoint, PATH_MAX + 1, + tracefs_known_mountpoints); + + if (ret) + tracefs_found = true; + + return ret; +} + +/* mount the tracefs somewhere if it's not mounted */ +char *tracefs_mount(const char *mountpoint) +{ + /* see if it's already mounted */ + if (tracefs_find_mountpoint()) + goto out; + + /* if not mounted and no argument */ + if (mountpoint == NULL) { + /* see if environment variable set */ + mountpoint = getenv(PERF_TRACEFS_ENVIRONMENT); + /* if no environment variable, use default */ + if (mountpoint == NULL) + mountpoint = TRACEFS_DEFAULT_PATH; + } + + if (mount(NULL, mountpoint, "tracefs", 0, NULL) < 0) + return NULL; + + /* save the mountpoint */ + tracefs_found = true; + strncpy(tracefs_mountpoint, mountpoint, sizeof(tracefs_mountpoint)); +out: + return tracefs_mountpoint; +} diff --git a/tools/lib/api/fs/tracefs.h b/tools/lib/api/fs/tracefs.h new file mode 100644 index 000000000000..da780ac49acb --- /dev/null +++ b/tools/lib/api/fs/tracefs.h @@ -0,0 +1,21 @@ +#ifndef __API_TRACEFS_H__ +#define __API_TRACEFS_H__ + +#include "findfs.h" + +#ifndef TRACEFS_MAGIC +#define TRACEFS_MAGIC 0x74726163 +#endif + +#ifndef PERF_TRACEFS_ENVIRONMENT +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" +#endif + +bool tracefs_configured(void); +const char *tracefs_find_mountpoint(void); +int tracefs_valid_mountpoint(const char *debugfs); +char *tracefs_mount(const char *mountpoint); + +extern char tracefs_mountpoint[]; + +#endif /* __API_DEBUGFS_H__ */ diff --git a/tools/lib/hweight.c b/tools/lib/hweight.c new file mode 100644 index 000000000000..0b859b884339 --- /dev/null +++ b/tools/lib/hweight.c @@ -0,0 +1,62 @@ +#include <linux/bitops.h> +#include <asm/types.h> + +/** + * hweightN - returns the hamming weight of a N-bit word + * @x: the word to weigh + * + * The Hamming Weight of a number is the total number of bits set in it. 
+ */ + +unsigned int __sw_hweight32(unsigned int w) +{ +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x55555555; + w = (w & 0x33333333) + ((w >> 2) & 0x33333333); + w = (w + (w >> 4)) & 0x0f0f0f0f; + return (w * 0x01010101) >> 24; +#else + unsigned int res = w - ((w >> 1) & 0x55555555); + res = (res & 0x33333333) + ((res >> 2) & 0x33333333); + res = (res + (res >> 4)) & 0x0F0F0F0F; + res = res + (res >> 8); + return (res + (res >> 16)) & 0x000000FF; +#endif +} + +unsigned int __sw_hweight16(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x5555); + res = (res & 0x3333) + ((res >> 2) & 0x3333); + res = (res + (res >> 4)) & 0x0F0F; + return (res + (res >> 8)) & 0x00FF; +} + +unsigned int __sw_hweight8(unsigned int w) +{ + unsigned int res = w - ((w >> 1) & 0x55); + res = (res & 0x33) + ((res >> 2) & 0x33); + return (res + (res >> 4)) & 0x0F; +} + +unsigned long __sw_hweight64(__u64 w) +{ +#if BITS_PER_LONG == 32 + return __sw_hweight32((unsigned int)(w >> 32)) + + __sw_hweight32((unsigned int)w); +#elif BITS_PER_LONG == 64 +#ifdef CONFIG_ARCH_HAS_FAST_MULTIPLIER + w -= (w >> 1) & 0x5555555555555555ul; + w = (w & 0x3333333333333333ul) + ((w >> 2) & 0x3333333333333333ul); + w = (w + (w >> 4)) & 0x0f0f0f0f0f0f0f0ful; + return (w * 0x0101010101010101ul) >> 56; +#else + __u64 res = w - ((w >> 1) & 0x5555555555555555ul); + res = (res & 0x3333333333333333ul) + ((res >> 2) & 0x3333333333333333ul); + res = (res + (res >> 4)) & 0x0F0F0F0F0F0F0F0Ful; + res = res + (res >> 8); + res = res + (res >> 16); + return (res + (res >> 32)) & 0x00000000000000FFul; +#endif +#endif +} diff --git a/tools/lib/lockdep/Build b/tools/lib/lockdep/Build new file mode 100644 index 000000000000..6f667355b068 --- /dev/null +++ b/tools/lib/lockdep/Build @@ -0,0 +1 @@ +liblockdep-y += common.o lockdep.o preload.o rbtree.o diff --git a/tools/lib/lockdep/Makefile b/tools/lib/lockdep/Makefile index 4b866c54f624..18ffccf00426 100644 --- a/tools/lib/lockdep/Makefile +++ b/tools/lib/lockdep/Makefile @@ -14,9 +14,10 @@ define allow-override $(eval $(1) = $(2))) endef -# Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. +# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. 
$(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) INSTALL = install @@ -35,6 +36,10 @@ bindir = $(prefix)/$(bindir_relative) export DESTDIR DESTDIR_SQ INSTALL +MAKEFLAGS += --no-print-directory + +include ../../scripts/Makefile.include + # copy a bit from Linux kbuild ifeq ("$(origin V)", "command line") @@ -44,56 +49,21 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(BUILD_OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)$(MAKE) -C $(BUILD_OUTPUT) \ - BUILD_SRC=$(CURDIR) -f $(CURDIR)/Makefile $1 -endef - -saved-output := $(BUILD_OUTPUT) -BUILD_OUTPUT := $(shell cd $(BUILD_OUTPUT) && /bin/pwd) -$(if $(BUILD_OUTPUT),, \ - $(error output directory "$(saved-output)" does not exist)) - -all: sub-make - -gui: force - $(call build_output, all_cmd) - -$(filter-out gui,$(MAKECMDGOALS)): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # BUILD_OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(realpath $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR))) -objtree := $(realpath $(CURDIR)) -src := $(srctree) -obj := $(objtree) - -export prefix libdir bindir src obj - # Shell quotes libdir_SQ = $(subst ','\'',$(libdir)) bindir_SQ = $(subst ','\'',$(bindir)) -LIB_FILE = liblockdep.a liblockdep.so.$(LIBLOCKDEP_VERSION) +LIB_IN := $(OUTPUT)liblockdep-in.o + BIN_FILE = lockdep +LIB_FILE = $(OUTPUT)liblockdep.a $(OUTPUT)liblockdep.so.$(LIBLOCKDEP_VERSION) CONFIG_INCLUDES = CONFIG_LIBS = @@ -108,33 +78,23 @@ INCLUDES = -I. -I./uinclude -I./include -I../../include $(CONFIG_INCLUDES) # Set compile option CFLAGS if not set elsewhere CFLAGS ?= -g -DCONFIG_LOCKDEP -DCONFIG_STACKTRACE -DCONFIG_PROVE_LOCKING -DBITS_PER_LONG=__WORDSIZE -DLIBLOCKDEP_VERSION='"$(LIBLOCKDEP_VERSION)"' -rdynamic -O0 -g +CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) ifeq ($(VERBOSE),1) Q = - print_compile = - print_app_build = - print_fpic_compile = print_shared_lib_compile = print_install = else Q = @ - print_compile = echo ' CC '$(OBJ); - print_app_build = echo ' BUILD '$(OBJ); - print_fpic_compile = echo ' CC FPIC '$(OBJ); - print_shared_lib_compile = echo ' BUILD SHARED LIB '$(OBJ); - print_static_lib_build = echo ' BUILD STATIC LIB '$(OBJ); - print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; + print_shared_lib_compile = echo ' LD '$(OBJ); + print_static_lib_build = echo ' LD '$(OBJ); + print_install = echo ' INSTALL '$1' to $(DESTDIR_SQ)$2'; endif -do_fpic_compile = \ - ($(print_fpic_compile) \ - $(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@) - -do_app_build = \ - ($(print_app_build) \ - $(CC) $^ -rdynamic -o $@ $(CONFIG_LIBS) $(LIBS)) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. 
obj do_compile_shared_library = \ ($(print_shared_lib_compile) \ @@ -144,22 +104,6 @@ do_build_static_lib = \ ($(print_static_lib_build) \ $(RM) $@; $(AR) rcs $@ $^) - -define do_compile - $(print_compile) \ - $(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; -endef - -$(obj)/%.o: $(src)/%.c - $(Q)$(call do_compile) - -%.o: $(src)/%.c - $(Q)$(call do_compile) - -PEVENT_LIB_OBJS = common.o lockdep.o preload.o rbtree.o - -ALL_OBJS = $(PEVENT_LIB_OBJS) - CMD_TARGETS = $(LIB_FILE) TARGETS = $(CMD_TARGETS) @@ -169,42 +113,15 @@ all: all_cmd all_cmd: $(CMD_TARGETS) -liblockdep.so.$(LIBLOCKDEP_VERSION): $(PEVENT_LIB_OBJS) +$(LIB_IN): force + $(Q)$(MAKE) $(build)=liblockdep + +liblockdep.so.$(LIBLOCKDEP_VERSION): $(LIB_IN) $(Q)$(do_compile_shared_library) -liblockdep.a: $(PEVENT_LIB_OBJS) +liblockdep.a: $(LIB_IN) $(Q)$(do_build_static_lib) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c - $(Q)$(do_fpic_compile) - -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -233,8 +150,6 @@ clean: $(RM) *.o *~ $(TARGETS) *.a *liblockdep*.so* $(VERSION_FILES) .*.d $(RM) tags TAGS -endif # skip-makefile - PHONY += force force: diff --git a/tools/lib/lockdep/uinclude/linux/kernel.h b/tools/lib/lockdep/uinclude/linux/kernel.h index a11e3c357be7..cd2cc59a5da7 100644 --- a/tools/lib/lockdep/uinclude/linux/kernel.h +++ b/tools/lib/lockdep/uinclude/linux/kernel.h @@ -28,6 +28,9 @@ #define __init #define noinline #define list_add_tail_rcu list_add_tail +#define list_for_each_entry_rcu list_for_each_entry +#define barrier() +#define synchronize_sched() #ifndef CALLER_ADDR0 #define CALLER_ADDR0 ((unsigned long)__builtin_return_address(0)) diff --git a/tools/lib/rbtree.c b/tools/lib/rbtree.c new file mode 100644 index 000000000000..17c2b596f043 --- /dev/null +++ b/tools/lib/rbtree.c @@ -0,0 +1,548 @@ +/* + Red Black Trees + (C) 1999 Andrea Arcangeli <andrea@suse.de> + (C) 2002 David Woodhouse <dwmw2@infradead.org> + (C) 2012 Michel Lespinasse <walken@google.com> + + This program is free software; you can redistribute it and/or modify + it under the terms of the GNU General Public License as published by + the Free Software Foundation; either version 2 of the License, or + (at your option) any later version. + + This program is distributed in the hope that it will be useful, + but WITHOUT ANY WARRANTY; without even the implied warranty of + MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + GNU General Public License for more details. 
+ + You should have received a copy of the GNU General Public License + along with this program; if not, write to the Free Software + Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + + linux/lib/rbtree.c +*/ + +#include <linux/rbtree_augmented.h> + +/* + * red-black trees properties: http://en.wikipedia.org/wiki/Rbtree + * + * 1) A node is either red or black + * 2) The root is black + * 3) All leaves (NULL) are black + * 4) Both children of every red node are black + * 5) Every simple path from root to leaves contains the same number + * of black nodes. + * + * 4 and 5 give the O(log n) guarantee, since 4 implies you cannot have two + * consecutive red nodes in a path and every red node is therefore followed by + * a black. So if B is the number of black nodes on every simple path (as per + * 5), then the longest possible path due to 4 is 2B. + * + * We shall indicate color with case, where black nodes are uppercase and red + * nodes will be lowercase. Unknown color nodes shall be drawn as red within + * parentheses and have some accompanying text comment. + */ + +static inline void rb_set_black(struct rb_node *rb) +{ + rb->__rb_parent_color |= RB_BLACK; +} + +static inline struct rb_node *rb_red_parent(struct rb_node *red) +{ + return (struct rb_node *)red->__rb_parent_color; +} + +/* + * Helper function for rotations: + * - old's parent and color get assigned to new + * - old gets assigned new as a parent and 'color' as a color. + */ +static inline void +__rb_rotate_set_parents(struct rb_node *old, struct rb_node *new, + struct rb_root *root, int color) +{ + struct rb_node *parent = rb_parent(old); + new->__rb_parent_color = old->__rb_parent_color; + rb_set_parent_color(old, new, color); + __rb_change_child(old, new, parent, root); +} + +static __always_inline void +__rb_insert(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *parent = rb_red_parent(node), *gparent, *tmp; + + while (true) { + /* + * Loop invariant: node is red + * + * If there is a black parent, we are done. + * Otherwise, take some corrective action as we don't + * want a red root or two consecutive red nodes. + */ + if (!parent) { + rb_set_parent_color(node, NULL, RB_BLACK); + break; + } else if (rb_is_black(parent)) + break; + + gparent = rb_red_parent(parent); + + tmp = gparent->rb_right; + if (parent != tmp) { /* parent == gparent->rb_left */ + if (tmp && rb_is_red(tmp)) { + /* + * Case 1 - color flips + * + * G g + * / \ / \ + * p u --> P U + * / / + * n n + * + * However, since g's parent might be red, and + * 4) does not allow this, we need to recurse + * at g. + */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_right; + if (node == tmp) { + /* + * Case 2 - left rotate at parent + * + * G G + * / \ / \ + * p U --> n U + * \ / + * n p + * + * This still leaves us in violation of 4), the + * continuation into Case 3 will fix that. 
+ */ + parent->rb_right = tmp = node->rb_left; + node->rb_left = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_right; + } + + /* + * Case 3 - right rotate at gparent + * + * G P + * / \ / \ + * p U --> n g + * / \ + * n U + */ + gparent->rb_left = tmp; /* == parent->rb_right */ + parent->rb_right = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } else { + tmp = gparent->rb_left; + if (tmp && rb_is_red(tmp)) { + /* Case 1 - color flips */ + rb_set_parent_color(tmp, gparent, RB_BLACK); + rb_set_parent_color(parent, gparent, RB_BLACK); + node = gparent; + parent = rb_parent(node); + rb_set_parent_color(node, parent, RB_RED); + continue; + } + + tmp = parent->rb_left; + if (node == tmp) { + /* Case 2 - right rotate at parent */ + parent->rb_left = tmp = node->rb_right; + node->rb_right = parent; + if (tmp) + rb_set_parent_color(tmp, parent, + RB_BLACK); + rb_set_parent_color(parent, node, RB_RED); + augment_rotate(parent, node); + parent = node; + tmp = node->rb_left; + } + + /* Case 3 - left rotate at gparent */ + gparent->rb_right = tmp; /* == parent->rb_left */ + parent->rb_left = gparent; + if (tmp) + rb_set_parent_color(tmp, gparent, RB_BLACK); + __rb_rotate_set_parents(gparent, parent, root, RB_RED); + augment_rotate(gparent, parent); + break; + } + } +} + +/* + * Inline version for rb_erase() use - we want to be able to inline + * and eliminate the dummy_rotate callback there + */ +static __always_inline void +____rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + struct rb_node *node = NULL, *sibling, *tmp1, *tmp2; + + while (true) { + /* + * Loop invariants: + * - node is black (or NULL on first iteration) + * - node is not the root (parent is not NULL) + * - All leaf paths going through parent and node have a + * black node count that is 1 lower than other leaf paths. + */ + sibling = parent->rb_right; + if (node != sibling) { /* node == parent->rb_left */ + if (rb_is_red(sibling)) { + /* + * Case 1 - left rotate at parent + * + * P S + * / \ / \ + * N s --> p Sr + * / \ / \ + * Sl Sr N Sl + */ + parent->rb_right = tmp1 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_right; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_left; + if (!tmp2 || rb_is_black(tmp2)) { + /* + * Case 2 - sibling color flip + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N s + * / \ / \ + * Sl Sr Sl Sr + * + * This leaves us violating 5) which + * can be fixed by flipping p to black + * if it was red, or by recursing at p. + * p is red when coming from Case 1. 
+ */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* + * Case 3 - right rotate at sibling + * (p could be either color here) + * + * (p) (p) + * / \ / \ + * N S --> N Sl + * / \ \ + * sl Sr s + * \ + * Sr + */ + sibling->rb_left = tmp1 = tmp2->rb_right; + tmp2->rb_right = sibling; + parent->rb_right = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* + * Case 4 - left rotate at parent + color flips + * (p and sl could be either color here. + * After rotation, p becomes black, s acquires + * p's color, and sl keeps its color) + * + * (p) (s) + * / \ / \ + * N S --> P Sr + * / \ / \ + * (sl) sr N (sl) + */ + parent->rb_right = tmp2 = sibling->rb_left; + sibling->rb_left = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } else { + sibling = parent->rb_left; + if (rb_is_red(sibling)) { + /* Case 1 - right rotate at parent */ + parent->rb_left = tmp1 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, parent, RB_BLACK); + __rb_rotate_set_parents(parent, sibling, root, + RB_RED); + augment_rotate(parent, sibling); + sibling = tmp1; + } + tmp1 = sibling->rb_left; + if (!tmp1 || rb_is_black(tmp1)) { + tmp2 = sibling->rb_right; + if (!tmp2 || rb_is_black(tmp2)) { + /* Case 2 - sibling color flip */ + rb_set_parent_color(sibling, parent, + RB_RED); + if (rb_is_red(parent)) + rb_set_black(parent); + else { + node = parent; + parent = rb_parent(node); + if (parent) + continue; + } + break; + } + /* Case 3 - right rotate at sibling */ + sibling->rb_right = tmp1 = tmp2->rb_left; + tmp2->rb_left = sibling; + parent->rb_left = tmp2; + if (tmp1) + rb_set_parent_color(tmp1, sibling, + RB_BLACK); + augment_rotate(sibling, tmp2); + tmp1 = sibling; + sibling = tmp2; + } + /* Case 4 - left rotate at parent + color flips */ + parent->rb_left = tmp2 = sibling->rb_right; + sibling->rb_right = parent; + rb_set_parent_color(tmp1, sibling, RB_BLACK); + if (tmp2) + rb_set_parent(tmp2, parent); + __rb_rotate_set_parents(parent, sibling, root, + RB_BLACK); + augment_rotate(parent, sibling); + break; + } + } +} + +/* Non-inline version for rb_erase_augmented() use */ +void __rb_erase_color(struct rb_node *parent, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + ____rb_erase_color(parent, root, augment_rotate); +} + +/* + * Non-augmented rbtree manipulation functions. + * + * We use dummy augmented callbacks here, and have the compiler optimize them + * out of the rb_insert_color() and rb_erase() function definitions. 
+ */ + +static inline void dummy_propagate(struct rb_node *node, struct rb_node *stop) {} +static inline void dummy_copy(struct rb_node *old, struct rb_node *new) {} +static inline void dummy_rotate(struct rb_node *old, struct rb_node *new) {} + +static const struct rb_augment_callbacks dummy_callbacks = { + dummy_propagate, dummy_copy, dummy_rotate +}; + +void rb_insert_color(struct rb_node *node, struct rb_root *root) +{ + __rb_insert(node, root, dummy_rotate); +} + +void rb_erase(struct rb_node *node, struct rb_root *root) +{ + struct rb_node *rebalance; + rebalance = __rb_erase_augmented(node, root, &dummy_callbacks); + if (rebalance) + ____rb_erase_color(rebalance, root, dummy_rotate); +} + +/* + * Augmented rbtree manipulation functions. + * + * This instantiates the same __always_inline functions as in the non-augmented + * case, but this time with user-defined callbacks. + */ + +void __rb_insert_augmented(struct rb_node *node, struct rb_root *root, + void (*augment_rotate)(struct rb_node *old, struct rb_node *new)) +{ + __rb_insert(node, root, augment_rotate); +} + +/* + * This function returns the first node (in sort order) of the tree. + */ +struct rb_node *rb_first(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_left) + n = n->rb_left; + return n; +} + +struct rb_node *rb_last(const struct rb_root *root) +{ + struct rb_node *n; + + n = root->rb_node; + if (!n) + return NULL; + while (n->rb_right) + n = n->rb_right; + return n; +} + +struct rb_node *rb_next(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a right-hand child, go down and then left as far + * as we can. + */ + if (node->rb_right) { + node = node->rb_right; + while (node->rb_left) + node=node->rb_left; + return (struct rb_node *)node; + } + + /* + * No right-hand children. Everything down and left is smaller than us, + * so any 'next' node must be in the general direction of our parent. + * Go up the tree; any time the ancestor is a right-hand child of its + * parent, keep going up. First time it's a left-hand child of its + * parent, said parent is our 'next' node. + */ + while ((parent = rb_parent(node)) && node == parent->rb_right) + node = parent; + + return parent; +} + +struct rb_node *rb_prev(const struct rb_node *node) +{ + struct rb_node *parent; + + if (RB_EMPTY_NODE(node)) + return NULL; + + /* + * If we have a left-hand child, go down and then right as far + * as we can. + */ + if (node->rb_left) { + node = node->rb_left; + while (node->rb_right) + node=node->rb_right; + return (struct rb_node *)node; + } + + /* + * No left-hand children. Go up till we find an ancestor which + * is a right-hand child of its parent. 
+ */ + while ((parent = rb_parent(node)) && node == parent->rb_left) + node = parent; + + return parent; +} + +void rb_replace_node(struct rb_node *victim, struct rb_node *new, + struct rb_root *root) +{ + struct rb_node *parent = rb_parent(victim); + + /* Set the surrounding nodes to point to the replacement */ + __rb_change_child(victim, new, parent, root); + if (victim->rb_left) + rb_set_parent(victim->rb_left, new); + if (victim->rb_right) + rb_set_parent(victim->rb_right, new); + + /* Copy the pointers/colour from the victim to the replacement */ + *new = *victim; +} + +static struct rb_node *rb_left_deepest_node(const struct rb_node *node) +{ + for (;;) { + if (node->rb_left) + node = node->rb_left; + else if (node->rb_right) + node = node->rb_right; + else + return (struct rb_node *)node; + } +} + +struct rb_node *rb_next_postorder(const struct rb_node *node) +{ + const struct rb_node *parent; + if (!node) + return NULL; + parent = rb_parent(node); + + /* If we're sitting on node, we've already seen our children */ + if (parent && node == parent->rb_left && parent->rb_right) { + /* If we are the parent's left node, go to the parent's right + * node then all the way down to the left */ + return rb_left_deepest_node(parent->rb_right); + } else + /* Otherwise we are the parent's right node, and the parent + * should be next */ + return (struct rb_node *)parent; +} + +struct rb_node *rb_first_postorder(const struct rb_root *root) +{ + if (!root->rb_node) + return NULL; + + return rb_left_deepest_node(root->rb_node); +} diff --git a/tools/lib/traceevent/.gitignore b/tools/lib/traceevent/.gitignore index 35f56be5a4cd..3c60335fe7be 100644 --- a/tools/lib/traceevent/.gitignore +++ b/tools/lib/traceevent/.gitignore @@ -1 +1,2 @@ TRACEEVENT-CFLAGS +libtraceevent-dynamic-list diff --git a/tools/lib/traceevent/Build b/tools/lib/traceevent/Build new file mode 100644 index 000000000000..c681d0575d16 --- /dev/null +++ b/tools/lib/traceevent/Build @@ -0,0 +1,17 @@ +libtraceevent-y += event-parse.o +libtraceevent-y += event-plugin.o +libtraceevent-y += trace-seq.o +libtraceevent-y += parse-filter.o +libtraceevent-y += parse-utils.o +libtraceevent-y += kbuffer-parse.o + +plugin_jbd2-y += plugin_jbd2.o +plugin_hrtimer-y += plugin_hrtimer.o +plugin_kmem-y += plugin_kmem.o +plugin_kvm-y += plugin_kvm.o +plugin_mac80211-y += plugin_mac80211.o +plugin_sched_switch-y += plugin_sched_switch.o +plugin_function-y += plugin_function.o +plugin_xen-y += plugin_xen.o +plugin_scsi-y += plugin_scsi.o +plugin_cfg80211-y += plugin_cfg80211.o diff --git a/tools/lib/traceevent/Makefile b/tools/lib/traceevent/Makefile index 005c9cc06935..7851df1490e0 100644 --- a/tools/lib/traceevent/Makefile +++ b/tools/lib/traceevent/Makefile @@ -23,6 +23,7 @@ endef # Allow setting CC and AR, or setting CROSS_COMPILE as a prefix. 
$(call allow-override,CC,$(CROSS_COMPILE)gcc) $(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,NM,$(CROSS_COMPILE)nm) EXT = -std=gnu99 INSTALL = install @@ -34,9 +35,15 @@ INSTALL = install DESTDIR ?= DESTDIR_SQ = '$(subst ','\'',$(DESTDIR))' +LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) +ifeq ($(LP64), 1) + libdir_relative = lib64 +else + libdir_relative = lib +endif + prefix ?= /usr/local -bindir_relative = bin -bindir = $(prefix)/$(bindir_relative) +libdir = $(prefix)/$(libdir_relative) man_dir = $(prefix)/share/man man_dir_SQ = '$(subst ','\'',$(man_dir))' @@ -58,7 +65,7 @@ ifeq ($(prefix),$(HOME)) override plugin_dir = $(HOME)/.traceevent/plugins set_plugin_dir := 0 else -override plugin_dir = $(prefix)/lib/traceevent/plugins +override plugin_dir = $(libdir)/traceevent/plugins endif endif @@ -67,7 +74,7 @@ PLUGIN_DIR = -DPLUGIN_DIR="$(plugin_dir)" PLUGIN_DIR_SQ = '$(subst ','\'',$(PLUGIN_DIR))' endif -include $(if $(BUILD_SRC),$(BUILD_SRC)/)../../scripts/Makefile.include +include ../../scripts/Makefile.include # copy a bit from Linux kbuild @@ -78,45 +85,18 @@ ifndef VERBOSE VERBOSE = 0 endif -ifeq ("$(origin O)", "command line") - BUILD_OUTPUT := $(O) +ifeq ($(srctree),) +srctree := $(patsubst %/,%,$(dir $(shell pwd))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +srctree := $(patsubst %/,%,$(dir $(srctree))) +#$(info Determined 'srctree' to be $(srctree)) endif -ifeq ($(BUILD_SRC),) -ifneq ($(OUTPUT),) - -define build_output - $(if $(VERBOSE:1=),@)+$(MAKE) -C $(OUTPUT) \ - BUILD_SRC=$(CURDIR)/ -f $(CURDIR)/Makefile $1 -endef - -all: sub-make - -$(MAKECMDGOALS): sub-make - -sub-make: force - $(call build_output, $(MAKECMDGOALS)) - - -# Leave processing to above invocation of make -skip-makefile := 1 - -endif # OUTPUT -endif # BUILD_SRC - -# We process the rest of the Makefile if this is the final invocation of make -ifeq ($(skip-makefile),) - -srctree := $(if $(BUILD_SRC),$(BUILD_SRC),$(CURDIR)) -objtree := $(CURDIR) -src := $(srctree) -obj := $(objtree) - -export prefix bindir src obj +export prefix libdir src obj # Shell quotes -bindir_SQ = $(subst ','\'',$(bindir)) -bindir_relative_SQ = $(subst ','\'',$(bindir_relative)) +libdir_SQ = $(subst ','\'',$(libdir)) +libdir_relative_SQ = $(subst ','\'',$(libdir_relative)) plugin_dir_SQ = $(subst ','\'',$(plugin_dir)) LIB_FILE = libtraceevent.a libtraceevent.so @@ -132,16 +112,19 @@ EXTRAVERSION = $(EP_EXTRAVERSION) OBJ = $@ N = -export Q VERBOSE - EVENT_PARSE_VERSION = $(EP_VERSION).$(EP_PATCHLEVEL).$(EP_EXTRAVERSION) -INCLUDES = -I. -I $(srctree)/../../include $(CONFIG_INCLUDES) +INCLUDES = -I. 
-I $(srctree)/tools/include $(CONFIG_INCLUDES) -# Set compile option CFLAGS if not set elsewhere -CFLAGS ?= -g -Wall +# Set compile option CFLAGS +ifdef EXTRA_CFLAGS + CFLAGS := $(EXTRA_CFLAGS) +else + CFLAGS := -g -Wall +endif # Append required CFLAGS +override CFLAGS += -fPIC override CFLAGS += $(CONFIG_FLAGS) $(INCLUDES) $(PLUGIN_DIR_SQ) override CFLAGS += $(udis86-flags) -D_GNU_SOURCE @@ -151,74 +134,62 @@ else Q = @ endif -do_compile_shared_library = \ - ($(print_shared_lib_compile) \ - $(CC) --shared $^ -o $@) - -do_plugin_build = \ - ($(print_plugin_build) \ - $(CC) $(CFLAGS) -shared -nostartfiles -o $@ $<) - -do_build_static_lib = \ - ($(print_static_lib_build) \ - $(RM) $@; $(AR) rcs $@ $^) - - -do_compile = $(QUIET_CC)$(CC) -c $(CFLAGS) $(EXT) $< -o $(obj)/$@; +# Disable command line variables (CFLAGS) overide from top +# level Makefile (perf), otherwise build Makefile will get +# the same command line setup. +MAKEOVERRIDES= -$(obj)/%.o: $(src)/%.c - $(call do_compile) +export srctree OUTPUT CC LD CFLAGS V +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -%.o: $(src)/%.c - $(call do_compile) +PLUGINS = plugin_jbd2.so +PLUGINS += plugin_hrtimer.so +PLUGINS += plugin_kmem.so +PLUGINS += plugin_kvm.so +PLUGINS += plugin_mac80211.so +PLUGINS += plugin_sched_switch.so +PLUGINS += plugin_function.so +PLUGINS += plugin_xen.so +PLUGINS += plugin_scsi.so +PLUGINS += plugin_cfg80211.so -PEVENT_LIB_OBJS = event-parse.o -PEVENT_LIB_OBJS += event-plugin.o -PEVENT_LIB_OBJS += trace-seq.o -PEVENT_LIB_OBJS += parse-filter.o -PEVENT_LIB_OBJS += parse-utils.o -PEVENT_LIB_OBJS += kbuffer-parse.o +PLUGINS := $(addprefix $(OUTPUT),$(PLUGINS)) +PLUGINS_IN := $(PLUGINS:.so=-in.o) -PLUGIN_OBJS = plugin_jbd2.o -PLUGIN_OBJS += plugin_hrtimer.o -PLUGIN_OBJS += plugin_kmem.o -PLUGIN_OBJS += plugin_kvm.o -PLUGIN_OBJS += plugin_mac80211.o -PLUGIN_OBJS += plugin_sched_switch.o -PLUGIN_OBJS += plugin_function.o -PLUGIN_OBJS += plugin_xen.o -PLUGIN_OBJS += plugin_scsi.o -PLUGIN_OBJS += plugin_cfg80211.o +TE_IN := $(OUTPUT)libtraceevent-in.o +LIB_FILE := $(addprefix $(OUTPUT),$(LIB_FILE)) +DYNAMIC_LIST_FILE := $(OUTPUT)libtraceevent-dynamic-list -PLUGINS := $(PLUGIN_OBJS:.o=.so) - -ALL_OBJS = $(PEVENT_LIB_OBJS) $(PLUGIN_OBJS) - -CMD_TARGETS = $(LIB_FILE) $(PLUGINS) +CMD_TARGETS = $(LIB_FILE) $(PLUGINS) $(DYNAMIC_LIST_FILE) TARGETS = $(CMD_TARGETS) - all: all_cmd all_cmd: $(CMD_TARGETS) -libtraceevent.so: $(PEVENT_LIB_OBJS) +$(TE_IN): force + $(Q)$(MAKE) $(build)=libtraceevent + +$(OUTPUT)libtraceevent.so: $(TE_IN) $(QUIET_LINK)$(CC) --shared $^ -o $@ -libtraceevent.a: $(PEVENT_LIB_OBJS) +$(OUTPUT)libtraceevent.a: $(TE_IN) $(QUIET_LINK)$(RM) $@; $(AR) rcs $@ $^ +$(OUTPUT)libtraceevent-dynamic-list: $(PLUGINS) + $(QUIET_GEN)$(call do_generate_dynamic_list_file, $(PLUGINS), $@) + plugins: $(PLUGINS) -$(PEVENT_LIB_OBJS): %.o: $(src)/%.c TRACEEVENT-CFLAGS - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) $(EXT) -fPIC $< -o $@ +__plugin_obj = $(notdir $@) + plugin_obj = $(__plugin_obj:-in.o=) -$(PLUGIN_OBJS): %.o : $(src)/%.c - $(QUIET_CC_FPIC)$(CC) -c $(CFLAGS) -fPIC -o $@ $< +$(PLUGINS_IN): force + $(Q)$(MAKE) $(build)=$(plugin_obj) -$(PLUGINS): %.so: %.o - $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $< +$(OUTPUT)%.so: $(OUTPUT)%-in.o + $(QUIET_LINK)$(CC) $(CFLAGS) -shared -nostartfiles -o $@ $^ define make_version.h (echo '/* This file is automatically generated. Do not modify. 
*/'; \ @@ -255,40 +226,6 @@ define update_dir fi); endef -## make deps - -all_objs := $(sort $(ALL_OBJS)) -all_deps := $(all_objs:%.o=.%.d) - -# let .d file also depends on the source and header files -define check_deps - @set -e; $(RM) $@; \ - $(CC) -MM $(CFLAGS) $< > $@.$$$$; \ - sed 's,\($*\)\.o[ :]*,\1.o $@ : ,g' < $@.$$$$ > $@; \ - $(RM) $@.$$$$ -endef - -$(all_deps): .%.d: $(src)/%.c - $(Q)$(call check_deps) - -$(all_objs) : %.o : .%.d - -dep_includes := $(wildcard $(all_deps)) - -ifneq ($(dep_includes),) - include $(dep_includes) -endif - -### Detect environment changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):$(ARCH):$(CROSS_COMPILE) - -TRACEEVENT-CFLAGS: force - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat TRACEEVENT-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or cross compiler"; \ - echo "$$FLAGS" >TRACEEVENT-CFLAGS; \ - fi - tags: force $(RM) tags find . -name '*.[ch]' | xargs ctags --extra=+f --c-kinds=+px \ @@ -312,9 +249,16 @@ define do_install_plugins done endef +define do_generate_dynamic_list_file + (echo '{'; \ + $(NM) -u -D $1 | awk 'NF>1 {print "\t"$$2";"}' | sort -u; \ + echo '};'; \ + ) > $2 +endef + install_lib: all_cmd install_plugins $(call QUIET_INSTALL, $(LIB_FILE)) \ - $(call do_install,$(LIB_FILE),$(bindir_SQ)) + $(call do_install,$(LIB_FILE),$(libdir_SQ)) install_plugins: $(PLUGINS) $(call QUIET_INSTALL, trace_plugins) \ @@ -324,17 +268,12 @@ install: install_lib clean: $(call QUIET_CLEAN, libtraceevent) \ - $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d \ + $(RM) *.o *~ $(TARGETS) *.a *.so $(VERSION_FILES) .*.d .*.cmd \ $(RM) TRACEEVENT-CFLAGS tags TAGS -endif # skip-makefile - PHONY += force plugins force: -plugins: - @echo > /dev/null - # Declare the contents of the .PHONY variable as phony. We keep that # information in a variable so we can use it in if_changed and friends. 
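The libtraceevent-dynamic-list rule added above runs 'nm -u -D' over all plugins and collects their undefined dynamic symbols into a linker list; as far as this hunk shows, the intent is that a binary linking libtraceevent statically can still export those symbols to plugins loaded with dlopen(). A hypothetical minimal plugin illustrates the kind of references that end up in that list; the event, handler name and handler body are invented, while the loader and handler signatures follow the existing plugin_*.c files:

-----------------------
#include "event-parse.h"

static int example_handler(struct trace_seq *s, struct pevent_record *record,
			   struct event_format *event, void *context)
{
	/* trace_seq_printf() and pevent_register_event_handler() stay
	 * undefined in the .so and are resolved from the host binary at
	 * dlopen() time; "nm -u -D" harvests exactly these names. */
	trace_seq_printf(s, "handled by example plugin");
	return 0;
}

int PEVENT_PLUGIN_LOADER(struct pevent *pevent)
{
	pevent_register_event_handler(pevent, -1, "sched", "sched_switch",
				      example_handler, NULL);
	return 0;
}
-----------------------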
.PHONY: $(PHONY) diff --git a/tools/lib/traceevent/event-parse.c b/tools/lib/traceevent/event-parse.c index afe20ed9fac8..cc25f059ab3d 100644 --- a/tools/lib/traceevent/event-parse.c +++ b/tools/lib/traceevent/event-parse.c @@ -304,7 +304,10 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) if (!item) return -1; - item->comm = strdup(comm); + if (comm) + item->comm = strdup(comm); + else + item->comm = strdup("<...>"); if (!item->comm) { free(item); return -1; @@ -318,9 +321,14 @@ int pevent_register_comm(struct pevent *pevent, const char *comm, int pid) return 0; } -void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock) +int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock) { - pevent->trace_clock = trace_clock; + pevent->trace_clock = strdup(trace_clock); + if (!pevent->trace_clock) { + errno = ENOMEM; + return -1; + } + return 0; } struct func_map { @@ -758,6 +766,11 @@ static void free_arg(struct print_arg *arg) free_arg(arg->hex.field); free_arg(arg->hex.size); break; + case PRINT_INT_ARRAY: + free_arg(arg->int_array.field); + free_arg(arg->int_array.count); + free_arg(arg->int_array.el_size); + break; case PRINT_TYPE: free(arg->typecast.type); free_arg(arg->typecast.item); @@ -1374,7 +1387,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f do_warning_event(event, "%s: no type found", __func__); goto fail; } - field->name = last_token; + field->name = field->alias = last_token; if (test_type(type, EVENT_OP)) goto fail; @@ -1456,7 +1469,7 @@ static int event_read_fields(struct event_format *event, struct format_field **f size_dynamic = type_size(field->name); free_token(field->name); strcat(field->type, brackets); - field->name = token; + field->name = field->alias = token; type = read_token(&token); } else { char *new_type; @@ -1926,7 +1939,22 @@ process_op(struct event_format *event, struct print_arg *arg, char **tok) goto out_warn_free; type = process_arg_token(event, right, tok, type); - arg->op.right = right; + + if (right->type == PRINT_OP && + get_op_prio(arg->op.op) < get_op_prio(right->op.op)) { + struct print_arg tmp; + + /* rotate ops according to the priority */ + arg->op.right = right->op.left; + + tmp = *arg; + *arg = *right; + *right = tmp; + + arg->op.left = right; + } else { + arg->op.right = right; + } } else if (strcmp(token, "[") == 0) { @@ -2014,6 +2042,38 @@ process_entry(struct event_format *event __maybe_unused, struct print_arg *arg, return EVENT_ERROR; } +static int alloc_and_process_delim(struct event_format *event, char *next_token, + struct print_arg **print_arg) +{ + struct print_arg *field; + enum event_type type; + char *token; + int ret = 0; + + field = alloc_arg(); + if (!field) { + do_warning_event(event, "%s: not enough memory!", __func__); + errno = ENOMEM; + return -1; + } + + type = process_arg(event, field, &token); + + if (test_type_token(type, token, EVENT_DELIM, next_token)) { + errno = EINVAL; + ret = -1; + free_arg(field); + goto out_free_token; + } + + *print_arg = field; + +out_free_token: + free_token(token); + + return ret; +} + static char *arg_eval (struct print_arg *arg); static unsigned long long @@ -2486,49 +2546,46 @@ out_free: static enum event_type process_hex(struct event_format *event, struct print_arg *arg, char **tok) { - struct print_arg *field; - enum event_type type; - char *token = NULL; - memset(arg, 0, sizeof(*arg)); arg->type = PRINT_HEX; - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not 
enough memory!", __func__); - goto out_free; - } + if (alloc_and_process_delim(event, ",", &arg->hex.field)) + goto out; - type = process_arg(event, field, &token); + if (alloc_and_process_delim(event, ")", &arg->hex.size)) + goto free_field; - if (test_type_token(type, token, EVENT_DELIM, ",")) - goto out_free; - - arg->hex.field = field; + return read_token_item(tok); - free_token(token); +free_field: + free_arg(arg->hex.field); +out: + *tok = NULL; + return EVENT_ERROR; +} - field = alloc_arg(); - if (!field) { - do_warning_event(event, "%s: not enough memory!", __func__); - *tok = NULL; - return EVENT_ERROR; - } +static enum event_type +process_int_array(struct event_format *event, struct print_arg *arg, char **tok) +{ + memset(arg, 0, sizeof(*arg)); + arg->type = PRINT_INT_ARRAY; - type = process_arg(event, field, &token); + if (alloc_and_process_delim(event, ",", &arg->int_array.field)) + goto out; - if (test_type_token(type, token, EVENT_DELIM, ")")) - goto out_free; + if (alloc_and_process_delim(event, ",", &arg->int_array.count)) + goto free_field; - arg->hex.size = field; + if (alloc_and_process_delim(event, ")", &arg->int_array.el_size)) + goto free_size; - free_token(token); - type = read_token_item(tok); - return type; + return read_token_item(tok); - out_free: - free_arg(field); - free_token(token); +free_size: + free_arg(arg->int_array.count); +free_field: + free_arg(arg->int_array.field); +out: *tok = NULL; return EVENT_ERROR; } @@ -2828,6 +2885,10 @@ process_function(struct event_format *event, struct print_arg *arg, free_token(token); return process_hex(event, arg, tok); } + if (strcmp(token, "__print_array") == 0) { + free_token(token); + return process_int_array(event, arg, tok); + } if (strcmp(token, "__get_str") == 0) { free_token(token); return process_str(event, arg, tok); @@ -3356,6 +3417,7 @@ eval_num_arg(void *data, int size, struct event_format *event, struct print_arg break; case PRINT_FLAGS: case PRINT_SYMBOL: + case PRINT_INT_ARRAY: case PRINT_HEX: break; case PRINT_TYPE: @@ -3568,7 +3630,7 @@ static const struct flag flags[] = { { "HRTIMER_RESTART", 1 }, }; -static unsigned long long eval_flag(const char *flag) +static long long eval_flag(const char *flag) { int i; @@ -3584,7 +3646,7 @@ static unsigned long long eval_flag(const char *flag) if (strcmp(flags[i].name, flag) == 0) return flags[i].value; - return 0; + return -1LL; } static void print_str_to_seq(struct trace_seq *s, const char *format, @@ -3658,7 +3720,7 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, struct print_flag_sym *flag; struct format_field *field; struct printk_map *printk; - unsigned long long val, fval; + long long val, fval; unsigned long addr; char *str; unsigned char *hex; @@ -3717,11 +3779,11 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, print = 0; for (flag = arg->flags.flags; flag; flag = flag->next) { fval = eval_flag(flag->value); - if (!val && !fval) { + if (!val && fval < 0) { print_str_to_seq(s, format, len_arg, flag->str); break; } - if (fval && (val & fval) == fval) { + if (fval > 0 && (val & fval) == fval) { if (print && arg->flags.delim) trace_seq_puts(s, arg->flags.delim); print_str_to_seq(s, format, len_arg, flag->str); @@ -3766,6 +3828,54 @@ static void print_str_arg(struct trace_seq *s, void *data, int size, } break; + case PRINT_INT_ARRAY: { + void *num; + int el_size; + + if (arg->int_array.field->type == PRINT_DYNAMIC_ARRAY) { + unsigned long offset; + struct format_field *field = + 
arg->int_array.field->dynarray.field; + offset = pevent_read_number(pevent, + data + field->offset, + field->size); + num = data + (offset & 0xffff); + } else { + field = arg->int_array.field->field.field; + if (!field) { + str = arg->int_array.field->field.name; + field = pevent_find_any_field(event, str); + if (!field) + goto out_warning_field; + arg->int_array.field->field.field = field; + } + num = data + field->offset; + } + len = eval_num_arg(data, size, event, arg->int_array.count); + el_size = eval_num_arg(data, size, event, + arg->int_array.el_size); + for (i = 0; i < len; i++) { + if (i) + trace_seq_putc(s, ' '); + + if (el_size == 1) { + trace_seq_printf(s, "%u", *(uint8_t *)num); + } else if (el_size == 2) { + trace_seq_printf(s, "%u", *(uint16_t *)num); + } else if (el_size == 4) { + trace_seq_printf(s, "%u", *(uint32_t *)num); + } else if (el_size == 8) { + trace_seq_printf(s, "%"PRIu64, *(uint64_t *)num); + } else { + trace_seq_printf(s, "BAD SIZE:%d 0x%x", + el_size, *(uint8_t *)num); + el_size = 1; + } + + num += el_size; + } + break; + } case PRINT_TYPE: break; case PRINT_STRING: { @@ -3976,7 +4086,7 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc if (asprintf(&arg->atom.atom, "%lld", ip) < 0) goto out_free; - /* skip the first "%pf: " */ + /* skip the first "%ps: " */ for (ptr = fmt + 5, bptr = data + field->offset; bptr < data + size && *ptr; ptr++) { int ls = 0; @@ -3997,6 +4107,10 @@ static struct print_arg *make_bprint_args(char *fmt, void *data, int size, struc goto process_again; case '.': goto process_again; + case 'z': + case 'Z': + ls = 1; + goto process_again; case 'p': ls = 1; /* fall through */ @@ -4939,6 +5053,96 @@ const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid) return comm; } +static struct cmdline * +pid_from_cmdlist(struct pevent *pevent, const char *comm, struct cmdline *next) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)next; + + if (cmdlist) + cmdlist = cmdlist->next; + else + cmdlist = pevent->cmdlist; + + while (cmdlist && strcmp(cmdlist->comm, comm) != 0) + cmdlist = cmdlist->next; + + return (struct cmdline *)cmdlist; +} + +/** + * pevent_data_pid_from_comm - return the pid from a given comm + * @pevent: a handle to the pevent + * @comm: the cmdline to find the pid from + * @next: the cmdline structure to find the next comm + * + * This returns the cmdline structure that holds a pid for a given + * comm, or NULL if none found. As there may be more than one pid for + * a given comm, the result of this call can be passed back into + * a recurring call in the @next paramater, and then it will find the + * next pid. + * Also, it does a linear seach, so it may be slow. + */ +struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, + struct cmdline *next) +{ + struct cmdline *cmdline; + + /* + * If the cmdlines have not been converted yet, then use + * the list. 
+ */ + if (!pevent->cmdlines) + return pid_from_cmdlist(pevent, comm, next); + + if (next) { + /* + * The next pointer could have been still from + * a previous call before cmdlines were created + */ + if (next < pevent->cmdlines || + next >= pevent->cmdlines + pevent->cmdline_count) + next = NULL; + else + cmdline = next++; + } + + if (!next) + cmdline = pevent->cmdlines; + + while (cmdline < pevent->cmdlines + pevent->cmdline_count) { + if (strcmp(cmdline->comm, comm) == 0) + return cmdline; + cmdline++; + } + return NULL; +} + +/** + * pevent_cmdline_pid - return the pid associated to a given cmdline + * @cmdline: The cmdline structure to get the pid from + * + * Returns the pid for a give cmdline. If @cmdline is NULL, then + * -1 is returned. + */ +int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline) +{ + struct cmdline_list *cmdlist = (struct cmdline_list *)cmdline; + + if (!cmdline) + return -1; + + /* + * If cmdlines have not been created yet, or cmdline is + * not part of the array, then treat it as a cmdlist instead. + */ + if (!pevent->cmdlines || + cmdline < pevent->cmdlines || + cmdline >= pevent->cmdlines + pevent->cmdline_count) + return cmdlist->pid; + + return cmdline->pid; +} + /** * pevent_data_comm_from_pid - parse the data into the print format * @s: the trace_seq to write to @@ -5256,6 +5460,15 @@ static void print_args(struct print_arg *args) print_args(args->hex.size); printf(")"); break; + case PRINT_INT_ARRAY: + printf("__print_array("); + print_args(args->int_array.field); + printf(", "); + print_args(args->int_array.count); + printf(", "); + print_args(args->int_array.el_size); + printf(")"); + break; case PRINT_STRING: case PRINT_BSTRING: printf("__get_str(%s)", args->string.string); @@ -6228,15 +6441,22 @@ void pevent_ref(struct pevent *pevent) pevent->ref_count++; } +void pevent_free_format_field(struct format_field *field) +{ + free(field->type); + if (field->alias != field->name) + free(field->alias); + free(field->name); + free(field); +} + static void free_format_fields(struct format_field *field) { struct format_field *next; while (field) { next = field->next; - free(field->type); - free(field->name); - free(field); + pevent_free_format_field(field); field = next; } } @@ -6341,6 +6561,7 @@ void pevent_free(struct pevent *pevent) free_handler(handle); } + free(pevent->trace_clock); free(pevent->events); free(pevent->sort_events); diff --git a/tools/lib/traceevent/event-parse.h b/tools/lib/traceevent/event-parse.h index 7a3873ff9a4f..063b1971eb35 100644 --- a/tools/lib/traceevent/event-parse.h +++ b/tools/lib/traceevent/event-parse.h @@ -22,6 +22,7 @@ #include <stdbool.h> #include <stdarg.h> +#include <stdio.h> #include <regex.h> #include <string.h> @@ -91,6 +92,7 @@ extern int trace_seq_putc(struct trace_seq *s, unsigned char c); extern void trace_seq_terminate(struct trace_seq *s); +extern int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp); extern int trace_seq_do_printf(struct trace_seq *s); @@ -114,7 +116,7 @@ struct pevent_plugin_option { char *name; char *plugin_alias; char *description; - char *value; + const char *value; void *priv; int set; }; @@ -152,6 +154,10 @@ struct pevent_plugin_option { * .plugin_alias is used to give a shorter name to access * the vairable. Useful if a plugin handles more than one event. * + * If .value is not set, then it is considered a boolean and only + * .set will be processed. If .value is defined, then it is considered + * a string option and .set will be ignored. 
+ * * PEVENT_PLUGIN_ALIAS: (optional) * The name to use for finding options (uses filename if not defined) */ @@ -185,6 +191,7 @@ struct format_field { struct event_format *event; char *type; char *name; + char *alias; int offset; int size; unsigned int arraylen; @@ -245,6 +252,12 @@ struct print_arg_hex { struct print_arg *size; }; +struct print_arg_int_array { + struct print_arg *field; + struct print_arg *count; + struct print_arg *el_size; +}; + struct print_arg_dynarray { struct format_field *field; struct print_arg *index; @@ -273,6 +286,7 @@ enum print_arg_type { PRINT_FLAGS, PRINT_SYMBOL, PRINT_HEX, + PRINT_INT_ARRAY, PRINT_TYPE, PRINT_STRING, PRINT_BSTRING, @@ -292,6 +306,7 @@ struct print_arg { struct print_arg_flags flags; struct print_arg_symbol symbol; struct print_arg_hex hex; + struct print_arg_int_array int_array; struct print_arg_func func; struct print_arg_string string; struct print_arg_bitmask bitmask; @@ -597,7 +612,7 @@ enum trace_flag_type { }; int pevent_register_comm(struct pevent *pevent, const char *comm, int pid); -void pevent_register_trace_clock(struct pevent *pevent, char *trace_clock); +int pevent_register_trace_clock(struct pevent *pevent, const char *trace_clock); int pevent_register_function(struct pevent *pevent, char *name, unsigned long long addr, char *mod); int pevent_register_print_string(struct pevent *pevent, const char *fmt, @@ -617,6 +632,7 @@ enum pevent_errno pevent_parse_format(struct pevent *pevent, const char *buf, unsigned long size, const char *sys); void pevent_free_format(struct event_format *event); +void pevent_free_format_field(struct format_field *field); void *pevent_get_field_raw(struct trace_seq *s, struct event_format *event, const char *name, struct pevent_record *record, @@ -675,6 +691,11 @@ int pevent_data_type(struct pevent *pevent, struct pevent_record *rec); struct event_format *pevent_data_event_from_type(struct pevent *pevent, int type); int pevent_data_pid(struct pevent *pevent, struct pevent_record *rec); const char *pevent_data_comm_from_pid(struct pevent *pevent, int pid); +struct cmdline; +struct cmdline *pevent_data_pid_from_comm(struct pevent *pevent, const char *comm, + struct cmdline *next); +int pevent_cmdline_pid(struct pevent *pevent, struct cmdline *cmdline); + void pevent_event_info(struct trace_seq *s, struct event_format *event, struct pevent_record *record); int pevent_strerror(struct pevent *pevent, enum pevent_errno errnum, diff --git a/tools/lib/traceevent/event-plugin.c b/tools/lib/traceevent/event-plugin.c index 136162c03af1..a16756ae3526 100644 --- a/tools/lib/traceevent/event-plugin.c +++ b/tools/lib/traceevent/event-plugin.c @@ -18,6 +18,7 @@ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ */ +#include <ctype.h> #include <stdio.h> #include <string.h> #include <dlfcn.h> @@ -49,6 +50,52 @@ struct plugin_list { void *handle; }; +static void lower_case(char *str) +{ + if (!str) + return; + for (; *str; str++) + *str = tolower(*str); +} + +static int update_option_value(struct pevent_plugin_option *op, const char *val) +{ + char *op_val; + + if (!val) { + /* toggle, only if option is boolean */ + if (op->value) + /* Warn? */ + return 0; + op->set ^= 1; + return 0; + } + + /* + * If the option has a value then it takes a string + * otherwise the option is a boolean. 
+ */ + if (op->value) { + op->value = val; + return 0; + } + + /* Option is boolean, must be either "1", "0", "true" or "false" */ + + op_val = strdup(val); + if (!op_val) + return -1; + lower_case(op_val); + + if (strcmp(val, "1") == 0 || strcmp(val, "true") == 0) + op->set = 1; + else if (strcmp(val, "0") == 0 || strcmp(val, "false") == 0) + op->set = 0; + free(op_val); + + return 0; +} + /** * traceevent_plugin_list_options - get list of plugin options * @@ -120,6 +167,7 @@ update_option(const char *file, struct pevent_plugin_option *option) { struct trace_plugin_options *op; char *plugin; + int ret = 0; if (option->plugin_alias) { plugin = strdup(option->plugin_alias); @@ -144,9 +192,10 @@ update_option(const char *file, struct pevent_plugin_option *option) if (strcmp(op->option, option->name) != 0) continue; - option->value = op->value; - option->set ^= 1; - goto out; + ret = update_option_value(option, op->value); + if (ret) + goto out; + break; } /* first look for unnamed options */ @@ -156,14 +205,13 @@ update_option(const char *file, struct pevent_plugin_option *option) if (strcmp(op->option, option->name) != 0) continue; - option->value = op->value; - option->set ^= 1; + ret = update_option_value(option, op->value); break; } out: free(plugin); - return 0; + return ret; } /** diff --git a/tools/lib/traceevent/kbuffer-parse.c b/tools/lib/traceevent/kbuffer-parse.c index dcc665228c71..3bcada3ae05a 100644 --- a/tools/lib/traceevent/kbuffer-parse.c +++ b/tools/lib/traceevent/kbuffer-parse.c @@ -372,7 +372,6 @@ translate_data(struct kbuffer *kbuf, void *data, void **rptr, switch (type_len) { case KBUFFER_TYPE_PADDING: *length = read_4(kbuf, data); - data += *length; break; case KBUFFER_TYPE_TIME_EXTEND: @@ -730,3 +729,14 @@ void kbuffer_set_old_format(struct kbuffer *kbuf) kbuf->next_event = __old_next_event; } + +/** + * kbuffer_start_of_data - return offset of where data starts on subbuffer + * @kbuf: The kbuffer + * + * Returns the location on the subbuffer where the data starts. + */ +int kbuffer_start_of_data(struct kbuffer *kbuf) +{ + return kbuf->start; +} diff --git a/tools/lib/traceevent/kbuffer.h b/tools/lib/traceevent/kbuffer.h index c831f64b17a0..03dce757553f 100644 --- a/tools/lib/traceevent/kbuffer.h +++ b/tools/lib/traceevent/kbuffer.h @@ -63,5 +63,6 @@ int kbuffer_missed_events(struct kbuffer *kbuf); int kbuffer_subbuffer_size(struct kbuffer *kbuf); void kbuffer_set_old_format(struct kbuffer *kbuf); +int kbuffer_start_of_data(struct kbuffer *kbuf); #endif /* _K_BUFFER_H */ diff --git a/tools/lib/traceevent/parse-filter.c b/tools/lib/traceevent/parse-filter.c index b50234402fc2..0144b3d1bb77 100644 --- a/tools/lib/traceevent/parse-filter.c +++ b/tools/lib/traceevent/parse-filter.c @@ -1058,6 +1058,7 @@ process_filter(struct event_format *event, struct filter_arg **parg, *parg = current_op; else *parg = current_exp; + free(token); return PEVENT_ERRNO__UNBALANCED_PAREN; } break; @@ -1168,6 +1169,7 @@ process_filter(struct event_format *event, struct filter_arg **parg, *parg = current_op; + free(token); return 0; fail_alloc: diff --git a/tools/lib/traceevent/plugin_cfg80211.c b/tools/lib/traceevent/plugin_cfg80211.c index 4592d8438318..ec57d0c1fbc2 100644 --- a/tools/lib/traceevent/plugin_cfg80211.c +++ b/tools/lib/traceevent/plugin_cfg80211.c @@ -4,6 +4,19 @@ #include <endian.h> #include "event-parse.h" +/* + * From glibc endian.h, for older systems where it is not present, e.g.: RHEL5, + * Fedora6. 
+ */ +#ifndef le16toh +# if __BYTE_ORDER == __LITTLE_ENDIAN +# define le16toh(x) (x) +# else +# define le16toh(x) __bswap_16 (x) +# endif +#endif + + static unsigned long long process___le16_to_cpup(struct trace_seq *s, unsigned long long *args) { diff --git a/tools/lib/traceevent/trace-seq.c b/tools/lib/traceevent/trace-seq.c index ec3bd16a5488..292dc9f1d233 100644 --- a/tools/lib/traceevent/trace-seq.c +++ b/tools/lib/traceevent/trace-seq.c @@ -231,19 +231,24 @@ void trace_seq_terminate(struct trace_seq *s) s->buffer[s->len] = 0; } -int trace_seq_do_printf(struct trace_seq *s) +int trace_seq_do_fprintf(struct trace_seq *s, FILE *fp) { TRACE_SEQ_CHECK(s); switch (s->state) { case TRACE_SEQ__GOOD: - return printf("%.*s", s->len, s->buffer); + return fprintf(fp, "%.*s", s->len, s->buffer); case TRACE_SEQ__BUFFER_POISONED: - puts("Usage of trace_seq after it was destroyed"); + fprintf(fp, "%s\n", "Usage of trace_seq after it was destroyed"); break; case TRACE_SEQ__MEM_ALLOC_FAILED: - puts("Can't allocate trace_seq buffer memory"); + fprintf(fp, "%s\n", "Can't allocate trace_seq buffer memory"); break; } return -1; } + +int trace_seq_do_printf(struct trace_seq *s) +{ + return trace_seq_do_fprintf(s, stdout); +} diff --git a/tools/net/bpf_exp.l b/tools/net/bpf_exp.l index 833a96611da6..7cc72a336645 100644 --- a/tools/net/bpf_exp.l +++ b/tools/net/bpf_exp.l @@ -90,8 +90,10 @@ extern void yyerror(const char *str); "#"?("hatype") { return K_HATYPE; } "#"?("rxhash") { return K_RXHASH; } "#"?("cpu") { return K_CPU; } -"#"?("vlan_tci") { return K_VLANT; } -"#"?("vlan_pr") { return K_VLANP; } +"#"?("vlan_tci") { return K_VLAN_TCI; } +"#"?("vlan_pr") { return K_VLAN_AVAIL; } +"#"?("vlan_avail") { return K_VLAN_AVAIL; } +"#"?("vlan_tpid") { return K_VLAN_TPID; } "#"?("rand") { return K_RAND; } ":" { return ':'; } diff --git a/tools/net/bpf_exp.y b/tools/net/bpf_exp.y index e6306c51c26f..e24eea1b0db5 100644 --- a/tools/net/bpf_exp.y +++ b/tools/net/bpf_exp.y @@ -56,7 +56,7 @@ static void bpf_set_jmp_label(char *label, enum jmp_type type); %token OP_LDXI %token K_PKT_LEN K_PROTO K_TYPE K_NLATTR K_NLATTR_NEST K_MARK K_QUEUE K_HATYPE -%token K_RXHASH K_CPU K_IFIDX K_VLANT K_VLANP K_POFF K_RAND +%token K_RXHASH K_CPU K_IFIDX K_VLAN_TCI K_VLAN_AVAIL K_VLAN_TPID K_POFF K_RAND %token ':' ',' '[' ']' '(' ')' 'x' 'a' '+' 'M' '*' '&' '#' '%' @@ -155,10 +155,10 @@ ldb | OP_LDB K_CPU { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDB K_VLANT { + | OP_LDB K_VLAN_TCI { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDB K_VLANP { + | OP_LDB K_VLAN_AVAIL { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } | OP_LDB K_POFF { @@ -167,6 +167,9 @@ ldb | OP_LDB K_RAND { bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDB K_VLAN_TPID { + bpf_set_curr_instr(BPF_LD | BPF_B | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldh @@ -206,10 +209,10 @@ ldh | OP_LDH K_CPU { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU); } - | OP_LDH K_VLANT { + | OP_LDH K_VLAN_TCI { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LDH K_VLANP { + | OP_LDH K_VLAN_AVAIL { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } | OP_LDH K_POFF { @@ -218,6 +221,9 @@ ldh | OP_LDH K_RAND { bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LDH 
K_VLAN_TPID { + bpf_set_curr_instr(BPF_LD | BPF_H | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } ; ldi @@ -262,10 +268,10 @@ ld | OP_LD K_CPU { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_CPU); } - | OP_LD K_VLANT { + | OP_LD K_VLAN_TCI { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG); } - | OP_LD K_VLANP { + | OP_LD K_VLAN_AVAIL { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT); } | OP_LD K_POFF { @@ -274,6 +280,9 @@ ld | OP_LD K_RAND { bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, SKF_AD_OFF + SKF_AD_RANDOM); } + | OP_LD K_VLAN_TPID { + bpf_set_curr_instr(BPF_LD | BPF_W | BPF_ABS, 0, 0, + SKF_AD_OFF + SKF_AD_VLAN_TPID); } | OP_LD 'M' '[' number ']' { bpf_set_curr_instr(BPF_LD | BPF_MEM, 0, 0, $4); } | OP_LD '[' 'x' '+' number ']' { diff --git a/tools/net/bpf_jit_disasm.c b/tools/net/bpf_jit_disasm.c index c5baf9c591b7..618c2bcd4eab 100644 --- a/tools/net/bpf_jit_disasm.c +++ b/tools/net/bpf_jit_disasm.c @@ -123,6 +123,8 @@ static int get_last_jit_image(char *haystack, size_t hlen, assert(ret == 0); ptr = haystack; + memset(pmatch, 0, sizeof(pmatch)); + while (1) { ret = regexec(®ex, ptr, 1, pmatch, 0); if (ret == 0) { diff --git a/tools/perf/.gitignore b/tools/perf/.gitignore index 40399c3d97d6..09db62ba5786 100644 --- a/tools/perf/.gitignore +++ b/tools/perf/.gitignore @@ -1,6 +1,7 @@ PERF-CFLAGS PERF-GUI-VARS PERF-VERSION-FILE +FEATURE-DUMP perf perf-read-vdso32 perf-read-vdsox32 @@ -27,3 +28,4 @@ config.mak.autogen *-flex.* *.pyc *.pyo +.config-detected diff --git a/tools/perf/Build b/tools/perf/Build new file mode 100644 index 000000000000..b77370ef7005 --- /dev/null +++ b/tools/perf/Build @@ -0,0 +1,44 @@ +perf-y += builtin-bench.o +perf-y += builtin-annotate.o +perf-y += builtin-diff.o +perf-y += builtin-evlist.o +perf-y += builtin-help.o +perf-y += builtin-sched.o +perf-y += builtin-buildid-list.o +perf-y += builtin-buildid-cache.o +perf-y += builtin-list.o +perf-y += builtin-record.o +perf-y += builtin-report.o +perf-y += builtin-stat.o +perf-y += builtin-timechart.o +perf-y += builtin-top.o +perf-y += builtin-script.o +perf-y += builtin-kmem.o +perf-y += builtin-lock.o +perf-y += builtin-kvm.o +perf-y += builtin-inject.o +perf-y += builtin-mem.o +perf-y += builtin-data.o + +perf-$(CONFIG_AUDIT) += builtin-trace.o +perf-$(CONFIG_LIBELF) += builtin-probe.o + +perf-y += bench/ +perf-y += tests/ + +perf-y += perf.o + +paths += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" +paths += -DPERF_INFO_PATH="BUILD_STR($(infodir_SQ))" +paths += -DPERF_MAN_PATH="BUILD_STR($(mandir_SQ))" + +CFLAGS_builtin-help.o += $(paths) +CFLAGS_builtin-timechart.o += $(paths) +CFLAGS_perf.o += -DPERF_HTML_PATH="BUILD_STR($(htmldir_SQ))" -include $(OUTPUT)PERF-VERSION-FILE + +libperf-y += util/ +libperf-y += arch/ +libperf-y += ui/ +libperf-y += scripts/ + +gtk-y += ui/gtk/ diff --git a/tools/perf/Documentation/Build.txt b/tools/perf/Documentation/Build.txt new file mode 100644 index 000000000000..f6fc6507ba55 --- /dev/null +++ b/tools/perf/Documentation/Build.txt @@ -0,0 +1,49 @@ + +1) perf build +============= +The perf build process consists of several separated building blocks, +which are linked together to form the perf binary: + - libperf library (static) + - perf builtin commands + - traceevent library (static) + - GTK ui library + +Several makefiles govern the perf build: + + - Makefile + top level Makefile working as a wrapper that calls the main + Makefile.perf with a -j option to do 
parallel builds. + + - Makefile.perf + main makefile that triggers build of all perf objects including + installation and documentation processing. + + - tools/build/Makefile.build + main makefile of the build framework + + - tools/build/Build.include + build framework generic definitions + + - Build makefiles + makefiles that defines build objects + +Please refer to tools/build/Documentation/Build.txt for more +information about build framework. + + +2) perf build +============= +The Makefile.perf triggers the build framework for build objects: + perf, libperf, gtk + +resulting in following objects: + $ ls *-in.o + gtk-in.o libperf-in.o perf-in.o + +Those objects are then used in final linking: + libperf-gtk.so <- gtk-in.o libperf-in.o + perf <- perf-in.o libperf-in.o + + +NOTE this description is omitting other libraries involved, only + focusing on build framework outcomes diff --git a/tools/perf/Documentation/callchain-overhead-calculation.txt b/tools/perf/Documentation/callchain-overhead-calculation.txt new file mode 100644 index 000000000000..1a757927195e --- /dev/null +++ b/tools/perf/Documentation/callchain-overhead-calculation.txt @@ -0,0 +1,108 @@ +Overhead calculation +-------------------- +The overhead can be shown in two columns as 'Children' and 'Self' when +perf collects callchains. The 'self' overhead is simply calculated by +adding all period values of the entry - usually a function (symbol). +This is the value that perf shows traditionally and sum of all the +'self' overhead values should be 100%. + +The 'children' overhead is calculated by adding all period values of +the child functions so that it can show the total overhead of the +higher level functions even if they don't directly execute much. +'Children' here means functions that are called from another (parent) +function. + +It might be confusing that the sum of all the 'children' overhead +values exceeds 100% since each of them is already an accumulation of +'self' overhead of its child functions. But with this enabled, users +can find which function has the most overhead even if samples are +spread over the children. + +Consider the following example; there are three functions like below. + +----------------------- +void foo(void) { + /* do something */ +} + +void bar(void) { + /* do something */ + foo(); +} + +int main(void) { + bar() + return 0; +} +----------------------- + +In this case 'foo' is a child of 'bar', and 'bar' is an immediate +child of 'main' so 'foo' also is a child of 'main'. In other words, +'main' is a parent of 'foo' and 'bar', and 'bar' is a parent of 'foo'. + +Suppose all samples are recorded in 'foo' and 'bar' only. When it's +recorded with callchains the output will show something like below +in the usual (self-overhead-only) output of perf report: + +---------------------------------- +Overhead Symbol +........ ..................... + 60.00% foo + | + --- foo + bar + main + __libc_start_main + + 40.00% bar + | + --- bar + main + __libc_start_main +---------------------------------- + +When the --children option is enabled, the 'self' overhead values of +child functions (i.e. 'foo' and 'bar') are added to the parents to +calculate the 'children' overhead. In this case the report could be +displayed as: + +------------------------------------------- +Children Self Symbol +........ ........ .................... 
+ 100.00% 0.00% __libc_start_main + | + --- __libc_start_main + + 100.00% 0.00% main + | + --- main + __libc_start_main + + 100.00% 40.00% bar + | + --- bar + main + __libc_start_main + + 60.00% 60.00% foo + | + --- foo + bar + main + __libc_start_main +------------------------------------------- + +In the above output, the 'self' overhead of 'foo' (60%) was add to the +'children' overhead of 'bar', 'main' and '\_\_libc_start_main'. +Likewise, the 'self' overhead of 'bar' (40%) was added to the +'children' overhead of 'main' and '\_\_libc_start_main'. + +So '\_\_libc_start_main' and 'main' are shown first since they have +same (100%) 'children' overhead (even though they have zero 'self' +overhead) and they are the parents of 'foo' and 'bar'. + +Since v3.16 the 'children' overhead is shown by default and the output +is sorted by its values. The 'children' overhead is disabled by +specifying --no-children option on the command line or by adding +'report.children = false' or 'top.children = false' in the perf config +file. diff --git a/tools/perf/Documentation/perf-bench.txt b/tools/perf/Documentation/perf-bench.txt index f6480cbf309b..bf3d0644bf10 100644 --- a/tools/perf/Documentation/perf-bench.txt +++ b/tools/perf/Documentation/perf-bench.txt @@ -210,6 +210,9 @@ Suite for evaluating hash tables. *wake*:: Suite for evaluating wake calls. +*wake-parallel*:: +Suite for evaluating parallel wake calls. + *requeue*:: Suite for evaluating requeue calls. diff --git a/tools/perf/Documentation/perf-buildid-cache.txt b/tools/perf/Documentation/perf-buildid-cache.txt index 0294c57b1f5e..dd07b55f58d8 100644 --- a/tools/perf/Documentation/perf-buildid-cache.txt +++ b/tools/perf/Documentation/perf-buildid-cache.txt @@ -12,9 +12,9 @@ SYNOPSIS DESCRIPTION ----------- -This command manages the build-id cache. It can add and remove files to/from -the cache. In the future it should as well purge older entries, set upper -limits for the space used by the cache, etc. +This command manages the build-id cache. It can add, remove, update and purge +files to/from the cache. In the future it should as well set upper limits for +the space used by the cache, etc. OPTIONS ------- @@ -36,14 +36,24 @@ OPTIONS actually made. -r:: --remove=:: - Remove specified file from the cache. + Remove a cached binary which has same build-id of specified file + from the cache. +-p:: +--purge=:: + Purge all cached binaries including older caches which have specified + path from the cache. -M:: --missing=:: List missing build ids in the cache for the specified file. -u:: ---update:: - Update specified file of the cache. It can be used to update kallsyms - kernel dso to vmlinux in order to support annotation. +--update=:: + Update specified file of the cache. Note that this doesn't remove + older entires since those may be still needed for annotating old + (or remote) perf.data. Only if there is already a cache which has + exactly same build-id, that is replaced by new one. It can be used + to update kallsyms and kernel dso to vmlinux in order to support + annotation. + -v:: --verbose:: Be more verbose. 
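The 'children' accumulation described in callchain-overhead-calculation.txt above reduces to one rule per sample: the period counts as 'self' (and 'children') for the leaf symbol, and as 'children' only for every caller on the recorded chain. The sketch below restates that rule in C; the sample layout and the add() callback are invented for illustration and are not perf's internal structures:

-----------------------
#include <stdint.h>

struct sample {
	uint64_t period;
	const char **chain;	/* callchain symbols, leaf first */
	int depth;
};

/* add() books (self, children) increments for one symbol */
static void accumulate(const struct sample *s,
		       void (*add)(const char *sym, uint64_t self,
				   uint64_t children))
{
	int i;

	add(s->chain[0], s->period, s->period);	/* leaf gets both */
	for (i = 1; i < s->depth; i++)
		add(s->chain[i], 0, s->period);	/* callers: children only */
}
-----------------------

Applied to the example above, 'foo' and 'bar' keep their 60% and 40% 'self' shares, while 'main' and '__libc_start_main' accumulate 100% 'children' without any 'self' samples.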
diff --git a/tools/perf/Documentation/perf-data.txt b/tools/perf/Documentation/perf-data.txt new file mode 100644 index 000000000000..be8fa1a0a97e --- /dev/null +++ b/tools/perf/Documentation/perf-data.txt @@ -0,0 +1,40 @@ +perf-data(1) +============== + +NAME +---- +perf-data - Data file related processing + +SYNOPSIS +-------- +[verse] +'perf data' [<common options>] <command> [<options>]", + +DESCRIPTION +----------- +Data file related processing. + +COMMANDS +-------- +convert:: + Converts perf data file into another format (only CTF [1] format is support by now). + It's possible to set data-convert debug variable to get debug messages from conversion, + like: + perf --debug data-convert data convert ... + +OPTIONS for 'convert' +--------------------- +--to-ctf:: + Triggers the CTF conversion, specify the path of CTF data directory. + +-i:: + Specify input perf data file path. + +-v:: +--verbose:: + Be more verbose (show counter open errors, etc). + +SEE ALSO +-------- +linkperf:perf[1] +[1] Common Trace Format - http://www.efficios.com/ctf diff --git a/tools/perf/Documentation/perf-diff.txt b/tools/perf/Documentation/perf-diff.txt index e463caa3eb49..d1deb573877f 100644 --- a/tools/perf/Documentation/perf-diff.txt +++ b/tools/perf/Documentation/perf-diff.txt @@ -20,12 +20,20 @@ If no parameters are passed it will assume perf.data.old and perf.data. The differential profile is displayed only for events matching both specified perf.data files. +If no parameters are passed the samples will be sorted by dso and symbol. +As the perf.data files could come from different binaries, the symbols addresses +could vary. So perf diff is based on the comparison of the files and +symbols name. + OPTIONS ------- -D:: --dump-raw-trace:: Dump raw trace in ASCII. +--kallsyms=<file>:: + kallsyms pathname + -m:: --modules:: Load module symbols. WARNING: use only with -k and LIVE kernel diff --git a/tools/perf/Documentation/perf-inject.txt b/tools/perf/Documentation/perf-inject.txt index dc7442cf3d7f..b876ae312699 100644 --- a/tools/perf/Documentation/perf-inject.txt +++ b/tools/perf/Documentation/perf-inject.txt @@ -44,6 +44,33 @@ OPTIONS --kallsyms=<file>:: kallsyms pathname +--itrace:: + Decode Instruction Tracing data, replacing it with synthesized events. + Options are: + + i synthesize instructions events + b synthesize branches events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + x synthesize transactions events + e synthesize error events + d create a debug log + g synthesize a call chain (use with i or x) + + The default is all events i.e. the same as --itrace=ibxe + + In addition, the period (default 100000) for instructions events + can be specified in units of: + + i instructions + t ticks + ms milliseconds + us microseconds + ns nanoseconds (default) + + Also the call chain size (default 16, max. 1024) for instructions or + transactions events can be specified. 
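One design choice stated above for perf diff is worth restating: because the two perf.data files may come from different builds, entries are paired by DSO and symbol name rather than by address. A trivial sketch of that matching rule, with invented types:

-----------------------
#include <string.h>

struct entry {
	const char *dso;
	const char *sym;
	double overhead;
};

/* pair entries by names only; addresses from different binaries
 * cannot be compared */
static int same_symbol(const struct entry *a, const struct entry *b)
{
	return strcmp(a->dso, b->dso) == 0 && strcmp(a->sym, b->sym) == 0;
}
-----------------------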
+ SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-report[1], linkperf:perf-archive[1] diff --git a/tools/perf/Documentation/perf-kmem.txt b/tools/perf/Documentation/perf-kmem.txt index 7c8fbbf3f61c..ff0f433b3fce 100644 --- a/tools/perf/Documentation/perf-kmem.txt +++ b/tools/perf/Documentation/perf-kmem.txt @@ -3,7 +3,7 @@ perf-kmem(1) NAME ---- -perf-kmem - Tool to trace/measure kernel memory(slab) properties +perf-kmem - Tool to trace/measure kernel memory properties SYNOPSIS -------- @@ -25,6 +25,10 @@ OPTIONS --input=<file>:: Select the input file (default: perf.data unless stdin is a fifo) +-v:: +--verbose:: + Be more verbose. (show symbol address, etc) + --caller:: Show per-callsite statistics @@ -33,7 +37,11 @@ OPTIONS -s <key[,key2...]>:: --sort=<key[,key2...]>:: - Sort the output (default: frag,hit,bytes) + Sort the output (default: 'frag,hit,bytes' for slab and 'bytes,hit' + for page). Available sort keys are 'ptr, callsite, bytes, hit, + pingpong, frag' for slab and 'page, callsite, bytes, hit, order, + migtype, gfp' for page. This option should be preceded by one of the + mode selection options - i.e. --slab, --page, --alloc and/or --caller. -l <num>:: --line=<num>:: @@ -42,6 +50,17 @@ OPTIONS --raw-ip:: Print raw ip instead of symbol +--slab:: + Analyze SLAB allocator events. + +--page:: + Analyze page allocator events + +--live:: + Show live page stat. The perf kmem shows total allocation stat by + default, but this option shows live (currently allocated) pages + instead. (This option works with --page option only) + SEE ALSO -------- linkperf:perf-record[1] diff --git a/tools/perf/Documentation/perf-kvm.txt b/tools/perf/Documentation/perf-kvm.txt index 6252e776009c..6a5bb2b17039 100644 --- a/tools/perf/Documentation/perf-kvm.txt +++ b/tools/perf/Documentation/perf-kvm.txt @@ -151,6 +151,12 @@ STAT LIVE OPTIONS Show events other than HLT (x86 only) or Wait state (s390 only) that take longer than duration usecs. +--proc-map-timeout:: + When processing pre-existing threads /proc/XXX/mmap, it may take + a long time, because the file may be huge. A time out is needed + in such cases. + This option sets the time out limit. The default value is 500 ms. + SEE ALSO -------- linkperf:perf-top[1], linkperf:perf-record[1], linkperf:perf-report[1], diff --git a/tools/perf/Documentation/perf-list.txt b/tools/perf/Documentation/perf-list.txt index 3e2aec94f806..bada8933fdd4 100644 --- a/tools/perf/Documentation/perf-list.txt +++ b/tools/perf/Documentation/perf-list.txt @@ -26,6 +26,7 @@ counted. The following modifiers exist: u - user-space counting k - kernel counting h - hypervisor counting + I - non idle counting G - guest counting (in KVM guests) H - host counting (not in KVM guests) p - precise level @@ -127,6 +128,12 @@ To limit the list use: One or more types can be used at the same time, listing the events for the types specified. +Support raw format: + +. '--raw-dump', shows the raw-dump of all the events. +. '--raw-dump [hw|sw|cache|tracepoint|pmu|event_glob]', shows the raw-dump of + a certain kind of events. + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-top[1], diff --git a/tools/perf/Documentation/perf-probe.txt b/tools/perf/Documentation/perf-probe.txt index aaa869be3dc1..3a8a9ba2b041 100644 --- a/tools/perf/Documentation/perf-probe.txt +++ b/tools/perf/Documentation/perf-probe.txt @@ -14,11 +14,13 @@ or or 'perf probe' [options] --del='[GROUP:]EVENT' [...] 
or -'perf probe' --list +'perf probe' --list[=[GROUP:]EVENT] or 'perf probe' [options] --line='LINE' or 'perf probe' [options] --vars='PROBEPOINT' +or +'perf probe' [options] --funcs DESCRIPTION ----------- @@ -47,6 +49,12 @@ OPTIONS -v:: --verbose:: Be more verbose (show parsed arguments, etc). + Can not use with -q. + +-q:: +--quiet:: + Be quiet (do not show any messages including errors). + Can not use with -v. -a:: --add=:: @@ -58,8 +66,8 @@ OPTIONS classes(e.g. [a-z], [!A-Z]). -l:: ---list:: - List up current probe events. +--list[=[GROUP:]EVENT]:: + List up current probe events. This can also accept filtering patterns of event names. -L:: --line=:: @@ -75,10 +83,15 @@ OPTIONS (Only for --vars) Show external defined variables in addition to local variables. +--no-inlines:: + (Only for --add) Search only for non-inlined functions. The functions + which do not have instances are ignored. + -F:: ---funcs:: +--funcs[=FILTER]:: Show available functions in given module or kernel. With -x/--exec, can also list functions in a user space executable / shared library. + This also can accept a FILTER rule argument. --filter=FILTER:: (Only for --vars and --funcs) Set filter. FILTER is a combination of glob @@ -96,7 +109,7 @@ OPTIONS Dry run. With this option, --add and --del doesn't execute actual adding and removal operations. ---max-probes:: +--max-probes=NUM:: Set the maximum number of probe points for an event. Default is 128. -x:: @@ -104,8 +117,13 @@ OPTIONS Specify path to the executable or shared library file for user space tracing. Can also be used with --funcs option. +--demangle:: + Demangle application symbols. --no-demangle is also available + for disabling demangling. + --demangle-kernel:: - Demangle kernel symbols. + Demangle kernel symbols. --no-demangle-kernel is also available + for disabling kernel demangling. In absence of -m/-x options, perf probe checks if the first argument after the options is an absolute path name. If its an absolute path, perf probe @@ -137,6 +155,7 @@ Each probe argument follows below syntax. [NAME=]LOCALVAR|$retval|%REG|@SYMBOL[:TYPE] 'NAME' specifies the name of this argument (optional). You can use the name of local variable, local data structure member (e.g. var->field, var.field2), local array with fixed index (e.g. array[1], var->array[0], var->pointer[2]), or kprobe-tracer argument format (e.g. $retval, %ax, etc). Note that the name of this argument will be set as the last member name if you specify a local data structure member (e.g. field2 for 'var->field1.field2'.) +'$vars' and '$params' special arguments are also available for NAME, '$vars' is expanded to the local variables (including function parameters) which can access at given probe point. '$params' is expanded to only the function parameters. 'TYPE' casts the type of this argument (optional). If omitted, perf probe automatically set the type based on debuginfo. You can specify 'string' type only for the local variable or structure member which is an array of or a pointer to 'char' or 'unsigned char' type. On x86 systems %REG is always the short form of the register: for example %AX. %RAX or %EAX is not valid. diff --git a/tools/perf/Documentation/perf-record.txt b/tools/perf/Documentation/perf-record.txt index 31e977459c51..9b9d9d086680 100644 --- a/tools/perf/Documentation/perf-record.txt +++ b/tools/perf/Documentation/perf-record.txt @@ -55,6 +55,11 @@ OPTIONS If you want to profile write accesses in [0x1000~1008), just set 'mem:0x1000/8:w'. 
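The 'mem:0x1000/8:w' example above maps, roughly, to a hardware write breakpoint opened through perf_event_open(). The sketch below shows that correspondence only; it assumes the platform defines __NR_perf_event_open and supports 8-byte breakpoints, and it omits the sampling setup and error handling that perf record would add:

-----------------------
#include <linux/perf_event.h>
#include <linux/hw_breakpoint.h>
#include <string.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>

static int open_mem_write_bp(pid_t pid)
{
	struct perf_event_attr attr;

	memset(&attr, 0, sizeof(attr));
	attr.size    = sizeof(attr);
	attr.type    = PERF_TYPE_BREAKPOINT;
	attr.bp_type = HW_BREAKPOINT_W;		/* the ':w' modifier */
	attr.bp_addr = 0x1000;			/* 'mem:0x1000'      */
	attr.bp_len  = HW_BREAKPOINT_LEN_8;	/* the '/8' length   */

	return syscall(__NR_perf_event_open, &attr, pid, -1, -1, 0);
}
-----------------------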
+ - a group of events surrounded by a pair of brace ("{event1,event2,...}"). + Each event is separated by commas and the group should be quoted to + prevent the shell interpretation. You also need to use --group on + "perf report" to view group events together. + --filter=<filter>:: Event filter. @@ -62,9 +67,6 @@ OPTIONS --all-cpus:: System-wide collection from all CPUs. --l:: - Scale counter values. - -p:: --pid=:: Record events on existing process ID (comma separated list). @@ -106,6 +108,12 @@ OPTIONS Number of mmap data pages (must be a power of two) or size specification with appended unit character - B/K/M/G. The size is rounded up to have nearest pages power of two value. + Also, by adding a comma, the number of mmap pages for AUX + area tracing can be specified. + +--group:: + Put all events in a single event group. This precedes the --event + option and remains only for backward compatibility. See --event. -g:: Enables call-graph (stack chain/backtrace) recording. @@ -115,13 +123,19 @@ OPTIONS implies -g. Allows specifying "fp" (frame pointer) or "dwarf" - (DWARF's CFI - Call Frame Information) as the method to collect + (DWARF's CFI - Call Frame Information) or "lbr" + (Hardware Last Branch Record facility) as the method to collect the information used to show the call graphs. In some systems, where binaries are build with gcc --fomit-frame-pointer, using the "fp" method will produce bogus call graphs, using "dwarf", if available (perf tools linked to the libunwind library) should be used instead. + Using the "lbr" method doesn't require any compiler options. It + will produce call graphs from the hardware LBR registers. The + main limition is that it is only available on new Intel + platforms, such as Haswell. It can only get user call chain. It + doesn't work with branch stack sampling at the same time. -q:: --quiet:: @@ -133,16 +147,21 @@ OPTIONS -s:: --stat:: - Per thread counts. + Record per-thread event counts. Use it with 'perf report -T' to see + the values. -d:: --data:: - Sample addresses. + Record the sample addresses. -T:: --timestamp:: - Sample timestamps. Use it with 'perf report -D' to see the timestamps, - for instance. + Record the sample timestamps. Use it with 'perf report -D' to see the + timestamps, for instance. + +-P:: +--period:: + Record the sample period. -n:: --no-samples:: @@ -235,6 +254,28 @@ Capture machine state (registers) at interrupt, i.e., on counter overflows for each sample. List of captured registers depends on the architecture. This option is off by default. +--running-time:: +Record running and enabled time for read events (:S) + +-k:: +--clockid:: +Sets the clock id to use for the various time fields in the perf_event_type +records. See clock_gettime(). In particular CLOCK_MONOTONIC and +CLOCK_MONOTONIC_RAW are supported, some events might also allow +CLOCK_BOOTTIME, CLOCK_REALTIME and CLOCK_TAI. + +-S:: +--snapshot:: +Select AUX area tracing Snapshot Mode. This option is valid only with an +AUX area tracing event. Optionally the number of bytes to capture per +snapshot can be specified. In Snapshot Mode, trace data is captured only when +signal SIGUSR2 is received. + +--proc-map-timeout:: +When processing pre-existing threads /proc/XXX/mmap, it may take a long time, +because the file may be huge. A time out is needed in such cases. +This option sets the time out limit. The default value is 500 ms. 
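The -k/--clockid option described above exists so that sample timestamps and application-side timestamps can share one clock. A minimal sketch of the application side follows; the clock constant is just an example, and the exact spelling accepted by -k is not shown in this hunk, so treat the pairing as an assumption:

-----------------------
#include <stdio.h>
#include <time.h>

int main(void)
{
	struct timespec ts;

	/* use the same clock that perf record was told to use with -k,
	 * e.g. CLOCK_MONOTONIC_RAW, so both timelines can be merged */
	clock_gettime(CLOCK_MONOTONIC_RAW, &ts);
	printf("app event at %lld.%09ld\n", (long long)ts.tv_sec, ts.tv_nsec);
	return 0;
}
-----------------------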
+ SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-list[1] diff --git a/tools/perf/Documentation/perf-report.txt b/tools/perf/Documentation/perf-report.txt index dd7cccdde498..c33b69f3374f 100644 --- a/tools/perf/Documentation/perf-report.txt +++ b/tools/perf/Documentation/perf-report.txt @@ -34,12 +34,18 @@ OPTIONS -T:: --threads:: - Show per-thread event counters + Show per-thread event counters. The input data file should be recorded + with -s option. -c:: --comms=:: Only consider symbols in these comms. CSV that understands file://filename entries. This option will affect the percentage of the overhead column. See --percentage for more info. +--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). -d:: --dsos=:: Only consider symbols in these dsos. CSV that understands @@ -188,6 +194,7 @@ OPTIONS Accumulate callchain of children to parent entry so that then can show up in the output. The output will have a new "Children" column and will be sorted on the data. It requires callchains are recorded. + See the `overhead calculation' section for more details. --max-stack:: Set the stack depth limit when parsing the callchain, anything @@ -318,6 +325,37 @@ OPTIONS --header-only:: Show only perf.data header (forces --stdio). +--itrace:: + Options for decoding instruction tracing data. The options are: + + i synthesize instructions events + b synthesize branches events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + x synthesize transactions events + e synthesize error events + d create a debug log + g synthesize a call chain (use with i or x) + + The default is all events i.e. the same as --itrace=ibxe + + In addition, the period (default 100000) for instructions events + can be specified in units of: + + i instructions + t ticks + ms milliseconds + us microseconds + ns nanoseconds (default) + + Also the call chain size (default 16, max. 1024) for instructions or + transactions events can be specified. + + To disable decoding entirely, use --no-itrace. + + +include::callchain-overhead-calculation.txt[] + SEE ALSO -------- linkperf:perf-stat[1], linkperf:perf-annotate[1] diff --git a/tools/perf/Documentation/perf-script.txt b/tools/perf/Documentation/perf-script.txt index a21eec05bc42..c82df572fac2 100644 --- a/tools/perf/Documentation/perf-script.txt +++ b/tools/perf/Documentation/perf-script.txt @@ -115,7 +115,8 @@ OPTIONS -f:: --fields:: Comma separated list of fields to print. Options are: - comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, srcline, period. + comm, tid, pid, time, cpu, event, trace, ip, sym, dso, addr, symoff, + srcline, period, flags. Field list can be prepended with the type, trace, sw or hw, to indicate to which event type the field list applies. e.g., -f sw:comm,tid,time,ip,sym and -f trace:time,cpu,trace @@ -165,6 +166,12 @@ OPTIONS At this point usage is displayed, and perf-script exits. + The flags field is synthesized and may have a value when Instruction + Trace decoding. The flags are "bcrosyiABEx" which stand for branch, + call, return, conditional, system, asynchronous, interrupt, + transaction abort, trace begin, trace end, and in transaction, + respectively. + Finally, a user may not set fields to none for all event types. i.e., -f "" is not allowed. @@ -193,6 +200,12 @@ OPTIONS Only display events for these comms. CSV that understands file://filename entries. 
+--pid=:: + Only show events for given process ID (comma separated list). + +--tid=:: + Only show events for given thread ID (comma separated list). + -I:: --show-info:: Display extended information about the perf.data file. This adds @@ -215,6 +228,34 @@ OPTIONS --header-only Show only perf.data header. +--itrace:: + Options for decoding instruction tracing data. The options are: + + i synthesize instructions events + b synthesize branches events + c synthesize branches events (calls only) + r synthesize branches events (returns only) + x synthesize transactions events + e synthesize error events + d create a debug log + g synthesize a call chain (use with i or x) + + The default is all events i.e. the same as --itrace=ibxe + + In addition, the period (default 100000) for instructions events + can be specified in units of: + + i instructions + t ticks + ms milliseconds + us microseconds + ns nanoseconds (default) + + Also the call chain size (default 16, max. 1024) for instructions or + transactions events can be specified. + + To disable decoding entirely, use --no-itrace. + SEE ALSO -------- linkperf:perf-record[1], linkperf:perf-script-perl[1], diff --git a/tools/perf/Documentation/perf-stat.txt b/tools/perf/Documentation/perf-stat.txt index 04e150d83e7d..47469abdcc1c 100644 --- a/tools/perf/Documentation/perf-stat.txt +++ b/tools/perf/Documentation/perf-stat.txt @@ -144,6 +144,10 @@ is a useful mode to detect imbalance between physical cores. To enable this mod use --per-core in addition to -a. (system-wide). The output includes the core number and the number of online logical processors on that physical processor. +--per-thread:: +Aggregate counts per monitored threads, when monitoring threads (-t option) +or processes (-p option). + -D msecs:: --delay msecs:: After starting the program, wait msecs before measuring. This is useful to diff --git a/tools/perf/Documentation/perf-top.txt b/tools/perf/Documentation/perf-top.txt index 3265b1070518..776aec4d0927 100644 --- a/tools/perf/Documentation/perf-top.txt +++ b/tools/perf/Documentation/perf-top.txt @@ -168,7 +168,7 @@ Default is to monitor all CPUS. Accumulate callchain of children to parent entry so that then can show up in the output. The output will have a new "Children" column and will be sorted on the data. It requires -g/--call-graph option - enabled. + enabled. See the `overhead calculation' section for more details. --max-stack:: Set the stack depth limit when parsing the callchain, anything @@ -201,6 +201,12 @@ Default is to monitor all CPUS. Force each column width to the provided list, for large terminal readability. 0 means no limit (default behavior). +--proc-map-timeout:: + When processing pre-existing threads /proc/XXX/mmap, it may take + a long time, because the file may be huge. A time out is needed + in such cases. + This option sets the time out limit. The default value is 500 ms. + INTERACTIVE PROMPTING KEYS -------------------------- @@ -234,6 +240,7 @@ INTERACTIVE PROMPTING KEYS Pressing any unmapped key displays a menu, and prompts for input. +include::callchain-overhead-calculation.txt[] SEE ALSO -------- diff --git a/tools/perf/Documentation/perf-trace.txt b/tools/perf/Documentation/perf-trace.txt index 7e1b1f2bb83c..7ea078658a87 100644 --- a/tools/perf/Documentation/perf-trace.txt +++ b/tools/perf/Documentation/perf-trace.txt @@ -35,7 +35,7 @@ OPTIONS -e:: --expr:: - List of events to show, currently only syscall names. + List of syscalls to show, currently only syscall names. Prefixing with ! 
shows all syscalls but the ones specified. You may need to escape it. @@ -55,6 +55,9 @@ OPTIONS --uid=:: Record events in threads owned by uid. Name or number. +--filter-pids=:: + Filter out events for these pids and for 'trace' itself (comma separated list). + -v:: --verbose=:: Verbosity level. @@ -115,6 +118,14 @@ the thread executes on the designated CPUs. Default is to monitor all CPUs. --syscalls:: Trace system calls. This options is enabled by default. +--event:: + Trace other events, see 'perf list' for a complete list. + +--proc-map-timeout:: + When processing pre-existing threads /proc/XXX/mmap, it may take a long time, + because the file may be huge. A time out is needed in such cases. + This option sets the time out limit. The default value is 500 ms. + PAGEFAULTS ---------- diff --git a/tools/perf/Documentation/perf.txt b/tools/perf/Documentation/perf.txt index 1e8e400b4493..2b131776363e 100644 --- a/tools/perf/Documentation/perf.txt +++ b/tools/perf/Documentation/perf.txt @@ -13,11 +13,16 @@ SYNOPSIS OPTIONS ------- --debug:: - Setup debug variable (just verbose for now) in value + Setup debug variable (see list below) in value range (0, 10). Use like: --debug verbose # sets verbose = 1 --debug verbose=2 # sets verbose = 2 + List of debug variables allowed to set: + verbose - general debug messages + ordered-events - ordered events object debug messages + data-convert - data convert command debug messages + --buildid-dir:: Setup buildid cache directory. It has higher priority than buildid.dir config file option. diff --git a/tools/perf/MANIFEST b/tools/perf/MANIFEST index fbbfdc39271d..d01a0aad5a01 100644 --- a/tools/perf/MANIFEST +++ b/tools/perf/MANIFEST @@ -1,11 +1,32 @@ tools/perf +tools/arch/alpha/include/asm/barrier.h +tools/arch/arm/include/asm/barrier.h +tools/arch/ia64/include/asm/barrier.h +tools/arch/mips/include/asm/barrier.h +tools/arch/powerpc/include/asm/barrier.h +tools/arch/s390/include/asm/barrier.h +tools/arch/sh/include/asm/barrier.h +tools/arch/sparc/include/asm/barrier.h +tools/arch/sparc/include/asm/barrier_32.h +tools/arch/sparc/include/asm/barrier_64.h +tools/arch/tile/include/asm/barrier.h +tools/arch/x86/include/asm/barrier.h +tools/arch/xtensa/include/asm/barrier.h tools/scripts +tools/build +tools/arch/x86/include/asm/atomic.h +tools/arch/x86/include/asm/rmwcc.h tools/lib/traceevent tools/lib/api +tools/lib/hweight.c +tools/lib/rbtree.c tools/lib/symbol/kallsyms.c tools/lib/symbol/kallsyms.h tools/lib/util/find_next_bit.c +tools/include/asm/atomic.h +tools/include/asm/barrier.h tools/include/asm/bug.h +tools/include/asm-generic/barrier.h tools/include/asm-generic/bitops/arch_hweight.h tools/include/asm-generic/bitops/atomic.h tools/include/asm-generic/bitops/const_hweight.h @@ -16,37 +37,35 @@ tools/include/asm-generic/bitops/fls64.h tools/include/asm-generic/bitops/fls.h tools/include/asm-generic/bitops/hweight.h tools/include/asm-generic/bitops.h +tools/include/linux/atomic.h tools/include/linux/bitops.h tools/include/linux/compiler.h tools/include/linux/export.h tools/include/linux/hash.h +tools/include/linux/kernel.h +tools/include/linux/list.h tools/include/linux/log2.h +tools/include/linux/poison.h +tools/include/linux/rbtree.h +tools/include/linux/rbtree_augmented.h tools/include/linux/types.h include/asm-generic/bitops/arch_hweight.h include/asm-generic/bitops/const_hweight.h include/asm-generic/bitops/fls64.h include/asm-generic/bitops/__fls.h include/asm-generic/bitops/fls.h -include/linux/const.h include/linux/perf_event.h 
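Corresponding sketches for the stat, trace and --debug additions described above; the pids, the tracepoint and the debug level are illustrative only:

  # aggregate counts per thread of an existing process
  $ perf stat --per-thread -p 1234
  # strace-like view plus an extra tracepoint, filtering out two noisy pids
  $ perf trace -a --event sched:sched_switch --filter-pids=2117,2119
  # enable one of the new debug variables, following the 'verbose' pattern
  $ perf --debug ordered-events=2 report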
-include/linux/rbtree.h include/linux/list.h include/linux/hash.h include/linux/stringify.h -lib/find_next_bit.c -lib/hweight.c -lib/rbtree.c include/linux/swab.h arch/*/include/asm/unistd*.h -arch/*/include/asm/perf_regs.h arch/*/include/uapi/asm/unistd*.h arch/*/include/uapi/asm/perf_regs.h arch/*/lib/memcpy*.S arch/*/lib/memset*.S include/linux/poison.h -include/linux/magic.h include/linux/hw_breakpoint.h -include/linux/rbtree_augmented.h include/uapi/linux/perf_event.h include/uapi/linux/const.h include/uapi/linux/swab.h diff --git a/tools/perf/Makefile b/tools/perf/Makefile index cb2e5868c8e8..480546d5f13b 100644 --- a/tools/perf/Makefile +++ b/tools/perf/Makefile @@ -24,8 +24,8 @@ unexport MAKEFLAGS # (To override it, run 'make JOBS=1' and similar.) # ifeq ($(JOBS),) - JOBS := $(shell grep -c ^processor /proc/cpuinfo 2>/dev/null) - ifeq ($(JOBS),) + JOBS := $(shell (getconf _NPROCESSORS_ONLN || egrep -c '^processor|^CPU[0-9]' /proc/cpuinfo) 2>/dev/null) + ifeq ($(JOBS),0) JOBS := 1 endif endif @@ -83,8 +83,8 @@ build-test: # # All other targets get passed through: # -%: +%: FORCE $(print_msg) $(make) -.PHONY: tags TAGS +.PHONY: tags TAGS FORCE Makefile diff --git a/tools/perf/Makefile.perf b/tools/perf/Makefile.perf index aa6a50447c32..bba34636b733 100644 --- a/tools/perf/Makefile.perf +++ b/tools/perf/Makefile.perf @@ -68,7 +68,13 @@ include config/utilities.mak # for reading the x32 mode 32-bit compatibility VDSO in 64-bit mode # # Define NO_ZLIB if you do not want to support compressed kernel modules - +# +# Define LIBBABELTRACE if you DO want libbabeltrace support +# for CTF data format. +# +# Define NO_LZMA if you do not want to support compressed (xz) kernel modules +# +# Define NO_AUXTRACE if you do not want AUX area tracing support ifeq ($(srctree),) srctree := $(patsubst %/,%,$(dir $(shell pwd))) @@ -82,14 +88,43 @@ endif ifneq ($(OUTPUT),) #$(info Determined 'OUTPUT' to be $(OUTPUT)) +# Adding $(OUTPUT) as a directory to look for source files, +# because use generated output files as sources dependency +# for flex/bison parsers. +VPATH += $(OUTPUT) +export VPATH endif +ifeq ($(V),1) + Q = +else + Q = @ +endif + +# Do not use make's built-in rules +# (this improves performance and avoids hard-to-debug behaviour); +MAKEFLAGS += -r + $(OUTPUT)PERF-VERSION-FILE: ../../.git/HEAD - @$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) - @touch $(OUTPUT)PERF-VERSION-FILE + $(Q)$(SHELL_PATH) util/PERF-VERSION-GEN $(OUTPUT) + $(Q)touch $(OUTPUT)PERF-VERSION-FILE + +# Makefiles suck: This macro sets a default value of $(2) for the +# variable named by $(1), unless the variable has been set by +# environment or command line. This is necessary for CC and AR +# because make sets default values, so the simpler ?= approach +# won't work as expected. +define allow-override + $(if $(or $(findstring environment,$(origin $(1))),\ + $(findstring command line,$(origin $(1)))),,\ + $(eval $(1) = $(2))) +endef + +# Allow setting CC and AR and LD, or setting CROSS_COMPILE as a prefix. 
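In practice the new build knobs look like the following; the compiler and cross prefix are only examples. The allow-override macro defined above (its $(call allow-override,CC,...) uses follow right below) merely keeps a CC/AR/LD given in the environment or on the command line from being clobbered by make's built-in defaults:

  # parallelism now defaults to the online CPU count via getconf; force it
  $ make -C tools/perf JOBS=4
  # verbose build: V=1 empties $(Q) so every command is echoed
  $ make -C tools/perf V=1
  # pick a different compiler without it being reset back to cc
  $ make -C tools/perf CC=clang
  # or derive CC/AR/LD from a cross toolchain prefix
  $ make -C tools/perf ARCH=arm CROSS_COMPILE=arm-linux-gnueabi-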
+$(call allow-override,CC,$(CROSS_COMPILE)gcc) +$(call allow-override,AR,$(CROSS_COMPILE)ar) +$(call allow-override,LD,$(CROSS_COMPILE)ld) -CC = $(CROSS_COMPILE)gcc -AR = $(CROSS_COMPILE)ar PKG_CONFIG = $(CROSS_COMPILE)pkg-config RM = rm -f @@ -127,10 +162,6 @@ export prefix bindir sharedir sysconfdir DESTDIR SPARSE_FLAGS = -D__BIG_ENDIAN__ -D__powerpc__ # Guard against environment variables -BUILTIN_OBJS = -LIB_H = -LIB_OBJS = -GTK_OBJS = PYRF_OBJS = SCRIPT_SH = @@ -155,8 +186,11 @@ endif LIBTRACEEVENT = $(TE_PATH)libtraceevent.a export LIBTRACEEVENT -LIBAPIKFS = $(LIB_PATH)libapikfs.a -export LIBAPIKFS +LIBTRACEEVENT_DYNAMIC_LIST = $(TE_PATH)libtraceevent-dynamic-list +LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS = -Xlinker --dynamic-list=$(LIBTRACEEVENT_DYNAMIC_LIST) + +LIBAPI = $(LIB_PATH)libapi.a +export LIBAPI # python extension build directories PYTHON_EXTBUILD := $(OUTPUT)python_ext_build/ @@ -167,10 +201,11 @@ export PYTHON_EXTBUILD_LIB PYTHON_EXTBUILD_TMP python-clean := $(call QUIET_CLEAN, python) $(RM) -r $(PYTHON_EXTBUILD) $(OUTPUT)python/perf.so PYTHON_EXT_SRCS := $(shell grep -v ^\# util/python-ext-sources) -PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPIKFS) +PYTHON_EXT_DEPS := util/python-ext-sources util/setup.py $(LIBTRACEEVENT) $(LIBAPI) -$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) - $(QUIET_GEN)CFLAGS='$(CFLAGS)' $(PYTHON_WORD) util/setup.py \ +$(OUTPUT)python/perf.so: $(PYTHON_EXT_SRCS) $(PYTHON_EXT_DEPS) $(LIBTRACEEVENT_DYNAMIC_LIST) + $(QUIET_GEN)CFLAGS='$(CFLAGS)' LDFLAGS='$(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS)' \ + $(PYTHON_WORD) util/setup.py \ --quiet build_ext; \ mkdir -p $(OUTPUT)python && \ cp $(PYTHON_EXTBUILD_LIB)perf.so $(OUTPUT)python/ @@ -206,297 +241,9 @@ endif export PERL_PATH -$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l - -$(OUTPUT)util/parse-events-bison.c: util/parse-events.y - $(QUIET_BISON)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $(OUTPUT)util/parse-events-bison.c -p parse_events_ - -$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c - $(QUIET_FLEX)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l - -$(OUTPUT)util/pmu-bison.c: util/pmu.y - $(QUIET_BISON)$(BISON) -v util/pmu.y -d -o $(OUTPUT)util/pmu-bison.c -p perf_pmu_ - -$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c -$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c - LIB_FILE=$(OUTPUT)libperf.a -LIB_H += ../lib/symbol/kallsyms.h -LIB_H += ../../include/uapi/linux/perf_event.h -LIB_H += ../../include/linux/rbtree.h -LIB_H += ../../include/linux/list.h -LIB_H += ../../include/uapi/linux/const.h -LIB_H += ../include/linux/hash.h -LIB_H += ../../include/linux/stringify.h -LIB_H += util/include/linux/bitmap.h -LIB_H += ../include/linux/bitops.h -LIB_H += ../include/asm-generic/bitops/arch_hweight.h -LIB_H += ../include/asm-generic/bitops/atomic.h -LIB_H += ../include/asm-generic/bitops/const_hweight.h -LIB_H += ../include/asm-generic/bitops/find.h -LIB_H += ../include/asm-generic/bitops/fls64.h -LIB_H += ../include/asm-generic/bitops/fls.h -LIB_H += ../include/asm-generic/bitops/__ffs.h -LIB_H += ../include/asm-generic/bitops/__fls.h -LIB_H += ../include/asm-generic/bitops/hweight.h -LIB_H += ../include/asm-generic/bitops.h -LIB_H += ../include/linux/compiler.h -LIB_H 
+= ../include/linux/log2.h -LIB_H += util/include/linux/const.h -LIB_H += util/include/linux/ctype.h -LIB_H += util/include/linux/kernel.h -LIB_H += util/include/linux/list.h -LIB_H += ../include/linux/export.h -LIB_H += util/include/linux/poison.h -LIB_H += util/include/linux/rbtree.h -LIB_H += util/include/linux/rbtree_augmented.h -LIB_H += util/include/linux/string.h -LIB_H += ../include/linux/types.h -LIB_H += util/include/linux/linkage.h -LIB_H += util/include/asm/asm-offsets.h -LIB_H += ../include/asm/bug.h -LIB_H += util/include/asm/byteorder.h -LIB_H += util/include/asm/swab.h -LIB_H += util/include/asm/system.h -LIB_H += util/include/asm/uaccess.h -LIB_H += util/include/dwarf-regs.h -LIB_H += util/include/asm/dwarf2.h -LIB_H += util/include/asm/cpufeature.h -LIB_H += util/include/asm/unistd_32.h -LIB_H += util/include/asm/unistd_64.h -LIB_H += perf.h -LIB_H += util/annotate.h -LIB_H += util/cache.h -LIB_H += util/callchain.h -LIB_H += util/build-id.h -LIB_H += util/db-export.h -LIB_H += util/debug.h -LIB_H += util/pmu.h -LIB_H += util/event.h -LIB_H += util/evsel.h -LIB_H += util/evlist.h -LIB_H += util/exec_cmd.h -LIB_H += util/find-vdso-map.c -LIB_H += util/levenshtein.h -LIB_H += util/machine.h -LIB_H += util/map.h -LIB_H += util/parse-options.h -LIB_H += util/parse-events.h -LIB_H += util/quote.h -LIB_H += util/util.h -LIB_H += util/xyarray.h -LIB_H += util/header.h -LIB_H += util/help.h -LIB_H += util/session.h -LIB_H += util/ordered-events.h -LIB_H += util/strbuf.h -LIB_H += util/strlist.h -LIB_H += util/strfilter.h -LIB_H += util/svghelper.h -LIB_H += util/tool.h -LIB_H += util/run-command.h -LIB_H += util/sigchain.h -LIB_H += util/dso.h -LIB_H += util/symbol.h -LIB_H += util/color.h -LIB_H += util/values.h -LIB_H += util/sort.h -LIB_H += util/hist.h -LIB_H += util/comm.h -LIB_H += util/thread.h -LIB_H += util/thread_map.h -LIB_H += util/trace-event.h -LIB_H += util/probe-finder.h -LIB_H += util/dwarf-aux.h -LIB_H += util/probe-event.h -LIB_H += util/pstack.h -LIB_H += util/cpumap.h -LIB_H += util/top.h -LIB_H += $(ARCH_INCLUDE) -LIB_H += util/cgroup.h -LIB_H += $(LIB_INCLUDE)traceevent/event-parse.h -LIB_H += util/target.h -LIB_H += util/rblist.h -LIB_H += util/intlist.h -LIB_H += util/perf_regs.h -LIB_H += util/unwind.h -LIB_H += util/vdso.h -LIB_H += util/tsc.h -LIB_H += ui/helpline.h -LIB_H += ui/progress.h -LIB_H += ui/util.h -LIB_H += ui/ui.h -LIB_H += util/data.h -LIB_H += util/kvm-stat.h -LIB_H += util/thread-stack.h - -LIB_OBJS += $(OUTPUT)util/abspath.o -LIB_OBJS += $(OUTPUT)util/alias.o -LIB_OBJS += $(OUTPUT)util/annotate.o -LIB_OBJS += $(OUTPUT)util/build-id.o -LIB_OBJS += $(OUTPUT)util/config.o -LIB_OBJS += $(OUTPUT)util/ctype.o -LIB_OBJS += $(OUTPUT)util/db-export.o -LIB_OBJS += $(OUTPUT)util/pmu.o -LIB_OBJS += $(OUTPUT)util/environment.o -LIB_OBJS += $(OUTPUT)util/event.o -LIB_OBJS += $(OUTPUT)util/evlist.o -LIB_OBJS += $(OUTPUT)util/evsel.o -LIB_OBJS += $(OUTPUT)util/exec_cmd.o -LIB_OBJS += $(OUTPUT)util/find_next_bit.o -LIB_OBJS += $(OUTPUT)util/help.o -LIB_OBJS += $(OUTPUT)util/kallsyms.o -LIB_OBJS += $(OUTPUT)util/levenshtein.o -LIB_OBJS += $(OUTPUT)util/parse-options.o -LIB_OBJS += $(OUTPUT)util/parse-events.o -LIB_OBJS += $(OUTPUT)util/path.o -LIB_OBJS += $(OUTPUT)util/rbtree.o -LIB_OBJS += $(OUTPUT)util/bitmap.o -LIB_OBJS += $(OUTPUT)util/hweight.o -LIB_OBJS += $(OUTPUT)util/run-command.o -LIB_OBJS += $(OUTPUT)util/quote.o -LIB_OBJS += $(OUTPUT)util/strbuf.o -LIB_OBJS += $(OUTPUT)util/string.o -LIB_OBJS += $(OUTPUT)util/strlist.o -LIB_OBJS += 
$(OUTPUT)util/strfilter.o -LIB_OBJS += $(OUTPUT)util/top.o -LIB_OBJS += $(OUTPUT)util/usage.o -LIB_OBJS += $(OUTPUT)util/wrapper.o -LIB_OBJS += $(OUTPUT)util/sigchain.o -LIB_OBJS += $(OUTPUT)util/dso.o -LIB_OBJS += $(OUTPUT)util/symbol.o -LIB_OBJS += $(OUTPUT)util/symbol-elf.o -LIB_OBJS += $(OUTPUT)util/color.o -LIB_OBJS += $(OUTPUT)util/pager.o -LIB_OBJS += $(OUTPUT)util/header.o -LIB_OBJS += $(OUTPUT)util/callchain.o -LIB_OBJS += $(OUTPUT)util/values.o -LIB_OBJS += $(OUTPUT)util/debug.o -LIB_OBJS += $(OUTPUT)util/machine.o -LIB_OBJS += $(OUTPUT)util/map.o -LIB_OBJS += $(OUTPUT)util/pstack.o -LIB_OBJS += $(OUTPUT)util/session.o -LIB_OBJS += $(OUTPUT)util/ordered-events.o -LIB_OBJS += $(OUTPUT)util/comm.o -LIB_OBJS += $(OUTPUT)util/thread.o -LIB_OBJS += $(OUTPUT)util/thread_map.o -LIB_OBJS += $(OUTPUT)util/trace-event-parse.o -LIB_OBJS += $(OUTPUT)util/parse-events-flex.o -LIB_OBJS += $(OUTPUT)util/parse-events-bison.o -LIB_OBJS += $(OUTPUT)util/pmu-flex.o -LIB_OBJS += $(OUTPUT)util/pmu-bison.o -LIB_OBJS += $(OUTPUT)util/trace-event-read.o -LIB_OBJS += $(OUTPUT)util/trace-event-info.o -LIB_OBJS += $(OUTPUT)util/trace-event-scripting.o -LIB_OBJS += $(OUTPUT)util/trace-event.o -LIB_OBJS += $(OUTPUT)util/svghelper.o -LIB_OBJS += $(OUTPUT)util/sort.o -LIB_OBJS += $(OUTPUT)util/hist.o -LIB_OBJS += $(OUTPUT)util/probe-event.o -LIB_OBJS += $(OUTPUT)util/util.o -LIB_OBJS += $(OUTPUT)util/xyarray.o -LIB_OBJS += $(OUTPUT)util/cpumap.o -LIB_OBJS += $(OUTPUT)util/cgroup.o -LIB_OBJS += $(OUTPUT)util/target.o -LIB_OBJS += $(OUTPUT)util/rblist.o -LIB_OBJS += $(OUTPUT)util/intlist.o -LIB_OBJS += $(OUTPUT)util/vdso.o -LIB_OBJS += $(OUTPUT)util/stat.o -LIB_OBJS += $(OUTPUT)util/record.o -LIB_OBJS += $(OUTPUT)util/srcline.o -LIB_OBJS += $(OUTPUT)util/data.o -LIB_OBJS += $(OUTPUT)util/tsc.o -LIB_OBJS += $(OUTPUT)util/cloexec.o -LIB_OBJS += $(OUTPUT)util/thread-stack.o - -LIB_OBJS += $(OUTPUT)ui/setup.o -LIB_OBJS += $(OUTPUT)ui/helpline.o -LIB_OBJS += $(OUTPUT)ui/progress.o -LIB_OBJS += $(OUTPUT)ui/util.o -LIB_OBJS += $(OUTPUT)ui/hist.o -LIB_OBJS += $(OUTPUT)ui/stdio/hist.o - -LIB_OBJS += $(OUTPUT)arch/common.o - -LIB_OBJS += $(OUTPUT)tests/parse-events.o -LIB_OBJS += $(OUTPUT)tests/dso-data.o -LIB_OBJS += $(OUTPUT)tests/attr.o -LIB_OBJS += $(OUTPUT)tests/vmlinux-kallsyms.o -LIB_OBJS += $(OUTPUT)tests/open-syscall.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-all-cpus.o -LIB_OBJS += $(OUTPUT)tests/open-syscall-tp-fields.o -LIB_OBJS += $(OUTPUT)tests/mmap-basic.o -LIB_OBJS += $(OUTPUT)tests/perf-record.o -LIB_OBJS += $(OUTPUT)tests/rdpmc.o -LIB_OBJS += $(OUTPUT)tests/evsel-roundtrip-name.o -LIB_OBJS += $(OUTPUT)tests/evsel-tp-sched.o -LIB_OBJS += $(OUTPUT)tests/fdarray.o -LIB_OBJS += $(OUTPUT)tests/pmu.o -LIB_OBJS += $(OUTPUT)tests/hists_common.o -LIB_OBJS += $(OUTPUT)tests/hists_link.o -LIB_OBJS += $(OUTPUT)tests/hists_filter.o -LIB_OBJS += $(OUTPUT)tests/hists_output.o -LIB_OBJS += $(OUTPUT)tests/hists_cumulate.o -LIB_OBJS += $(OUTPUT)tests/python-use.o -LIB_OBJS += $(OUTPUT)tests/bp_signal.o -LIB_OBJS += $(OUTPUT)tests/bp_signal_overflow.o -LIB_OBJS += $(OUTPUT)tests/task-exit.o -LIB_OBJS += $(OUTPUT)tests/sw-clock.o -ifeq ($(ARCH),x86) -LIB_OBJS += $(OUTPUT)tests/perf-time-to-tsc.o -endif -LIB_OBJS += $(OUTPUT)tests/code-reading.o -LIB_OBJS += $(OUTPUT)tests/sample-parsing.o -LIB_OBJS += $(OUTPUT)tests/parse-no-sample-id-all.o -ifndef NO_DWARF_UNWIND -ifeq ($(ARCH),$(filter $(ARCH),x86 arm)) -LIB_OBJS += $(OUTPUT)tests/dwarf-unwind.o -endif -endif -LIB_OBJS += $(OUTPUT)tests/mmap-thread-lookup.o -LIB_OBJS += 
$(OUTPUT)tests/thread-mg-share.o -LIB_OBJS += $(OUTPUT)tests/switch-tracking.o - -BUILTIN_OBJS += $(OUTPUT)builtin-annotate.o -BUILTIN_OBJS += $(OUTPUT)builtin-bench.o -# Benchmark modules -BUILTIN_OBJS += $(OUTPUT)bench/sched-messaging.o -BUILTIN_OBJS += $(OUTPUT)bench/sched-pipe.o -ifeq ($(ARCH), x86) -ifeq ($(IS_64_BIT), 1) -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy-x86-64-asm.o -BUILTIN_OBJS += $(OUTPUT)bench/mem-memset-x86-64-asm.o -endif -endif -BUILTIN_OBJS += $(OUTPUT)bench/mem-memcpy.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-hash.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-wake.o -BUILTIN_OBJS += $(OUTPUT)bench/futex-requeue.o - -BUILTIN_OBJS += $(OUTPUT)builtin-diff.o -BUILTIN_OBJS += $(OUTPUT)builtin-evlist.o -BUILTIN_OBJS += $(OUTPUT)builtin-help.o -BUILTIN_OBJS += $(OUTPUT)builtin-sched.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-buildid-cache.o -BUILTIN_OBJS += $(OUTPUT)builtin-list.o -BUILTIN_OBJS += $(OUTPUT)builtin-record.o -BUILTIN_OBJS += $(OUTPUT)builtin-report.o -BUILTIN_OBJS += $(OUTPUT)builtin-stat.o -BUILTIN_OBJS += $(OUTPUT)builtin-timechart.o -BUILTIN_OBJS += $(OUTPUT)builtin-top.o -BUILTIN_OBJS += $(OUTPUT)builtin-script.o -BUILTIN_OBJS += $(OUTPUT)builtin-probe.o -BUILTIN_OBJS += $(OUTPUT)builtin-kmem.o -BUILTIN_OBJS += $(OUTPUT)builtin-lock.o -BUILTIN_OBJS += $(OUTPUT)builtin-kvm.o -BUILTIN_OBJS += $(OUTPUT)builtin-inject.o -BUILTIN_OBJS += $(OUTPUT)tests/builtin-test.o -BUILTIN_OBJS += $(OUTPUT)builtin-mem.o - -PERFLIBS = $(LIB_FILE) $(LIBAPIKFS) $(LIBTRACEEVENT) +PERFLIBS = $(LIB_FILE) $(LIBAPI) $(LIBTRACEEVENT) # We choose to avoid "if .. else if .. else .. endif endif" # because maintaining the nesting to match is a pain. If @@ -508,67 +255,9 @@ ifneq ($(OUTPUT),) CFLAGS += -I$(OUTPUT) endif -ifdef NO_LIBELF -# Remove ELF/DWARF dependent codes -LIB_OBJS := $(filter-out $(OUTPUT)util/symbol-elf.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/dwarf-aux.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-event.o,$(LIB_OBJS)) -LIB_OBJS := $(filter-out $(OUTPUT)util/probe-finder.o,$(LIB_OBJS)) - -BUILTIN_OBJS := $(filter-out $(OUTPUT)builtin-probe.o,$(BUILTIN_OBJS)) - -# Use minimal symbol handling -LIB_OBJS += $(OUTPUT)util/symbol-minimal.o - -else # NO_LIBELF -ifndef NO_DWARF - LIB_OBJS += $(OUTPUT)util/probe-finder.o - LIB_OBJS += $(OUTPUT)util/dwarf-aux.o -endif # NO_DWARF -endif # NO_LIBELF - -ifndef NO_LIBDW_DWARF_UNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libdw.o - LIB_H += util/unwind-libdw.h -endif - -ifndef NO_LIBUNWIND - LIB_OBJS += $(OUTPUT)util/unwind-libunwind.o -endif -LIB_OBJS += $(OUTPUT)tests/keep-tracking.o - -ifndef NO_LIBAUDIT - BUILTIN_OBJS += $(OUTPUT)builtin-trace.o -endif - -ifndef NO_SLANG - LIB_OBJS += $(OUTPUT)ui/browser.o - LIB_OBJS += $(OUTPUT)ui/browsers/annotate.o - LIB_OBJS += $(OUTPUT)ui/browsers/hists.o - LIB_OBJS += $(OUTPUT)ui/browsers/map.o - LIB_OBJS += $(OUTPUT)ui/browsers/scripts.o - LIB_OBJS += $(OUTPUT)ui/browsers/header.o - LIB_OBJS += $(OUTPUT)ui/tui/setup.o - LIB_OBJS += $(OUTPUT)ui/tui/util.o - LIB_OBJS += $(OUTPUT)ui/tui/helpline.o - LIB_OBJS += $(OUTPUT)ui/tui/progress.o - LIB_H += ui/tui/tui.h - LIB_H += ui/browser.h - LIB_H += ui/browsers/map.h - LIB_H += ui/keysyms.h - LIB_H += ui/libslang.h -endif - ifndef NO_GTK2 ALL_PROGRAMS += $(OUTPUT)libperf-gtk.so - - GTK_OBJS += $(OUTPUT)ui/gtk/browser.o - GTK_OBJS += $(OUTPUT)ui/gtk/hists.o - GTK_OBJS += $(OUTPUT)ui/gtk/setup.o - GTK_OBJS += $(OUTPUT)ui/gtk/util.o - GTK_OBJS += $(OUTPUT)ui/gtk/helpline.o - GTK_OBJS += 
$(OUTPUT)ui/gtk/progress.o - GTK_OBJS += $(OUTPUT)ui/gtk/annotate.o + GTK_IN := $(OUTPUT)gtk-in.o install-gtk: $(OUTPUT)libperf-gtk.so $(call QUIET_INSTALL, 'GTK UI') \ @@ -576,31 +265,6 @@ install-gtk: $(OUTPUT)libperf-gtk.so $(INSTALL) $(OUTPUT)libperf-gtk.so '$(DESTDIR_SQ)$(libdir_SQ)' endif -ifndef NO_LIBPERL - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-perl.o - LIB_OBJS += $(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o -endif - -ifndef NO_LIBPYTHON - LIB_OBJS += $(OUTPUT)util/scripting-engines/trace-event-python.o - LIB_OBJS += $(OUTPUT)scripts/python/Perf-Trace-Util/Context.o -endif - -ifeq ($(NO_PERF_REGS),0) - ifeq ($(ARCH),x86) - LIB_H += arch/x86/include/perf_regs.h - endif - LIB_OBJS += $(OUTPUT)util/perf_regs.o -endif - -ifndef NO_LIBNUMA - BUILTIN_OBJS += $(OUTPUT)bench/numa.o -endif - -ifndef NO_ZLIB - LIB_OBJS += $(OUTPUT)util/zlib.o -endif - ifdef ASCIIDOC8 export ASCIIDOC8 endif @@ -616,39 +280,30 @@ SHELL = $(SHELL_PATH) all: shell_compatibility_test $(ALL_PROGRAMS) $(LANG_BINDINGS) $(OTHER_PROGRAMS) please_set_SHELL_PATH_to_a_more_modern_shell: - @$$(:) + $(Q)$$(:) shell_compatibility_test: please_set_SHELL_PATH_to_a_more_modern_shell strip: $(PROGRAMS) $(OUTPUT)perf $(STRIP) $(STRIP_OPTS) $(PROGRAMS) $(OUTPUT)perf -$(OUTPUT)perf.o: perf.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -include $(OUTPUT)PERF-VERSION-FILE \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - $(CFLAGS) -c $(filter %.c,$^) -o $@ +PERF_IN := $(OUTPUT)perf-in.o -$(OUTPUT)perf: $(OUTPUT)perf.o $(BUILTIN_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(OUTPUT)perf.o \ - $(BUILTIN_OBJS) $(LIBS) -o $@ +export srctree OUTPUT RM CC LD AR CFLAGS V BISON FLEX +build := -f $(srctree)/tools/build/Makefile.build dir=. obj -$(GTK_OBJS): $(OUTPUT)%.o: %.c $(LIB_H) - $(QUIET_CC)$(CC) -o $@ -c -fPIC $(CFLAGS) $(GTK_CFLAGS) $< +$(PERF_IN): $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h FORCE + $(Q)$(MAKE) $(build)=perf -$(OUTPUT)libperf-gtk.so: $(GTK_OBJS) $(PERFLIBS) - $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) +$(OUTPUT)perf: $(PERFLIBS) $(PERF_IN) $(LIBTRACEEVENT_DYNAMIC_LIST) + $(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) $(LIBTRACEEVENT_DYNAMIC_LIST_LDFLAGS) \ + $(PERF_IN) $(LIBS) -o $@ -$(OUTPUT)builtin-help.o: builtin-help.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(GTK_IN): FORCE + $(Q)$(MAKE) $(build)=gtk -$(OUTPUT)builtin-timechart.o: builtin-timechart.c $(OUTPUT)common-cmds.h $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_HTML_PATH="$(htmldir_SQ)"' \ - '-DPERF_MAN_PATH="$(mandir_SQ)"' \ - '-DPERF_INFO_PATH="$(infodir_SQ)"' $< +$(OUTPUT)libperf-gtk.so: $(GTK_IN) $(PERFLIBS) + $(QUIET_LINK)$(CC) -o $@ -shared $(LDFLAGS) $(filter %.o,$^) $(GTK_LIBS) $(OUTPUT)common-cmds.h: util/generate-cmdlist.sh command-list.txt @@ -659,8 +314,7 @@ $(SCRIPTS) : % : %.sh $(QUIET_GEN)$(INSTALL) '$@.sh' '$(OUTPUT)$@' # These can record PERF_VERSION -$(OUTPUT)perf.o perf.spec \ - $(SCRIPTS) \ +perf.spec $(SCRIPTS) \ : $(OUTPUT)PERF-VERSION-FILE .SUFFIXES: @@ -683,90 +337,33 @@ endif # These two need to be here so that when O= is not used they take precedence # over the general rule for .o -$(OUTPUT)util/%-flex.o: $(OUTPUT)util/%-flex.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -w $< - -$(OUTPUT)util/%-bison.o: $(OUTPUT)util/%-bison.c $(OUTPUT)PERF-CFLAGS 
- $(QUIET_CC)$(CC) -o $@ -c -Iutil/ $(CFLAGS) -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w $< - -$(OUTPUT)%.o: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.i: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< -$(OUTPUT)%.s: %.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -S $(CFLAGS) $< -$(OUTPUT)%.o: %.S - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< -$(OUTPUT)%.s: %.S - $(QUIET_CC)$(CC) -o $@ -E $(CFLAGS) $< - -$(OUTPUT)util/exec_cmd.o: util/exec_cmd.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DPERF_EXEC_PATH="$(perfexecdir_SQ)"' \ - '-DPREFIX="$(prefix_SQ)"' \ - $< - -$(OUTPUT)tests/attr.o: tests/attr.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - '-DBINDIR="$(bindir_SQ)"' -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/python-use.o: tests/python-use.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) \ - -DPYTHONPATH='"$(OUTPUT)python"' \ - -DPYTHON='"$(PYTHON_WORD)"' \ - $< - -$(OUTPUT)tests/dwarf-unwind.o: tests/dwarf-unwind.c - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -fno-optimize-sibling-calls $< - -$(OUTPUT)util/config.o: util/config.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< +# get relative building directory (to $(OUTPUT)) +# and '.' if it's $(OUTPUT) itself +__build-dir = $(subst $(OUTPUT),,$(dir $@)) +build-dir = $(if $(__build-dir),$(__build-dir),.) -$(OUTPUT)ui/setup.o: ui/setup.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DLIBDIR='"$(libdir_SQ)"' $< +single_dep: $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -$(OUTPUT)ui/browser.o: ui/browser.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/annotate.o: ui/browsers/annotate.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.i: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/hists.o: ui/browsers/hists.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%.s: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/map.o: ui/browsers/map.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-bison.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)ui/browsers/scripts.o: ui/browsers/scripts.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -DENABLE_SLFUTURE_CONST $< +$(OUTPUT)%-flex.o: %.c single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $< +$(OUTPUT)%.o: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ -$(OUTPUT)util/rbtree.o: ../../lib/rbtree.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/hweight.o: ../../lib/hweight.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c 
$(CFLAGS) -Wno-unused-parameter -DETC_PERFCONFIG='"$(ETC_PERFCONFIG_SQ)"' $< - -$(OUTPUT)util/parse-events.o: util/parse-events.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) -Wno-redundant-decls $< - -$(OUTPUT)util/scripting-engines/trace-event-perl.o: util/scripting-engines/trace-event-perl.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default $< - -$(OUTPUT)scripts/perl/Perf-Trace-Util/Context.o: scripts/perl/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default $< - -$(OUTPUT)util/scripting-engines/trace-event-python.o: util/scripting-engines/trace-event-python.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow $< - -$(OUTPUT)scripts/python/Perf-Trace-Util/Context.o: scripts/python/Perf-Trace-Util/Context.c $(OUTPUT)PERF-CFLAGS - $(QUIET_CC)$(CC) -o $@ -c $(CFLAGS) $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs $< +$(OUTPUT)%.i: %.S single_dep FORCE + $(Q)$(MAKE) -f $(srctree)/tools/build/Makefile.build dir=$(build-dir) $@ $(OUTPUT)perf-%: %.o $(PERFLIBS) $(QUIET_LINK)$(CC) $(CFLAGS) -o $@ $(LDFLAGS) $(filter %.o,$^) $(LIBS) @@ -781,58 +378,40 @@ $(OUTPUT)perf-read-vdsox32: perf-read-vdso.c util/find-vdso-map.c $(QUIET_CC)$(CC) -mx32 $(filter -static,$(LDFLAGS)) -Wall -Werror -o $@ perf-read-vdso.c endif -$(LIB_OBJS) $(BUILTIN_OBJS): $(LIB_H) -$(patsubst perf-%,%.o,$(PROGRAMS)): $(LIB_H) $(wildcard */*.h) +$(patsubst perf-%,%.o,$(PROGRAMS)): $(wildcard */*.h) -# we compile into subdirectories. if the target directory is not the source directory, they might not exists. So -# we depend the various files onto their directories. 
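With the generic pattern rules above now dispatching into tools/build/Makefile.build, individual objects can be rebuilt by naming them directly, which helps when iterating on a single file; the paths are examples:

  # compile one object through the new build framework
  $ make -C tools/perf util/map.o
  # preprocessed and assembly output use the same dispatch
  $ make -C tools/perf util/map.i
  $ make -C tools/perf util/map.s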
-DIRECTORY_DEPS = $(LIB_OBJS) $(BUILTIN_OBJS) $(GTK_OBJS) -DIRECTORY_DEPS += $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)common-cmds.h -# no need to add flex objects, because they depend on bison ones -DIRECTORY_DEPS += $(OUTPUT)util/parse-events-bison.c -DIRECTORY_DEPS += $(OUTPUT)util/pmu-bison.c +LIBPERF_IN := $(OUTPUT)libperf-in.o -OUTPUT_DIRECTORIES := $(sort $(dir $(DIRECTORY_DEPS))) +$(LIBPERF_IN): FORCE + $(Q)$(MAKE) $(build)=libperf -$(DIRECTORY_DEPS): | $(OUTPUT_DIRECTORIES) -# In the second step, we make a rule to actually create these directories -$(OUTPUT_DIRECTORIES): - $(QUIET_MKDIR)$(MKDIR) -p $@ 2>/dev/null +$(LIB_FILE): $(LIBPERF_IN) + $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIBPERF_IN) $(LIB_OBJS) -$(LIB_FILE): $(LIB_OBJS) - $(QUIET_AR)$(RM) $@ && $(AR) rcs $@ $(LIB_OBJS) +LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) -# libtraceevent.a -TE_SOURCES = $(wildcard $(TRACE_EVENT_DIR)*.[ch]) +$(LIBTRACEEVENT): FORCE + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent.a -LIBTRACEEVENT_FLAGS = $(QUIET_SUBDIR1) O=$(OUTPUT) -LIBTRACEEVENT_FLAGS += CFLAGS="-g -Wall $(EXTRA_CFLAGS)" -LIBTRACEEVENT_FLAGS += plugin_dir=$(plugindir_SQ) +libtraceevent_plugins: FORCE + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) plugins -$(LIBTRACEEVENT): $(TE_SOURCES) $(OUTPUT)PERF-CFLAGS - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) libtraceevent.a plugins +$(LIBTRACEEVENT_DYNAMIC_LIST): libtraceevent_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) $(OUTPUT)libtraceevent-dynamic-list $(LIBTRACEEVENT)-clean: $(call QUIET_CLEAN, libtraceevent) - @$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) O=$(OUTPUT) clean >/dev/null install-traceevent-plugins: $(LIBTRACEEVENT) - $(QUIET_SUBDIR0)$(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) install_plugins + $(Q)$(MAKE) -C $(TRACE_EVENT_DIR) $(LIBTRACEEVENT_FLAGS) O=$(OUTPUT) install_plugins -LIBAPIKFS_SOURCES = $(wildcard $(LIB_PATH)fs/*.[ch] $(LIB_PATH)fd/*.[ch]) +$(LIBAPI): FORCE + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) $(OUTPUT)libapi.a -# if subdir is set, we've been called from above so target has been built -# already -$(LIBAPIKFS): $(LIBAPIKFS_SOURCES) -ifeq ($(subdir),) - $(QUIET_SUBDIR0)$(LIB_DIR) $(QUIET_SUBDIR1) O=$(OUTPUT) libapikfs.a -endif - -$(LIBAPIKFS)-clean: -ifeq ($(subdir),) - $(call QUIET_CLEAN, libapikfs) - @$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null -endif +$(LIBAPI)-clean: + $(call QUIET_CLEAN, libapi) + $(Q)$(MAKE) -C $(LIB_DIR) O=$(OUTPUT) clean >/dev/null help: @echo 'Perf make targets:' @@ -888,17 +467,6 @@ cscope: $(QUIET_GEN)$(RM) cscope*; \ $(FIND) $(TAG_FOLDERS) -name '*.[hcS]' -print | xargs cscope -b $(TAG_FILES) -### Detect prefix changes -TRACK_CFLAGS = $(subst ','\'',$(CFLAGS)):\ - $(bindir_SQ):$(perfexecdir_SQ):$(template_dir_SQ):$(prefix_SQ):$(plugindir_SQ) - -$(OUTPUT)PERF-CFLAGS: .FORCE-PERF-CFLAGS - @FLAGS='$(TRACK_CFLAGS)'; \ - if test x"$$FLAGS" != x"`cat $(OUTPUT)PERF-CFLAGS 2>/dev/null`" ; then \ - echo 1>&2 " FLAGS: * new build flags or prefix"; \ - echo "$$FLAGS" >$(OUTPUT)PERF-CFLAGS; \ - fi - ### Testing rules # GNU make supports exporting all variables by "export" without parameters. 
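The libtraceevent-dynamic-list file generated here is handed to the linker via --dynamic-list so that the symbols needed by the dlopen()ed traceevent plugins stay exported from the perf binary (and from python/perf.so). The plugins can still be built and installed on their own:

  # build the plugins and install them into the configured plugin_dir
  $ make -C tools/perf install-traceevent-plugins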
@@ -920,7 +488,7 @@ check: $(OUTPUT)common-cmds.h install-gtk: -install-bin: all install-gtk +install-tools: all install-gtk $(call QUIET_INSTALL, binaries) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(bindir_SQ)'; \ $(INSTALL) $(OUTPUT)perf '$(DESTDIR_SQ)$(bindir_SQ)'; \ @@ -958,12 +526,16 @@ endif $(call QUIET_INSTALL, perf_completion-script) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d'; \ $(INSTALL) perf-completion.sh '$(DESTDIR_SQ)$(sysconfdir_SQ)/bash_completion.d/perf' + +install-tests: all install-gtk $(call QUIET_INSTALL, tests) \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) tests/attr.py '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests'; \ $(INSTALL) -d -m 755 '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr'; \ $(INSTALL) tests/attr/* '$(DESTDIR_SQ)$(perfexec_instdir_SQ)/tests/attr' +install-bin: install-tools install-tests + install: install-bin try-install-man install-traceevent-plugins install-python_ext: @@ -981,12 +553,14 @@ $(INSTALL_DOC_TARGETS): # config-clean: $(call QUIET_CLEAN, config) - @$(MAKE) -C config/feature-checks clean >/dev/null + $(Q)$(MAKE) -C $(srctree)/tools/build/feature/ clean >/dev/null -clean: $(LIBTRACEEVENT)-clean $(LIBAPIKFS)-clean config-clean - $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_OBJS) $(BUILTIN_OBJS) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(OUTPUT)perf.o $(LANG_BINDINGS) $(GTK_OBJS) +clean: $(LIBTRACEEVENT)-clean $(LIBAPI)-clean config-clean + $(call QUIET_CLEAN, core-objs) $(RM) $(LIB_FILE) $(OUTPUT)perf-archive $(OUTPUT)perf-with-kcore $(LANG_BINDINGS) + $(Q)find . -name '*.o' -delete -o -name '\.*.cmd' -delete -o -name '\.*.d' -delete + $(Q)$(RM) $(OUTPUT).config-detected $(call QUIET_CLEAN, core-progs) $(RM) $(ALL_PROGRAMS) perf perf-read-vdso32 perf-read-vdsox32 - $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)PERF-CFLAGS $(OUTPUT)PERF-FEATURES $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* + $(call QUIET_CLEAN, core-gen) $(RM) *.spec *.pyc *.pyo */*.pyc */*.pyo $(OUTPUT)common-cmds.h TAGS tags cscope* $(OUTPUT)PERF-VERSION-FILE $(OUTPUT)FEATURE-DUMP $(OUTPUT)util/*-bison* $(OUTPUT)util/*-flex* $(QUIET_SUBDIR0)Documentation $(QUIET_SUBDIR1) clean $(python-clean) @@ -1000,7 +574,10 @@ else GIT-HEAD-PHONY = endif +FORCE: + .PHONY: all install clean config-clean strip install-gtk .PHONY: shell_compatibility_test please_set_SHELL_PATH_to_a_more_modern_shell -.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope .FORCE-PERF-CFLAGS +.PHONY: $(GIT-HEAD-PHONY) TAGS tags cscope FORCE single_dep +.PHONY: libtraceevent_plugins diff --git a/tools/perf/arch/Build b/tools/perf/arch/Build new file mode 100644 index 000000000000..109eb75cf7de --- /dev/null +++ b/tools/perf/arch/Build @@ -0,0 +1,2 @@ +libperf-y += common.o +libperf-y += $(ARCH)/ diff --git a/tools/perf/arch/arm/Build b/tools/perf/arch/arm/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/arm/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm/Makefile b/tools/perf/arch/arm/Makefile index 09d62153d384..7fbca175099e 100644 --- a/tools/perf/arch/arm/Makefile +++ b/tools/perf/arch/arm/Makefile @@ -1,14 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND 
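The install target is split here: install-tools covers the binaries and shell completion, install-tests the attr test payload, and install-bin now simply depends on both. A typical sequence, with an illustrative prefix:

  $ make -C tools/perf prefix=/usr/local install-tools
  $ make -C tools/perf prefix=/usr/local install-tests
  # 'install' still adds man pages and the traceevent plugins on top
  $ make -C tools/perf prefix=/usr/local install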
-LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o endif diff --git a/tools/perf/arch/arm/tests/Build b/tools/perf/arch/arm/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/arm/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/arm/util/Build b/tools/perf/arch/arm/util/Build new file mode 100644 index 000000000000..d22e3d07de3d --- /dev/null +++ b/tools/perf/arch/arm/util/Build @@ -0,0 +1,4 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/arch/arm64/Build b/tools/perf/arch/arm64/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/arm64/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/arm64/Makefile b/tools/perf/arch/arm64/Makefile index 67e9b3d38e89..7fbca175099e 100644 --- a/tools/perf/arch/arm64/Makefile +++ b/tools/perf/arch/arm64/Makefile @@ -1,7 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o endif diff --git a/tools/perf/arch/arm64/include/perf_regs.h b/tools/perf/arch/arm64/include/perf_regs.h index 1d3f39c3aa56..4e5af27e3fbf 100644 --- a/tools/perf/arch/arm64/include/perf_regs.h +++ b/tools/perf/arch/arm64/include/perf_regs.h @@ -5,8 +5,11 @@ #include <linux/types.h> #include <asm/perf_regs.h> +void perf_regs_load(u64 *regs); + #define PERF_REGS_MASK ((1ULL << PERF_REG_ARM64_MAX) - 1) #define PERF_REGS_MAX PERF_REG_ARM64_MAX +#define PERF_SAMPLE_REGS_ABI PERF_SAMPLE_REGS_ABI_64 #define PERF_REG_IP PERF_REG_ARM64_PC #define PERF_REG_SP PERF_REG_ARM64_SP diff --git a/tools/perf/arch/arm64/tests/Build b/tools/perf/arch/arm64/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/arm64/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/arm64/tests/dwarf-unwind.c b/tools/perf/arch/arm64/tests/dwarf-unwind.c new file mode 100644 index 000000000000..cf04a4c91c59 --- /dev/null +++ b/tools/perf/arch/arm64/tests/dwarf-unwind.c @@ -0,0 +1,61 @@ +#include <string.h> +#include "perf_regs.h" +#include "thread.h" +#include "map.h" +#include "event.h" +#include "debug.h" +#include "tests/tests.h" + +#define STACK_SIZE 8192 + +static int sample_ustack(struct perf_sample *sample, + struct thread *thread, u64 *regs) +{ + struct stack_dump *stack = &sample->user_stack; + struct map *map; + unsigned long sp; + u64 stack_size, *buf; + + buf = malloc(STACK_SIZE); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + sp = (unsigned long) regs[PERF_REG_ARM64_SP]; + + map = map_groups__find(thread->mg, MAP__VARIABLE, (u64) sp); + if (!map) { + pr_debug("failed to get stack map\n"); + free(buf); + return -1; + } + + stack_size = map->end - sp; + stack_size = stack_size > STACK_SIZE ? 
STACK_SIZE : stack_size; + + memcpy(buf, (void *) sp, stack_size); + stack->data = (char *) buf; + stack->size = stack_size; + return 0; +} + +int test__arch_unwind_sample(struct perf_sample *sample, + struct thread *thread) +{ + struct regs_dump *regs = &sample->user_regs; + u64 *buf; + + buf = calloc(1, sizeof(u64) * PERF_REGS_MAX); + if (!buf) { + pr_debug("failed to allocate sample uregs data\n"); + return -1; + } + + perf_regs_load(buf); + regs->abi = PERF_SAMPLE_REGS_ABI; + regs->regs = buf; + regs->mask = PERF_REGS_MASK; + + return sample_ustack(sample, thread, buf); +} diff --git a/tools/perf/arch/arm64/tests/regs_load.S b/tools/perf/arch/arm64/tests/regs_load.S new file mode 100644 index 000000000000..025b46e579a6 --- /dev/null +++ b/tools/perf/arch/arm64/tests/regs_load.S @@ -0,0 +1,46 @@ +#include <linux/linkage.h> + +.text +.type perf_regs_load,%function +#define STR_REG(r) str x##r, [x0, 8 * r] +#define LDR_REG(r) ldr x##r, [x0, 8 * r] +#define SP (8 * 31) +#define PC (8 * 32) +ENTRY(perf_regs_load) + STR_REG(0) + STR_REG(1) + STR_REG(2) + STR_REG(3) + STR_REG(4) + STR_REG(5) + STR_REG(6) + STR_REG(7) + STR_REG(8) + STR_REG(9) + STR_REG(10) + STR_REG(11) + STR_REG(12) + STR_REG(13) + STR_REG(14) + STR_REG(15) + STR_REG(16) + STR_REG(17) + STR_REG(18) + STR_REG(19) + STR_REG(20) + STR_REG(21) + STR_REG(22) + STR_REG(23) + STR_REG(24) + STR_REG(25) + STR_REG(26) + STR_REG(27) + STR_REG(28) + STR_REG(29) + STR_REG(30) + mov x1, sp + str x1, [x0, #SP] + str x30, [x0, #PC] + LDR_REG(1) + ret +ENDPROC(perf_regs_load) diff --git a/tools/perf/arch/arm64/util/Build b/tools/perf/arch/arm64/util/Build new file mode 100644 index 000000000000..e58123a8912b --- /dev/null +++ b/tools/perf/arch/arm64/util/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o diff --git a/tools/perf/arch/common.c b/tools/perf/arch/common.c index 49776f190abf..b7bb42c44694 100644 --- a/tools/perf/arch/common.c +++ b/tools/perf/arch/common.c @@ -61,7 +61,7 @@ const char *const mips_triplets[] = { static bool lookup_path(char *name) { bool found = false; - char *path, *tmp; + char *path, *tmp = NULL; char buf[PATH_MAX]; char *env = getenv("PATH"); diff --git a/tools/perf/arch/powerpc/Build b/tools/perf/arch/powerpc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/powerpc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/powerpc/Makefile b/tools/perf/arch/powerpc/Makefile index 6f7782bea5dd..7fbca175099e 100644 --- a/tools/perf/arch/powerpc/Makefile +++ b/tools/perf/arch/powerpc/Makefile @@ -1,6 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/skip-callchain-idx.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o diff --git a/tools/perf/arch/powerpc/util/Build b/tools/perf/arch/powerpc/util/Build new file mode 100644 index 000000000000..7b8b0d1a1b62 --- /dev/null +++ b/tools/perf/arch/powerpc/util/Build @@ -0,0 +1,5 @@ +libperf-y += header.o +libperf-y += sym-handling.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o +libperf-$(CONFIG_DWARF) += skip-callchain-idx.o diff --git a/tools/perf/arch/powerpc/util/sym-handling.c b/tools/perf/arch/powerpc/util/sym-handling.c new file mode 100644 index 000000000000..bbc1a50768dd --- /dev/null +++ b/tools/perf/arch/powerpc/util/sym-handling.c @@ -0,0 +1,82 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms 
of the GNU General Public License, version 2, as + * published by the Free Software Foundation. + * + * Copyright (C) 2015 Naveen N. Rao, IBM Corporation + */ + +#include "debug.h" +#include "symbol.h" +#include "map.h" +#include "probe-event.h" + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + return ehdr.e_type == ET_EXEC || + ehdr.e_type == ET_REL || + ehdr.e_type == ET_DYN; +} + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +void arch__elf_sym_adjust(GElf_Sym *sym) +{ + sym->st_value += PPC64_LOCAL_ENTRY_OFFSET(sym->st_other); +} +#endif +#endif + +#if !defined(_CALL_ELF) || _CALL_ELF != 2 +int arch__choose_best_symbol(struct symbol *syma, + struct symbol *symb __maybe_unused) +{ + char *sym = syma->name; + + /* Skip over any initial dot */ + if (*sym == '.') + sym++; + + /* Avoid "SyS" kernel syscall aliases */ + if (strlen(sym) >= 3 && !strncmp(sym, "SyS", 3)) + return SYMBOL_B; + if (strlen(sym) >= 10 && !strncmp(sym, "compat_SyS", 10)) + return SYMBOL_B; + + return SYMBOL_A; +} + +/* Allow matching against dot variants */ +int arch__compare_symbol_names(const char *namea, const char *nameb) +{ + /* Skip over initial dot */ + if (*namea == '.') + namea++; + if (*nameb == '.') + nameb++; + + return strcmp(namea, nameb); +} +#endif + +#if defined(_CALL_ELF) && _CALL_ELF == 2 +bool arch__prefers_symtab(void) +{ + return true; +} + +#define PPC64LE_LEP_OFFSET 8 + +void arch__fix_tev_from_maps(struct perf_probe_event *pev, + struct probe_trace_event *tev, struct map *map) +{ + /* + * ppc64 ABIv2 local entry point is currently always 2 instructions + * (8 bytes) after the global entry point. + */ + if (!pev->uprobes && map->dso->symtab_type == DSO_BINARY_TYPE__KALLSYMS) { + tev->point.address += PPC64LE_LEP_OFFSET; + tev->point.offset += PPC64LE_LEP_OFFSET; + } +} +#endif diff --git a/tools/perf/arch/s390/Build b/tools/perf/arch/s390/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/s390/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/s390/Makefile b/tools/perf/arch/s390/Makefile index 798ac7379c5f..21322e0385b8 100644 --- a/tools/perf/arch/s390/Makefile +++ b/tools/perf/arch/s390/Makefile @@ -1,7 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/s390/util/Build b/tools/perf/arch/s390/util/Build new file mode 100644 index 000000000000..8a61372bb47a --- /dev/null +++ b/tools/perf/arch/s390/util/Build @@ -0,0 +1,4 @@ +libperf-y += header.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sh/Build b/tools/perf/arch/sh/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sh/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sh/Makefile b/tools/perf/arch/sh/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sh/Makefile +++ b/tools/perf/arch/sh/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sh/util/Build b/tools/perf/arch/sh/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sh/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/sparc/Build 
b/tools/perf/arch/sparc/Build new file mode 100644 index 000000000000..54afe4a467e7 --- /dev/null +++ b/tools/perf/arch/sparc/Build @@ -0,0 +1 @@ +libperf-y += util/ diff --git a/tools/perf/arch/sparc/Makefile b/tools/perf/arch/sparc/Makefile index 15130b50dfe3..7fbca175099e 100644 --- a/tools/perf/arch/sparc/Makefile +++ b/tools/perf/arch/sparc/Makefile @@ -1,4 +1,3 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif diff --git a/tools/perf/arch/sparc/util/Build b/tools/perf/arch/sparc/util/Build new file mode 100644 index 000000000000..954e287bbb89 --- /dev/null +++ b/tools/perf/arch/sparc/util/Build @@ -0,0 +1 @@ +libperf-$(CONFIG_DWARF) += dwarf-regs.o diff --git a/tools/perf/arch/x86/Build b/tools/perf/arch/x86/Build new file mode 100644 index 000000000000..41bf61da476a --- /dev/null +++ b/tools/perf/arch/x86/Build @@ -0,0 +1,2 @@ +libperf-y += util/ +libperf-$(CONFIG_DWARF_UNWIND) += tests/ diff --git a/tools/perf/arch/x86/Makefile b/tools/perf/arch/x86/Makefile index 9b21881db52f..21322e0385b8 100644 --- a/tools/perf/arch/x86/Makefile +++ b/tools/perf/arch/x86/Makefile @@ -1,19 +1,4 @@ ifndef NO_DWARF PERF_HAVE_DWARF_REGS := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/dwarf-regs.o endif -ifndef NO_LIBUNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libunwind.o -endif -ifndef NO_LIBDW_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/unwind-libdw.o -endif -ifndef NO_DWARF_UNWIND -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/regs_load.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/tests/dwarf-unwind.o -endif -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/header.o -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/tsc.o -LIB_H += arch/$(ARCH)/util/tsc.h HAVE_KVM_STAT_SUPPORT := 1 -LIB_OBJS += $(OUTPUT)arch/$(ARCH)/util/kvm-stat.o diff --git a/tools/perf/arch/x86/tests/Build b/tools/perf/arch/x86/tests/Build new file mode 100644 index 000000000000..b30eff9bcc83 --- /dev/null +++ b/tools/perf/arch/x86/tests/Build @@ -0,0 +1,2 @@ +libperf-y += regs_load.o +libperf-y += dwarf-unwind.o diff --git a/tools/perf/arch/x86/util/Build b/tools/perf/arch/x86/util/Build new file mode 100644 index 000000000000..cfbccc4e3187 --- /dev/null +++ b/tools/perf/arch/x86/util/Build @@ -0,0 +1,8 @@ +libperf-y += header.o +libperf-y += tsc.o +libperf-y += kvm-stat.o + +libperf-$(CONFIG_DWARF) += dwarf-regs.o + +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o diff --git a/tools/perf/bench/Build b/tools/perf/bench/Build new file mode 100644 index 000000000000..c3ab760e06b4 --- /dev/null +++ b/tools/perf/bench/Build @@ -0,0 +1,12 @@ +perf-y += sched-messaging.o +perf-y += sched-pipe.o +perf-y += mem-memcpy.o +perf-y += futex-hash.o +perf-y += futex-wake.o +perf-y += futex-wake-parallel.o +perf-y += futex-requeue.o + +perf-$(CONFIG_X86_64) += mem-memcpy-x86-64-asm.o +perf-$(CONFIG_X86_64) += mem-memset-x86-64-asm.o + +perf-$(CONFIG_NUMA) += numa.o diff --git a/tools/perf/bench/bench.h b/tools/perf/bench/bench.h index 3c4dd44d45cb..70b2f718cc21 100644 --- a/tools/perf/bench/bench.h +++ b/tools/perf/bench/bench.h @@ -33,6 +33,8 @@ extern int bench_mem_memcpy(int argc, const char **argv, extern int bench_mem_memset(int argc, const char **argv, const char *prefix); extern int bench_futex_hash(int argc, const char **argv, const char *prefix); extern int bench_futex_wake(int argc, const char **argv, const char *prefix); +extern int bench_futex_wake_parallel(int argc, const char **argv, + const char *prefix); extern int 
bench_futex_requeue(int argc, const char **argv, const char *prefix); #define BENCH_FORMAT_DEFAULT_STR "default" diff --git a/tools/perf/bench/futex-requeue.c b/tools/perf/bench/futex-requeue.c index bedff6b5b3cf..ad0d9b5342fb 100644 --- a/tools/perf/bench/futex-requeue.c +++ b/tools/perf/bench/futex-requeue.c @@ -132,6 +132,9 @@ int bench_futex_requeue(int argc, const char **argv, if (!fshared) futex_flag = FUTEX_PRIVATE_FLAG; + if (nrequeue > nthreads) + nrequeue = nthreads; + printf("Run summary [PID %d]: Requeuing %d threads (from [%s] %p to %p), " "%d at a time.\n\n", getpid(), nthreads, fshared ? "shared":"private", &futex1, &futex2, nrequeue); @@ -161,20 +164,18 @@ int bench_futex_requeue(int argc, const char **argv, /* Ok, all threads are patiently blocked, start requeueing */ gettimeofday(&start, NULL); - for (nrequeued = 0; nrequeued < nthreads; nrequeued += nrequeue) { + while (nrequeued < nthreads) { /* * Do not wakeup any tasks blocked on futex1, allowing * us to really measure futex_wait functionality. */ - futex_cmp_requeue(&futex1, 0, &futex2, 0, - nrequeue, futex_flag); + nrequeued += futex_cmp_requeue(&futex1, 0, &futex2, 0, + nrequeue, futex_flag); } + gettimeofday(&end, NULL); timersub(&end, &start, &runtime); - if (nrequeued > nthreads) - nrequeued = nthreads; - update_stats(&requeued_stats, nrequeued); update_stats(&requeuetime_stats, runtime.tv_usec); @@ -184,7 +185,7 @@ int bench_futex_requeue(int argc, const char **argv, } /* everybody should be blocked on futex2, wake'em up */ - nrequeued = futex_wake(&futex2, nthreads, futex_flag); + nrequeued = futex_wake(&futex2, nrequeued, futex_flag); if (nthreads != nrequeued) warnx("couldn't wakeup all tasks (%d/%d)", nrequeued, nthreads); diff --git a/tools/perf/bench/futex-wake-parallel.c b/tools/perf/bench/futex-wake-parallel.c new file mode 100644 index 000000000000..6d8c9fa2a16c --- /dev/null +++ b/tools/perf/bench/futex-wake-parallel.c @@ -0,0 +1,294 @@ +/* + * Copyright (C) 2015 Davidlohr Bueso. + * + * Block a bunch of threads and let parallel waker threads wakeup an + * equal amount of them. The program output reflects the avg latency + * for each individual thread to service its share of work. Ultimately + * it can be used to measure futex_wake() changes. 
+ */ + +#include "../perf.h" +#include "../util/util.h" +#include "../util/stat.h" +#include "../util/parse-options.h" +#include "../util/header.h" +#include "bench.h" +#include "futex.h" + +#include <err.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> + +struct thread_data { + pthread_t worker; + unsigned int nwoken; + struct timeval runtime; +}; + +static unsigned int nwakes = 1; + +/* all threads will block on the same futex -- hash bucket chaos ;) */ +static u_int32_t futex = 0; + +static pthread_t *blocked_worker; +static bool done = false, silent = false, fshared = false; +static unsigned int nblocked_threads = 0, nwaking_threads = 0; +static pthread_mutex_t thread_lock; +static pthread_cond_t thread_parent, thread_worker; +static struct stats waketime_stats, wakeup_stats; +static unsigned int ncpus, threads_starting; +static int futex_flag = 0; + +static const struct option options[] = { + OPT_UINTEGER('t', "threads", &nblocked_threads, "Specify amount of threads"), + OPT_UINTEGER('w', "nwakers", &nwaking_threads, "Specify amount of waking threads"), + OPT_BOOLEAN( 's', "silent", &silent, "Silent mode: do not display data/details"), + OPT_BOOLEAN( 'S', "shared", &fshared, "Use shared futexes instead of private ones"), + OPT_END() +}; + +static const char * const bench_futex_wake_parallel_usage[] = { + "perf bench futex wake-parallel <options>", + NULL +}; + +static void *waking_workerfn(void *arg) +{ + struct thread_data *waker = (struct thread_data *) arg; + struct timeval start, end; + + gettimeofday(&start, NULL); + + waker->nwoken = futex_wake(&futex, nwakes, futex_flag); + if (waker->nwoken != nwakes) + warnx("couldn't wakeup all tasks (%d/%d)", + waker->nwoken, nwakes); + + gettimeofday(&end, NULL); + timersub(&end, &start, &waker->runtime); + + pthread_exit(NULL); + return NULL; +} + +static void wakeup_threads(struct thread_data *td, pthread_attr_t thread_attr) +{ + unsigned int i; + + pthread_attr_setdetachstate(&thread_attr, PTHREAD_CREATE_JOINABLE); + + /* create and block all threads */ + for (i = 0; i < nwaking_threads; i++) { + /* + * Thread creation order will impact per-thread latency + * as it will affect the order to acquire the hb spinlock. + * For now let the scheduler decide. 
+ */ + if (pthread_create(&td[i].worker, &thread_attr, + waking_workerfn, (void *)&td[i])) + err(EXIT_FAILURE, "pthread_create"); + } + + for (i = 0; i < nwaking_threads; i++) + if (pthread_join(td[i].worker, NULL)) + err(EXIT_FAILURE, "pthread_join"); +} + +static void *blocked_workerfn(void *arg __maybe_unused) +{ + pthread_mutex_lock(&thread_lock); + threads_starting--; + if (!threads_starting) + pthread_cond_signal(&thread_parent); + pthread_cond_wait(&thread_worker, &thread_lock); + pthread_mutex_unlock(&thread_lock); + + while (1) { /* handle spurious wakeups */ + if (futex_wait(&futex, 0, NULL, futex_flag) != EINTR) + break; + } + + pthread_exit(NULL); + return NULL; +} + +static void block_threads(pthread_t *w, pthread_attr_t thread_attr) +{ + cpu_set_t cpu; + unsigned int i; + + threads_starting = nblocked_threads; + + /* create and block all threads */ + for (i = 0; i < nblocked_threads; i++) { + CPU_ZERO(&cpu); + CPU_SET(i % ncpus, &cpu); + + if (pthread_attr_setaffinity_np(&thread_attr, sizeof(cpu_set_t), &cpu)) + err(EXIT_FAILURE, "pthread_attr_setaffinity_np"); + + if (pthread_create(&w[i], &thread_attr, blocked_workerfn, NULL)) + err(EXIT_FAILURE, "pthread_create"); + } +} + +static void print_run(struct thread_data *waking_worker, unsigned int run_num) +{ + unsigned int i, wakeup_avg; + double waketime_avg, waketime_stddev; + struct stats __waketime_stats, __wakeup_stats; + + init_stats(&__wakeup_stats); + init_stats(&__waketime_stats); + + for (i = 0; i < nwaking_threads; i++) { + update_stats(&__waketime_stats, waking_worker[i].runtime.tv_usec); + update_stats(&__wakeup_stats, waking_worker[i].nwoken); + } + + waketime_avg = avg_stats(&__waketime_stats); + waketime_stddev = stddev_stats(&__waketime_stats); + wakeup_avg = avg_stats(&__wakeup_stats); + + printf("[Run %d]: Avg per-thread latency (waking %d/%d threads) " + "in %.4f ms (+-%.2f%%)\n", run_num + 1, wakeup_avg, + nblocked_threads, waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + +static void print_summary(void) +{ + unsigned int wakeup_avg; + double waketime_avg, waketime_stddev; + + waketime_avg = avg_stats(&waketime_stats); + waketime_stddev = stddev_stats(&waketime_stats); + wakeup_avg = avg_stats(&wakeup_stats); + + printf("Avg per-thread latency (waking %d/%d threads) in %.4f ms (+-%.2f%%)\n", + wakeup_avg, + nblocked_threads, + waketime_avg/1e3, + rel_stddev_stats(waketime_stddev, waketime_avg)); +} + + +static void do_run_stats(struct thread_data *waking_worker) +{ + unsigned int i; + + for (i = 0; i < nwaking_threads; i++) { + update_stats(&waketime_stats, waking_worker[i].runtime.tv_usec); + update_stats(&wakeup_stats, waking_worker[i].nwoken); + } + +} + +static void toggle_done(int sig __maybe_unused, + siginfo_t *info __maybe_unused, + void *uc __maybe_unused) +{ + done = true; +} + +int bench_futex_wake_parallel(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + int ret = 0; + unsigned int i, j; + struct sigaction act; + pthread_attr_t thread_attr; + struct thread_data *waking_worker; + + argc = parse_options(argc, argv, options, + bench_futex_wake_parallel_usage, 0); + if (argc) { + usage_with_options(bench_futex_wake_parallel_usage, options); + exit(EXIT_FAILURE); + } + + sigfillset(&act.sa_mask); + act.sa_sigaction = toggle_done; + sigaction(SIGINT, &act, NULL); + + ncpus = sysconf(_SC_NPROCESSORS_ONLN); + if (!nblocked_threads) + nblocked_threads = ncpus; + + /* some sanity checks */ + if (nwaking_threads > nblocked_threads || !nwaking_threads) + 
nwaking_threads = nblocked_threads; + + if (nblocked_threads % nwaking_threads) + errx(EXIT_FAILURE, "Must be perfectly divisible"); + /* + * Each thread will wakeup nwakes tasks in + * a single futex_wait call. + */ + nwakes = nblocked_threads/nwaking_threads; + + blocked_worker = calloc(nblocked_threads, sizeof(*blocked_worker)); + if (!blocked_worker) + err(EXIT_FAILURE, "calloc"); + + if (!fshared) + futex_flag = FUTEX_PRIVATE_FLAG; + + printf("Run summary [PID %d]: blocking on %d threads (at [%s] " + "futex %p), %d threads waking up %d at a time.\n\n", + getpid(), nblocked_threads, fshared ? "shared":"private", + &futex, nwaking_threads, nwakes); + + init_stats(&wakeup_stats); + init_stats(&waketime_stats); + + pthread_attr_init(&thread_attr); + pthread_mutex_init(&thread_lock, NULL); + pthread_cond_init(&thread_parent, NULL); + pthread_cond_init(&thread_worker, NULL); + + for (j = 0; j < bench_repeat && !done; j++) { + waking_worker = calloc(nwaking_threads, sizeof(*waking_worker)); + if (!waking_worker) + err(EXIT_FAILURE, "calloc"); + + /* create, launch & block all threads */ + block_threads(blocked_worker, thread_attr); + + /* make sure all threads are already blocked */ + pthread_mutex_lock(&thread_lock); + while (threads_starting) + pthread_cond_wait(&thread_parent, &thread_lock); + pthread_cond_broadcast(&thread_worker); + pthread_mutex_unlock(&thread_lock); + + usleep(100000); + + /* Ok, all threads are patiently blocked, start waking folks up */ + wakeup_threads(waking_worker, thread_attr); + + for (i = 0; i < nblocked_threads; i++) { + ret = pthread_join(blocked_worker[i], NULL); + if (ret) + err(EXIT_FAILURE, "pthread_join"); + } + + do_run_stats(waking_worker); + if (!silent) + print_run(waking_worker, j); + + free(waking_worker); + } + + /* cleanup & report results */ + pthread_cond_destroy(&thread_parent); + pthread_cond_destroy(&thread_worker); + pthread_mutex_destroy(&thread_lock); + pthread_attr_destroy(&thread_attr); + + print_summary(); + + free(blocked_worker); + return ret; +} diff --git a/tools/perf/bench/futex-wake.c b/tools/perf/bench/futex-wake.c index 929f762be47e..e5e41d3bdce7 100644 --- a/tools/perf/bench/futex-wake.c +++ b/tools/perf/bench/futex-wake.c @@ -60,7 +60,12 @@ static void *workerfn(void *arg __maybe_unused) pthread_cond_wait(&thread_worker, &thread_lock); pthread_mutex_unlock(&thread_lock); - futex_wait(&futex1, 0, NULL, futex_flag); + while (1) { + if (futex_wait(&futex1, 0, NULL, futex_flag) != EINTR) + break; + } + + pthread_exit(NULL); return NULL; } diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h index d66ab799b35f..8c0c1a2770c8 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memcpy-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMCPY_FN(__memcpy, +MEMCPY_FN(memcpy_orig, "x86-64-unrolled", "unrolled memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c, +MEMCPY_FN(__memcpy, "x86-64-movsq", "movsq-based memcpy() in arch/x86/lib/memcpy_64.S") -MEMCPY_FN(memcpy_c_e, +MEMCPY_FN(memcpy_erms, "x86-64-movsb", "movsb-based memcpy() in arch/x86/lib/memcpy_64.S") diff --git a/tools/perf/bench/mem-memcpy-x86-64-asm.S b/tools/perf/bench/mem-memcpy-x86-64-asm.S index fcd9cf00600a..e4c2c30143b9 100644 --- a/tools/perf/bench/mem-memcpy-x86-64-asm.S +++ b/tools/perf/bench/mem-memcpy-x86-64-asm.S @@ -1,8 +1,6 @@ #define memcpy MEMCPY /* don't hide glibc's memcpy() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemcpy_c globl memcpy_c; 
memcpy_c -#define Lmemcpy_c_e globl memcpy_c_e; memcpy_c_e #include "../../../arch/x86/lib/memcpy_64.S" /* * We need to provide note.GNU-stack section, saying that we want diff --git a/tools/perf/bench/mem-memcpy.c b/tools/perf/bench/mem-memcpy.c index db1d3a29d97f..d3dfb7936dcd 100644 --- a/tools/perf/bench/mem-memcpy.c +++ b/tools/perf/bench/mem-memcpy.c @@ -36,7 +36,7 @@ static const struct option options[] = { "Specify length of memory to copy. " "Available units: B, KB, MB, GB and TB (upper and lower)"), OPT_STRING('r', "routine", &routine, "default", - "Specify routine to copy"), + "Specify routine to copy, \"all\" runs all available routines"), OPT_INTEGER('i', "iterations", &iterations, "repeat memcpy() invocation this number of times"), OPT_BOOLEAN('c', "cycle", &use_cycle, @@ -135,55 +135,16 @@ struct bench_mem_info { const char *const *usage; }; -static int bench_mem_common(int argc, const char **argv, - const char *prefix __maybe_unused, - struct bench_mem_info *info) +static void __bench_mem_routine(struct bench_mem_info *info, int r_idx, size_t len, double totallen) { - int i; - size_t len; - double totallen; + const struct routine *r = &info->routines[r_idx]; double result_bps[2]; u64 result_cycle[2]; - argc = parse_options(argc, argv, options, - info->usage, 0); - - if (no_prefault && only_prefault) { - fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); - return 1; - } - - if (use_cycle) - init_cycle(); - - len = (size_t)perf_atoll((char *)length_str); - totallen = (double)len * iterations; - result_cycle[0] = result_cycle[1] = 0ULL; result_bps[0] = result_bps[1] = 0.0; - if ((s64)len <= 0) { - fprintf(stderr, "Invalid length:%s\n", length_str); - return 1; - } - - /* same to without specifying either of prefault and no-prefault */ - if (only_prefault && no_prefault) - only_prefault = no_prefault = false; - - for (i = 0; info->routines[i].name; i++) { - if (!strcmp(info->routines[i].name, routine)) - break; - } - if (!info->routines[i].name) { - printf("Unknown routine:%s\n", routine); - printf("Available routines...\n"); - for (i = 0; info->routines[i].name; i++) { - printf("\t%s ... 
%s\n", - info->routines[i].name, info->routines[i].desc); - } - return 1; - } + printf("Routine %s (%s)\n", r->name, r->desc); if (bench_format == BENCH_FORMAT_DEFAULT) printf("# Copying %s Bytes ...\n\n", length_str); @@ -191,28 +152,17 @@ static int bench_mem_common(int argc, const char **argv, if (!only_prefault && !no_prefault) { /* show both of results */ if (use_cycle) { - result_cycle[0] = - info->do_cycle(&info->routines[i], len, false); - result_cycle[1] = - info->do_cycle(&info->routines[i], len, true); + result_cycle[0] = info->do_cycle(r, len, false); + result_cycle[1] = info->do_cycle(r, len, true); } else { - result_bps[0] = - info->do_gettimeofday(&info->routines[i], - len, false); - result_bps[1] = - info->do_gettimeofday(&info->routines[i], - len, true); + result_bps[0] = info->do_gettimeofday(r, len, false); + result_bps[1] = info->do_gettimeofday(r, len, true); } } else { - if (use_cycle) { - result_cycle[pf] = - info->do_cycle(&info->routines[i], - len, only_prefault); - } else { - result_bps[pf] = - info->do_gettimeofday(&info->routines[i], - len, only_prefault); - } + if (use_cycle) + result_cycle[pf] = info->do_cycle(r, len, only_prefault); + else + result_bps[pf] = info->do_gettimeofday(r, len, only_prefault); } switch (bench_format) { @@ -265,6 +215,60 @@ static int bench_mem_common(int argc, const char **argv, die("unknown format: %d\n", bench_format); break; } +} + +static int bench_mem_common(int argc, const char **argv, + const char *prefix __maybe_unused, + struct bench_mem_info *info) +{ + int i; + size_t len; + double totallen; + + argc = parse_options(argc, argv, options, + info->usage, 0); + + if (no_prefault && only_prefault) { + fprintf(stderr, "Invalid options: -o and -n are mutually exclusive\n"); + return 1; + } + + if (use_cycle) + init_cycle(); + + len = (size_t)perf_atoll((char *)length_str); + totallen = (double)len * iterations; + + if ((s64)len <= 0) { + fprintf(stderr, "Invalid length:%s\n", length_str); + return 1; + } + + /* same to without specifying either of prefault and no-prefault */ + if (only_prefault && no_prefault) + only_prefault = no_prefault = false; + + if (!strncmp(routine, "all", 3)) { + for (i = 0; info->routines[i].name; i++) + __bench_mem_routine(info, i, len, totallen); + return 0; + } + + for (i = 0; info->routines[i].name; i++) { + if (!strcmp(info->routines[i].name, routine)) + break; + } + if (!info->routines[i].name) { + printf("Unknown routine:%s\n", routine); + printf("Available routines...\n"); + for (i = 0; info->routines[i].name; i++) { + printf("\t%s ... 
%s\n", + info->routines[i].name, info->routines[i].desc); + } + return 1; + } + + __bench_mem_routine(info, i, len, totallen); return 0; } diff --git a/tools/perf/bench/mem-memset-x86-64-asm-def.h b/tools/perf/bench/mem-memset-x86-64-asm-def.h index a71dff97c1f5..f02d028771d9 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm-def.h +++ b/tools/perf/bench/mem-memset-x86-64-asm-def.h @@ -1,12 +1,12 @@ -MEMSET_FN(__memset, +MEMSET_FN(memset_orig, "x86-64-unrolled", "unrolled memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c, +MEMSET_FN(__memset, "x86-64-stosq", "movsq-based memset() in arch/x86/lib/memset_64.S") -MEMSET_FN(memset_c_e, +MEMSET_FN(memset_erms, "x86-64-stosb", "movsb-based memset() in arch/x86/lib/memset_64.S") diff --git a/tools/perf/bench/mem-memset-x86-64-asm.S b/tools/perf/bench/mem-memset-x86-64-asm.S index 9e5af89ed13a..de278784c866 100644 --- a/tools/perf/bench/mem-memset-x86-64-asm.S +++ b/tools/perf/bench/mem-memset-x86-64-asm.S @@ -1,8 +1,6 @@ #define memset MEMSET /* don't hide glibc's memset() */ #define altinstr_replacement text #define globl p2align 4; .globl -#define Lmemset_c globl memset_c; memset_c -#define Lmemset_c_e globl memset_c_e; memset_c_e #include "../../../arch/x86/lib/memset_64.S" /* diff --git a/tools/perf/bench/numa.c b/tools/perf/bench/numa.c index ebfa163b80b5..870b7e665a20 100644 --- a/tools/perf/bench/numa.c +++ b/tools/perf/bench/numa.c @@ -8,6 +8,7 @@ #include "../builtin.h" #include "../util/util.h" #include "../util/parse-options.h" +#include "../util/cloexec.h" #include "bench.h" @@ -23,6 +24,7 @@ #include <pthread.h> #include <sys/mman.h> #include <sys/time.h> +#include <sys/resource.h> #include <sys/wait.h> #include <sys/prctl.h> #include <sys/types.h> @@ -51,6 +53,9 @@ struct thread_data { unsigned int loops_done; u64 val; u64 runtime_ns; + u64 system_time_ns; + u64 user_time_ns; + double speed_gbs; pthread_mutex_t *process_lock; }; @@ -180,7 +185,7 @@ static const struct option options[] = { OPT_INTEGER('H', "thp" , &p0.thp, "MADV_NOHUGEPAGE < 0 < MADV_HUGEPAGE"), OPT_BOOLEAN('c', "show_convergence", &p0.show_convergence, "show convergence details"), OPT_BOOLEAN('m', "measure_convergence", &p0.measure_convergence, "measure convergence latency"), - OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "bzero the initial allocations"), + OPT_BOOLEAN('q', "quiet" , &p0.show_quiet, "quiet mode"), OPT_BOOLEAN('S', "serialize-startup", &p0.serialize_startup,"serialize thread startup"), /* Special option string parsing callbacks: */ @@ -828,6 +833,9 @@ static int count_process_nodes(int process_nr) td = g->threads + task_nr; node = numa_node_of_cpu(td->curr_cpu); + if (node < 0) /* curr_cpu was likely still -1 */ + return 0; + node_present[node] = 1; } @@ -882,6 +890,11 @@ static void calc_convergence_compression(int *strong) for (p = 0; p < g->p.nr_proc; p++) { unsigned int nodes = count_process_nodes(p); + if (!nodes) { + *strong = 0; + return; + } + nodes_min = min(nodes, nodes_min); nodes_max = max(nodes, nodes_max); } @@ -1034,6 +1047,7 @@ static void *worker_thread(void *__tdata) u64 bytes_done; long work_done; u32 l; + struct rusage rusage; bind_to_cpumask(td->bind_cpumask); bind_to_memnode(td->bind_node); @@ -1186,6 +1200,13 @@ static void *worker_thread(void *__tdata) timersub(&stop, &start0, &diff); td->runtime_ns = diff.tv_sec * 1000000000ULL; td->runtime_ns += diff.tv_usec * 1000ULL; + td->speed_gbs = bytes_done / (td->runtime_ns / 1e9) / 1e9; + + getrusage(RUSAGE_THREAD, &rusage); + td->system_time_ns = rusage.ru_stime.tv_sec * 
1000000000ULL; + td->system_time_ns += rusage.ru_stime.tv_usec * 1000ULL; + td->user_time_ns = rusage.ru_utime.tv_sec * 1000000000ULL; + td->user_time_ns += rusage.ru_utime.tv_usec * 1000ULL; free_data(thread_data, g->p.bytes_thread); @@ -1395,7 +1416,7 @@ static void print_res(const char *name, double val, if (!name) name = "main,"; - if (g->p.show_quiet) + if (!g->p.show_quiet) printf(" %-30s %15.3f, %-15s %s\n", name, val, txt_unit, txt_short); else printf(" %14.3f %s\n", val, txt_long); @@ -1412,7 +1433,7 @@ static int __bench_numa(const char *name) double runtime_sec_min; int wait_stat; double bytes; - int i, t; + int i, t, p; if (init()) return -1; @@ -1548,6 +1569,24 @@ static int __bench_numa(const char *name) print_res(name, bytes / runtime_sec_max / 1e9, "GB/sec,", "total-speed", "GB/sec total speed"); + if (g->p.show_details >= 2) { + char tname[32]; + struct thread_data *td; + for (p = 0; p < g->p.nr_proc; p++) { + for (t = 0; t < g->p.nr_threads; t++) { + memset(tname, 0, 32); + td = g->threads + p*g->p.nr_threads + t; + snprintf(tname, 32, "process%d:thread%d", p, t); + print_res(tname, td->speed_gbs, + "GB/sec", "thread-speed", "GB/sec/thread speed"); + print_res(tname, td->system_time_ns / 1e9, + "secs", "thread-system-time", "system CPU time/thread"); + print_res(tname, td->user_time_ns / 1e9, + "secs", "thread-user-time", "user CPU time/thread"); + } + } + } + free(pids); deinit(); diff --git a/tools/perf/builtin-annotate.c b/tools/perf/builtin-annotate.c index 747f86103599..2c1bec39c30e 100644 --- a/tools/perf/builtin-annotate.c +++ b/tools/perf/builtin-annotate.c @@ -59,6 +59,10 @@ static int perf_evsel__add_sample(struct perf_evsel *evsel, (al->sym == NULL || strcmp(ann->sym_hist_filter, al->sym->name) != 0)) { /* We're only interested in a symbol named sym_hist_filter */ + /* + * FIXME: why isn't this done in the symbol_filter when loading + * the DSO? 
+ */ if (al->sym != NULL) { rb_erase(&al->sym->rb_node, &al->map->dso->symbols[al->map->type]); @@ -84,6 +88,7 @@ static int process_sample_event(struct perf_tool *tool, { struct perf_annotate *ann = container_of(tool, struct perf_annotate, tool); struct addr_location al; + int ret = 0; if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", @@ -92,15 +97,16 @@ static int process_sample_event(struct perf_tool *tool, } if (ann->cpu_list && !test_bit(sample->cpu, ann->cpu_bitmap)) - return 0; + goto out_put; if (!al.filtered && perf_evsel__add_sample(evsel, sample, &al, ann)) { pr_warning("problem incrementing symbol count, " "skipping event\n"); - return -1; + ret = -1; } - - return 0; +out_put: + addr_location__put(&al); + return ret; } static int hist_entry__tty_annotate(struct hist_entry *he, @@ -208,7 +214,7 @@ static int __cmd_annotate(struct perf_annotate *ann) goto out; } - ret = perf_session__process_events(session, &ann->tool); + ret = perf_session__process_events(session); if (ret) goto out; @@ -283,7 +289,6 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) }, }; struct perf_data_file file = { - .path = input_name, .mode = PERF_DATA_MODE_READ, }; const struct option options[] = { @@ -324,6 +329,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) "objdump binary to use for disassembly and annotations"), OPT_BOOLEAN(0, "group", &symbol_conf.event_group, "Show event group information together"), + OPT_BOOLEAN(0, "show-total-period", &symbol_conf.show_total_period, + "Show a column with the sum of periods"), OPT_END() }; int ret = hists__init(); @@ -340,6 +347,8 @@ int cmd_annotate(int argc, const char **argv, const char *prefix __maybe_unused) else if (annotate.use_gtk) use_browser = 2; + file.path = input_name; + setup_browser(true); annotate.session = perf_session__new(&file, false, &annotate.tool); diff --git a/tools/perf/builtin-bench.c b/tools/perf/builtin-bench.c index b9a56fa83330..b5314e452ec7 100644 --- a/tools/perf/builtin-bench.c +++ b/tools/perf/builtin-bench.c @@ -58,6 +58,7 @@ static struct bench mem_benchmarks[] = { static struct bench futex_benchmarks[] = { { "hash", "Benchmark for futex hash table", bench_futex_hash }, { "wake", "Benchmark for futex wake calls", bench_futex_wake }, + { "wake-parallel", "Benchmark for parallel futex wake calls", bench_futex_wake_parallel }, { "requeue", "Benchmark for futex requeue calls", bench_futex_requeue }, { "all", "Test all futex benchmarks", NULL }, { NULL, NULL, NULL } diff --git a/tools/perf/builtin-buildid-cache.c b/tools/perf/builtin-buildid-cache.c index 50e6b66aea1f..d47a0cdc71c9 100644 --- a/tools/perf/builtin-buildid-cache.c +++ b/tools/perf/builtin-buildid-cache.c @@ -125,8 +125,7 @@ static int build_id_cache__kcore_existing(const char *from_dir, char *to_dir, return ret; } -static int build_id_cache__add_kcore(const char *filename, const char *debugdir, - bool force) +static int build_id_cache__add_kcore(const char *filename, bool force) { char dir[32], sbuildid[BUILD_ID_SIZE * 2 + 1]; char from_dir[PATH_MAX], to_dir[PATH_MAX]; @@ -143,7 +142,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s", - debugdir, sbuildid); + buildid_dir, sbuildid); if (!force && !build_id_cache__kcore_existing(from_dir, to_dir, sizeof(to_dir))) { @@ -155,7 +154,7 @@ static int 
build_id_cache__add_kcore(const char *filename, const char *debugdir, return -1; scnprintf(to_dir, sizeof(to_dir), "%s/[kernel.kcore]/%s/%s", - debugdir, sbuildid, dir); + buildid_dir, sbuildid, dir); if (mkdir_p(to_dir, 0755)) return -1; @@ -183,7 +182,7 @@ static int build_id_cache__add_kcore(const char *filename, const char *debugdir, return 0; } -static int build_id_cache__add_file(const char *filename, const char *debugdir) +static int build_id_cache__add_file(const char *filename) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; u8 build_id[BUILD_ID_SIZE]; @@ -195,16 +194,14 @@ static int build_id_cache__add_file(const char *filename, const char *debugdir) } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__add_s(sbuild_id, debugdir, filename, + err = build_id_cache__add_s(sbuild_id, filename, false, false); - if (verbose) - pr_info("Adding %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + pr_debug("Adding %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); return err; } -static int build_id_cache__remove_file(const char *filename, - const char *debugdir) +static int build_id_cache__remove_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; @@ -217,10 +214,34 @@ static int build_id_cache__remove_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (verbose) - pr_info("Removing %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + err = build_id_cache__remove_s(sbuild_id); + pr_debug("Removing %s %s: %s\n", sbuild_id, filename, + err ? "FAIL" : "Ok"); + + return err; +} + +static int build_id_cache__purge_path(const char *pathname) +{ + struct strlist *list; + struct str_node *pos; + int err; + + err = build_id_cache__list_build_ids(pathname, &list); + if (err) + goto out; + + strlist__for_each(pos, list) { + err = build_id_cache__remove_s(pos->s); + pr_debug("Removing %s %s: %s\n", pos->s, pathname, + err ? "FAIL" : "Ok"); + if (err) + break; + } + strlist__delete(list); + +out: + pr_debug("Purging %s: %s\n", pathname, err ? "FAIL" : "Ok"); return err; } @@ -252,13 +273,12 @@ static int build_id_cache__fprintf_missing(struct perf_session *session, FILE *f return 0; } -static int build_id_cache__update_file(const char *filename, - const char *debugdir) +static int build_id_cache__update_file(const char *filename) { u8 build_id[BUILD_ID_SIZE]; char sbuild_id[BUILD_ID_SIZE * 2 + 1]; - int err; + int err = 0; if (filename__read_build_id(filename, &build_id, sizeof(build_id)) < 0) { pr_debug("Couldn't read a build-id in %s\n", filename); @@ -266,14 +286,14 @@ static int build_id_cache__update_file(const char *filename, } build_id__sprintf(build_id, sizeof(build_id), sbuild_id); - err = build_id_cache__remove_s(sbuild_id, debugdir); - if (!err) { - err = build_id_cache__add_s(sbuild_id, debugdir, filename, - false, false); - } - if (verbose) - pr_info("Updating %s %s: %s\n", sbuild_id, filename, - err ? "FAIL" : "Ok"); + if (build_id_cache__cached(sbuild_id)) + err = build_id_cache__remove_s(sbuild_id); + + if (!err) + err = build_id_cache__add_s(sbuild_id, filename, false, false); + + pr_debug("Updating %s %s: %s\n", sbuild_id, filename, + err ? 
"FAIL" : "Ok"); return err; } @@ -287,6 +307,7 @@ int cmd_buildid_cache(int argc, const char **argv, bool force = false; char const *add_name_list_str = NULL, *remove_name_list_str = NULL, + *purge_name_list_str = NULL, *missing_filename = NULL, *update_name_list_str = NULL, *kcore_filename = NULL; @@ -304,6 +325,8 @@ int cmd_buildid_cache(int argc, const char **argv, "file", "kcore file to add"), OPT_STRING('r', "remove", &remove_name_list_str, "file list", "file(s) to remove"), + OPT_STRING('p', "purge", &purge_name_list_str, "path list", + "path(s) to remove (remove old caches too)"), OPT_STRING('M', "missing", &missing_filename, "file", "to find missing build ids in the cache"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), @@ -320,6 +343,11 @@ int cmd_buildid_cache(int argc, const char **argv, argc = parse_options(argc, argv, buildid_cache_options, buildid_cache_usage, 0); + if (argc || (!add_name_list_str && !kcore_filename && + !remove_name_list_str && !purge_name_list_str && + !missing_filename && !update_name_list_str)) + usage_with_options(buildid_cache_usage, buildid_cache_options); + if (missing_filename) { file.path = missing_filename; file.force = force; @@ -338,7 +366,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, add_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__add_file(pos->s, buildid_dir)) { + if (build_id_cache__add_file(pos->s)) { if (errno == EEXIST) { pr_debug("%s already in the cache\n", pos->s); @@ -356,7 +384,25 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, remove_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__remove_file(pos->s, buildid_dir)) { + if (build_id_cache__remove_file(pos->s)) { + if (errno == ENOENT) { + pr_debug("%s wasn't in the cache\n", + pos->s); + continue; + } + pr_warning("Couldn't remove %s: %s\n", + pos->s, strerror_r(errno, sbuf, sizeof(sbuf))); + } + + strlist__delete(list); + } + } + + if (purge_name_list_str) { + list = strlist__new(true, purge_name_list_str); + if (list) { + strlist__for_each(pos, list) + if (build_id_cache__purge_path(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -377,7 +423,7 @@ int cmd_buildid_cache(int argc, const char **argv, list = strlist__new(true, update_name_list_str); if (list) { strlist__for_each(pos, list) - if (build_id_cache__update_file(pos->s, buildid_dir)) { + if (build_id_cache__update_file(pos->s)) { if (errno == ENOENT) { pr_debug("%s wasn't in the cache\n", pos->s); @@ -391,8 +437,7 @@ int cmd_buildid_cache(int argc, const char **argv, } } - if (kcore_filename && - build_id_cache__add_kcore(kcore_filename, buildid_dir, force)) + if (kcore_filename && build_id_cache__add_kcore(kcore_filename, force)) pr_warning("Couldn't add %s\n", kcore_filename); out: diff --git a/tools/perf/builtin-buildid-list.c b/tools/perf/builtin-buildid-list.c index ed3873b3e238..9fe93c8d4fcf 100644 --- a/tools/perf/builtin-buildid-list.c +++ b/tools/perf/builtin-buildid-list.c @@ -69,12 +69,21 @@ static int perf_session__list_build_ids(bool force, bool with_hits) session = perf_session__new(&file, false, &build_id__mark_dso_hit_ops); if (session == NULL) return -1; + + /* + * We take all buildids when the file contains AUX area tracing data + * because we do not decode the trace because it would take too long. 
+ */ + if (!perf_data_file__is_pipe(&file) && + perf_header__has_feat(&session->header, HEADER_AUXTRACE)) + with_hits = false; + /* * in pipe-mode, the only way to get the buildids is to parse * the record stream. Buildids are stored as RECORD_HEADER_BUILD_ID */ if (with_hits || perf_data_file__is_pipe(&file)) - perf_session__process_events(session, &build_id__mark_dso_hit_ops); + perf_session__process_events(session); perf_session__fprintf_dsos_buildid(session, stdout, dso__skip_buildid, with_hits); perf_session__delete(session); diff --git a/tools/perf/builtin-data.c b/tools/perf/builtin-data.c new file mode 100644 index 000000000000..d6525bc54d13 --- /dev/null +++ b/tools/perf/builtin-data.c @@ -0,0 +1,123 @@ +#include <linux/compiler.h> +#include "builtin.h" +#include "perf.h" +#include "debug.h" +#include "parse-options.h" +#include "data-convert-bt.h" + +typedef int (*data_cmd_fn_t)(int argc, const char **argv, const char *prefix); + +struct data_cmd { + const char *name; + const char *summary; + data_cmd_fn_t fn; +}; + +static struct data_cmd data_cmds[]; + +#define for_each_cmd(cmd) \ + for (cmd = data_cmds; cmd && cmd->name; cmd++) + +static const struct option data_options[] = { + OPT_END() +}; + +static const char * const data_subcommands[] = { "convert", NULL }; + +static const char *data_usage[] = { + "perf data [<common options>] <command> [<options>]", + NULL +}; + +static void print_usage(void) +{ + struct data_cmd *cmd; + + printf("Usage:\n"); + printf("\t%s\n\n", data_usage[0]); + printf("\tAvailable commands:\n"); + + for_each_cmd(cmd) { + printf("\t %s\t- %s\n", cmd->name, cmd->summary); + } + + printf("\n"); +} + +static const char * const data_convert_usage[] = { + "perf data convert [<options>]", + NULL +}; + +static int cmd_data_convert(int argc, const char **argv, + const char *prefix __maybe_unused) +{ + const char *to_ctf = NULL; + bool force = false; + const struct option options[] = { + OPT_INCR('v', "verbose", &verbose, "be more verbose"), + OPT_STRING('i', "input", &input_name, "file", "input file name"), +#ifdef HAVE_LIBBABELTRACE_SUPPORT + OPT_STRING(0, "to-ctf", &to_ctf, NULL, "Convert to CTF format"), +#endif + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_END() + }; + +#ifndef HAVE_LIBBABELTRACE_SUPPORT + pr_err("No conversion support compiled in.\n"); + return -1; +#endif + + argc = parse_options(argc, argv, options, + data_convert_usage, 0); + if (argc) { + usage_with_options(data_convert_usage, options); + return -1; + } + + if (to_ctf) { +#ifdef HAVE_LIBBABELTRACE_SUPPORT + return bt_convert__perf2ctf(input_name, to_ctf, force); +#else + pr_err("The libbabeltrace support is not compiled in.\n"); + return -1; +#endif + } + + return 0; +} + +static struct data_cmd data_cmds[] = { + { "convert", "converts data file between formats", cmd_data_convert }, + { .name = NULL, }, +}; + +int cmd_data(int argc, const char **argv, const char *prefix) +{ + struct data_cmd *cmd; + const char *cmdstr; + + /* No command specified. 
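Note: the new builtin-data.c registers its subcommands in a small name/summary/handler table and dispatches by comparing the first non-option argument against each entry, falling back to a usage listing built from the same table. A minimal standalone version of that table-dispatch pattern is sketched below with a made-up command; it mirrors the structure only, not perf's option parsing.

#include <stdio.h>
#include <string.h>

typedef int (*cmd_fn_t)(int argc, char **argv);

struct cmd {
        const char *name;
        const char *summary;
        cmd_fn_t fn;
};

static int cmd_convert(int argc, char **argv)
{
        (void)argc; (void)argv;
        printf("convert: conversion would run here\n");
        return 0;
}

static struct cmd cmds[] = {
        { "convert", "converts data file between formats", cmd_convert },
        { NULL, NULL, NULL },
};

int main(int argc, char **argv)
{
        struct cmd *c;

        if (argc < 2) {
                printf("Available commands:\n");
                for (c = cmds; c->name; c++)
                        printf("  %s\t- %s\n", c->name, c->summary);
                return 1;
        }

        for (c = cmds; c->name; c++)
                if (!strcmp(c->name, argv[1]))
                        return c->fn(argc - 1, argv + 1);

        fprintf(stderr, "Unknown command: %s\n", argv[1]);
        return 1;
}

Keeping the table as the single source of truth means the usage text can never drift out of sync with the commands actually wired up.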
*/ + if (argc < 2) + goto usage; + + argc = parse_options_subcommand(argc, argv, data_options, data_subcommands, data_usage, + PARSE_OPT_STOP_AT_NON_OPTION); + if (argc < 1) + goto usage; + + cmdstr = argv[0]; + + for_each_cmd(cmd) { + if (strcmp(cmd->name, cmdstr)) + continue; + + return cmd->fn(argc, argv, prefix); + } + + pr_err("Unknown command: %s\n", cmdstr); +usage: + print_usage(); + return -1; +} diff --git a/tools/perf/builtin-diff.c b/tools/perf/builtin-diff.c index 74aada554b12..daaa7dca9c3b 100644 --- a/tools/perf/builtin-diff.c +++ b/tools/perf/builtin-diff.c @@ -328,6 +328,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, { struct addr_location al; struct hists *hists = evsel__hists(evsel); + int ret = -1; if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { pr_warning("problem processing %d event, skipping it.\n", @@ -338,7 +339,7 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, if (hists__add_entry(hists, &al, sample->period, sample->weight, sample->transaction)) { pr_warning("problem incrementing symbol period, skipping event\n"); - return -1; + goto out_put; } /* @@ -350,8 +351,10 @@ static int diff__process_sample_event(struct perf_tool *tool __maybe_unused, hists->stats.total_period += sample->period; if (!al.filtered) hists->stats.total_non_filtered_period += sample->period; - - return 0; + ret = 0; +out_put: + addr_location__put(&al); + return ret; } static struct perf_tool tool = { @@ -747,7 +750,7 @@ static int __cmd_diff(void) goto out_delete; } - ret = perf_session__process_events(d->session, &tool); + ret = perf_session__process_events(d->session); if (ret) { pr_err("Failed to process %s\n", d->file.path); goto out_delete; @@ -791,6 +794,8 @@ static const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), + OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, + "file", "kallsyms pathname"), OPT_BOOLEAN('m', "modules", &symbol_conf.use_modules, "load module symbols - WARNING: use only with -k and LIVE kernel"), OPT_STRING('d', "dsos", &symbol_conf.dso_list_str, "dso[,dso...]", @@ -802,7 +807,7 @@ static const struct option options[] = { OPT_STRING('s', "sort", &sort_order, "key[,key2...]", "sort by key(s): pid, comm, dso, symbol, parent, cpu, srcline, ..." " Please refer the man page for the complete list."), - OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", + OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' 
is reserved."), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", diff --git a/tools/perf/builtin-evlist.c b/tools/perf/builtin-evlist.c index 0f93f859b782..695ec5a50cf2 100644 --- a/tools/perf/builtin-evlist.c +++ b/tools/perf/builtin-evlist.c @@ -24,6 +24,7 @@ static int __cmd_evlist(const char *file_name, struct perf_attr_details *details struct perf_data_file file = { .path = file_name, .mode = PERF_DATA_MODE_READ, + .force = details->force, }; session = perf_session__new(&file, 0, NULL); @@ -47,6 +48,7 @@ int cmd_evlist(int argc, const char **argv, const char *prefix __maybe_unused) "Show all event attr details"), OPT_BOOLEAN('g', "group", &details.event_group, "Show event group information"), + OPT_BOOLEAN('f', "force", &details.force, "don't complain, do it"), OPT_END() }; const char * const evlist_usage[] = { diff --git a/tools/perf/builtin-help.c b/tools/perf/builtin-help.c index 25d20628212e..36486eade1ef 100644 --- a/tools/perf/builtin-help.c +++ b/tools/perf/builtin-help.c @@ -437,7 +437,18 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) HELP_FORMAT_INFO), OPT_END(), }; - const char * const builtin_help_usage[] = { + const char * const builtin_help_subcommands[] = { + "buildid-cache", "buildid-list", "diff", "evlist", "help", "list", + "record", "report", "bench", "stat", "timechart", "top", "annotate", + "script", "sched", "kmem", "lock", "kvm", "test", "inject", "mem", "data", +#ifdef HAVE_LIBELF_SUPPORT + "probe", +#endif +#ifdef HAVE_LIBAUDIT_SUPPORT + "trace", +#endif + NULL }; + const char *builtin_help_usage[] = { "perf help [--all] [--man|--web|--info] [command]", NULL }; @@ -448,8 +459,8 @@ int cmd_help(int argc, const char **argv, const char *prefix __maybe_unused) perf_config(perf_help_config, &help_format); - argc = parse_options(argc, argv, builtin_help_options, - builtin_help_usage, 0); + argc = parse_options_subcommand(argc, argv, builtin_help_options, + builtin_help_subcommands, builtin_help_usage, 0); if (show_all) { printf("\n usage: %s\n\n", perf_usage_string); diff --git a/tools/perf/builtin-inject.c b/tools/perf/builtin-inject.c index a13641e066f5..01b06492bd6a 100644 --- a/tools/perf/builtin-inject.c +++ b/tools/perf/builtin-inject.c @@ -16,6 +16,7 @@ #include "util/debug.h" #include "util/build-id.h" #include "util/data.h" +#include "util/auxtrace.h" #include "util/parse-options.h" @@ -26,10 +27,12 @@ struct perf_inject { struct perf_session *session; bool build_ids; bool sched_stat; + bool have_auxtrace; const char *input_name; struct perf_data_file output; u64 bytes_written; struct list_head samples; + struct itrace_synth_opts itrace_synth_opts; }; struct event_entry { @@ -38,14 +41,11 @@ struct event_entry { union perf_event event[0]; }; -static int perf_event__repipe_synth(struct perf_tool *tool, - union perf_event *event) +static int output_bytes(struct perf_inject *inject, void *buf, size_t sz) { - struct perf_inject *inject = container_of(tool, struct perf_inject, tool); ssize_t size; - size = perf_data_file__write(&inject->output, event, - event->header.size); + size = perf_data_file__write(&inject->output, buf, sz); if (size < 0) return -errno; @@ -53,6 +53,22 @@ static int perf_event__repipe_synth(struct perf_tool *tool, return 0; } +static int perf_event__repipe_synth(struct perf_tool *tool, + union perf_event *event) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, + tool); + + return output_bytes(inject, event, event->header.size); +} + +static int 
perf_event__repipe_oe_synth(struct perf_tool *tool, + union perf_event *event, + struct ordered_events *oe __maybe_unused) +{ + return perf_event__repipe_synth(tool, event); +} + static int perf_event__repipe_op2_synth(struct perf_tool *tool, union perf_event *event, struct perf_session *session @@ -79,6 +95,79 @@ static int perf_event__repipe_attr(struct perf_tool *tool, return perf_event__repipe_synth(tool, event); } +#ifdef HAVE_AUXTRACE_SUPPORT + +static int copy_bytes(struct perf_inject *inject, int fd, off_t size) +{ + char buf[4096]; + ssize_t ssz; + int ret; + + while (size > 0) { + ssz = read(fd, buf, min(size, (off_t)sizeof(buf))); + if (ssz < 0) + return -errno; + ret = output_bytes(inject, buf, ssz); + if (ret) + return ret; + size -= ssz; + } + + return 0; +} + +static s64 perf_event__repipe_auxtrace(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session + __maybe_unused) +{ + struct perf_inject *inject = container_of(tool, struct perf_inject, + tool); + int ret; + + inject->have_auxtrace = true; + + if (!inject->output.is_pipe) { + off_t offset; + + offset = lseek(inject->output.fd, 0, SEEK_CUR); + if (offset == -1) + return -errno; + ret = auxtrace_index__auxtrace_event(&session->auxtrace_index, + event, offset); + if (ret < 0) + return ret; + } + + if (perf_data_file__is_pipe(session->file) || !session->one_mmap) { + ret = output_bytes(inject, event, event->header.size); + if (ret < 0) + return ret; + ret = copy_bytes(inject, perf_data_file__fd(session->file), + event->auxtrace.size); + } else { + ret = output_bytes(inject, event, + event->header.size + event->auxtrace.size); + } + if (ret < 0) + return ret; + + return event->auxtrace.size; +} + +#else + +static s64 +perf_event__repipe_auxtrace(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + pr_err("AUX area tracing not supported\n"); + return -EINVAL; +} + +#endif + static int perf_event__repipe(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample __maybe_unused, @@ -148,6 +237,32 @@ static int perf_event__repipe_fork(struct perf_tool *tool, return err; } +static int perf_event__repipe_comm(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err; + + err = perf_event__process_comm(tool, event, sample, machine); + perf_event__repipe(tool, event, sample, machine); + + return err; +} + +static int perf_event__repipe_exit(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + int err; + + err = perf_event__process_exit(tool, event, sample, machine); + perf_event__repipe(tool, event, sample, machine); + + return err; +} + static int perf_event__repipe_tracing_data(struct perf_tool *tool, union perf_event *event, struct perf_session *session) @@ -160,6 +275,18 @@ static int perf_event__repipe_tracing_data(struct perf_tool *tool, return err; } +static int perf_event__repipe_id_index(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + int err; + + perf_event__repipe_synth(tool, event); + err = perf_event__process_id_index(tool, event, session); + + return err; +} + static int dso__read_build_id(struct dso *dso) { if (dso->has_build_id) @@ -238,6 +365,7 @@ static int perf_event__inject_buildid(struct perf_tool *tool, } } + thread__put(thread); repipe: perf_event__repipe(tool, event, sample, machine); return 0; @@ -344,23 +472,25 @@ static int 
__cmd_inject(struct perf_inject *inject) struct perf_session *session = inject->session; struct perf_data_file *file_out = &inject->output; int fd = perf_data_file__fd(file_out); + u64 output_data_offset; signal(SIGINT, sig_handler); - if (inject->build_ids || inject->sched_stat) { + if (inject->build_ids || inject->sched_stat || + inject->itrace_synth_opts.set) { inject->tool.mmap = perf_event__repipe_mmap; inject->tool.mmap2 = perf_event__repipe_mmap2; inject->tool.fork = perf_event__repipe_fork; inject->tool.tracing_data = perf_event__repipe_tracing_data; } + output_data_offset = session->header.data_offset; + if (inject->build_ids) { inject->tool.sample = perf_event__inject_buildid; } else if (inject->sched_stat) { struct perf_evsel *evsel; - inject->tool.ordered_events = true; - evlist__for_each(session->evlist, evsel) { const char *name = perf_evsel__name(evsel); @@ -374,17 +504,43 @@ static int __cmd_inject(struct perf_inject *inject) else if (!strncmp(name, "sched:sched_stat_", 17)) evsel->handler = perf_inject__sched_stat; } + } else if (inject->itrace_synth_opts.set) { + session->itrace_synth_opts = &inject->itrace_synth_opts; + inject->itrace_synth_opts.inject = true; + inject->tool.comm = perf_event__repipe_comm; + inject->tool.exit = perf_event__repipe_exit; + inject->tool.id_index = perf_event__repipe_id_index; + inject->tool.auxtrace_info = perf_event__process_auxtrace_info; + inject->tool.auxtrace = perf_event__process_auxtrace; + inject->tool.ordered_events = true; + inject->tool.ordering_requires_timestamps = true; + /* Allow space in the header for new attributes */ + output_data_offset = 4096; } + if (!inject->itrace_synth_opts.set) + auxtrace_index__free(&session->auxtrace_index); + if (!file_out->is_pipe) - lseek(fd, session->header.data_offset, SEEK_SET); + lseek(fd, output_data_offset, SEEK_SET); - ret = perf_session__process_events(session, &inject->tool); + ret = perf_session__process_events(session); if (!file_out->is_pipe) { - if (inject->build_ids) + if (inject->build_ids) { perf_header__set_feat(&session->header, HEADER_BUILD_ID); + if (inject->have_auxtrace) + dsos__hit_all(session); + } + /* + * The AUX areas have been removed and replaced with + * synthesized hardware events, so clear the feature flag. 
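Note on the AUX area handling in perf inject: the repipe path either forwards the mmapped event plus payload in one write or, in pipe mode, streams the payload through a small bounce buffer (copy_bytes()), while non-pipe output additionally records the event's file offset in an auxtrace index for later lookup. Below is a generic sketch of such a bounded fd-to-fd copy that tolerates short reads and writes; copy_payload() is an illustrative stand-in, not the patch's helper.

#include <errno.h>
#include <stdio.h>
#include <sys/types.h>
#include <unistd.h>

/* Copy 'size' bytes from in_fd to out_fd through a small bounce buffer. */
static int copy_payload(int in_fd, int out_fd, off_t size)
{
        char buf[4096];

        while (size > 0) {
                ssize_t n = read(in_fd, buf,
                                 size < (off_t)sizeof(buf) ? (size_t)size
                                                           : sizeof(buf));
                char *p = buf;

                if (n < 0)
                        return -errno;
                if (n == 0)
                        return -EINVAL; /* unexpected EOF */

                size -= n;
                while (n > 0) {
                        ssize_t w = write(out_fd, p, n);

                        if (w < 0)
                                return -errno;
                        p += w;
                        n -= w;
                }
        }
        return 0;
}

int main(void)
{
        /* e.g.: echo "0123456789abcdef" | ./a.out */
        return copy_payload(0, 1, 16) ? 1 : 0;
}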
+ */ + if (inject->itrace_synth_opts.set) + perf_header__clear_feat(&session->header, + HEADER_AUXTRACE); + session->header.data_offset = output_data_offset; session->header.data_size = inject->bytes_written; perf_session__write_header(session, session->evlist, fd, true); } @@ -403,12 +559,17 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) .fork = perf_event__repipe, .exit = perf_event__repipe, .lost = perf_event__repipe, + .aux = perf_event__repipe, + .itrace_start = perf_event__repipe, .read = perf_event__repipe_sample, .throttle = perf_event__repipe, .unthrottle = perf_event__repipe, .attr = perf_event__repipe_attr, .tracing_data = perf_event__repipe_op2_synth, - .finished_round = perf_event__repipe_op2_synth, + .auxtrace_info = perf_event__repipe_op2_synth, + .auxtrace = perf_event__repipe_auxtrace, + .auxtrace_error = perf_event__repipe_op2_synth, + .finished_round = perf_event__repipe_oe_synth, .build_id = perf_event__repipe_op2_synth, .id_index = perf_event__repipe_op2_synth, }, @@ -438,6 +599,10 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show build ids, etc)"), OPT_STRING(0, "kallsyms", &symbol_conf.kallsyms_name, "file", "kallsyms pathname"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_CALLBACK_OPTARG(0, "itrace", &inject.itrace_synth_opts, + NULL, "opts", "Instruction Tracing options", + itrace_parse_synth_opts), OPT_END() }; const char * const inject_usage[] = { @@ -458,17 +623,20 @@ int cmd_inject(int argc, const char **argv, const char *prefix __maybe_unused) return -1; } + inject.tool.ordered_events = inject.sched_stat; + file.path = inject.input_name; inject.session = perf_session__new(&file, true, &inject.tool); if (inject.session == NULL) return -1; - if (symbol__init(&inject.session->header.env) < 0) - return -1; + ret = symbol__init(&inject.session->header.env); + if (ret < 0) + goto out_delete; ret = __cmd_inject(&inject); +out_delete: perf_session__delete(inject.session); - return ret; } diff --git a/tools/perf/builtin-kmem.c b/tools/perf/builtin-kmem.c index f295141025bc..23b1faaaa4cc 100644 --- a/tools/perf/builtin-kmem.c +++ b/tools/perf/builtin-kmem.c @@ -10,6 +10,7 @@ #include "util/header.h" #include "util/session.h" #include "util/tool.h" +#include "util/callchain.h" #include "util/parse-options.h" #include "util/trace-event.h" @@ -20,9 +21,20 @@ #include <linux/rbtree.h> #include <linux/string.h> +#include <locale.h> +#include <regex.h> + +static int kmem_slab; +static int kmem_page; + +static long kmem_page_size; +static enum { + KMEM_SLAB, + KMEM_PAGE, +} kmem_default = KMEM_SLAB; /* for backward compatibility */ struct alloc_stat; -typedef int (*sort_fn_t)(struct alloc_stat *, struct alloc_stat *); +typedef int (*sort_fn_t)(void *, void *); static int alloc_flag; static int caller_flag; @@ -173,8 +185,8 @@ static int perf_evsel__process_alloc_node_event(struct perf_evsel *evsel, return ret; } -static int ptr_cmp(struct alloc_stat *, struct alloc_stat *); -static int callsite_cmp(struct alloc_stat *, struct alloc_stat *); +static int ptr_cmp(void *, void *); +static int slab_callsite_cmp(void *, void *); static struct alloc_stat *search_alloc_stat(unsigned long ptr, unsigned long call_site, @@ -215,7 +227,8 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel, s_alloc->pingpong++; s_caller = search_alloc_stat(0, s_alloc->call_site, - &root_caller_stat, callsite_cmp); + &root_caller_stat, + slab_callsite_cmp); if (!s_caller) 
return -1; s_caller->pingpong++; @@ -225,6 +238,665 @@ static int perf_evsel__process_free_event(struct perf_evsel *evsel, return 0; } +static u64 total_page_alloc_bytes; +static u64 total_page_free_bytes; +static u64 total_page_nomatch_bytes; +static u64 total_page_fail_bytes; +static unsigned long nr_page_allocs; +static unsigned long nr_page_frees; +static unsigned long nr_page_fails; +static unsigned long nr_page_nomatch; + +static bool use_pfn; +static bool live_page; +static struct perf_session *kmem_session; + +#define MAX_MIGRATE_TYPES 6 +#define MAX_PAGE_ORDER 11 + +static int order_stats[MAX_PAGE_ORDER][MAX_MIGRATE_TYPES]; + +struct page_stat { + struct rb_node node; + u64 page; + u64 callsite; + int order; + unsigned gfp_flags; + unsigned migrate_type; + u64 alloc_bytes; + u64 free_bytes; + int nr_alloc; + int nr_free; +}; + +static struct rb_root page_live_tree; +static struct rb_root page_alloc_tree; +static struct rb_root page_alloc_sorted; +static struct rb_root page_caller_tree; +static struct rb_root page_caller_sorted; + +struct alloc_func { + u64 start; + u64 end; + char *name; +}; + +static int nr_alloc_funcs; +static struct alloc_func *alloc_func_list; + +static int funcmp(const void *a, const void *b) +{ + const struct alloc_func *fa = a; + const struct alloc_func *fb = b; + + if (fa->start > fb->start) + return 1; + else + return -1; +} + +static int callcmp(const void *a, const void *b) +{ + const struct alloc_func *fa = a; + const struct alloc_func *fb = b; + + if (fb->start <= fa->start && fa->end < fb->end) + return 0; + + if (fa->start > fb->start) + return 1; + else + return -1; +} + +static int build_alloc_func_list(void) +{ + int ret; + struct map *kernel_map; + struct symbol *sym; + struct rb_node *node; + struct alloc_func *func; + struct machine *machine = &kmem_session->machines.host; + regex_t alloc_func_regex; + const char pattern[] = "^_?_?(alloc|get_free|get_zeroed)_pages?"; + + ret = regcomp(&alloc_func_regex, pattern, REG_EXTENDED); + if (ret) { + char err[BUFSIZ]; + + regerror(ret, &alloc_func_regex, err, sizeof(err)); + pr_err("Invalid regex: %s\n%s", pattern, err); + return -EINVAL; + } + + kernel_map = machine->vmlinux_maps[MAP__FUNCTION]; + if (map__load(kernel_map, NULL) < 0) { + pr_err("cannot load kernel map\n"); + return -ENOENT; + } + + map__for_each_symbol(kernel_map, sym, node) { + if (regexec(&alloc_func_regex, sym->name, 0, NULL, 0)) + continue; + + func = realloc(alloc_func_list, + (nr_alloc_funcs + 1) * sizeof(*func)); + if (func == NULL) + return -ENOMEM; + + pr_debug("alloc func: %s\n", sym->name); + func[nr_alloc_funcs].start = sym->start; + func[nr_alloc_funcs].end = sym->end; + func[nr_alloc_funcs].name = sym->name; + + alloc_func_list = func; + nr_alloc_funcs++; + } + + qsort(alloc_func_list, nr_alloc_funcs, sizeof(*func), funcmp); + + regfree(&alloc_func_regex); + return 0; +} + +/* + * Find first non-memory allocation function from callchain. + * The allocation functions are in the 'alloc_func_list'. 
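Note: find_callsite() resolves the sample's callchain and walks it until bsearch() over the sorted alloc_func_list stops matching; the comparator treats the probed address as a hit whenever it falls inside a function's [start, end) range, so every frame still inside an allocator is skipped. A self-contained sketch of that range-matching bsearch follows; the struct, addresses and function names are illustrative only.

#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>

struct func_range {
        uint64_t start;
        uint64_t end;           /* exclusive */
        const char *name;
};

/* key carries start == end == address; hit when it lies in [start, end) */
static int range_cmp(const void *a, const void *b)
{
        const struct func_range *key = a;
        const struct func_range *elem = b;

        if (elem->start <= key->start && key->start < elem->end)
                return 0;
        return key->start < elem->start ? -1 : 1;
}

int main(void)
{
        /* must be sorted by start and non-overlapping for bsearch to work */
        struct func_range funcs[] = {
                { 0x1000, 0x1080, "alloc_pages" },
                { 0x2000, 0x20c0, "get_free_pages" },
        };
        struct func_range key = { .start = 0x2010, .end = 0x2010, .name = NULL };
        struct func_range *hit;

        hit = bsearch(&key, funcs, 2, sizeof(funcs[0]), range_cmp);
        printf("0x%llx -> %s\n", (unsigned long long)key.start,
               hit ? hit->name : "<none>");
        return 0;
}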
+ */ +static u64 find_callsite(struct perf_evsel *evsel, struct perf_sample *sample) +{ + struct addr_location al; + struct machine *machine = &kmem_session->machines.host; + struct callchain_cursor_node *node; + + if (alloc_func_list == NULL) { + if (build_alloc_func_list() < 0) + goto out; + } + + al.thread = machine__findnew_thread(machine, sample->pid, sample->tid); + sample__resolve_callchain(sample, NULL, evsel, &al, 16); + + callchain_cursor_commit(&callchain_cursor); + while (true) { + struct alloc_func key, *caller; + u64 addr; + + node = callchain_cursor_current(&callchain_cursor); + if (node == NULL) + break; + + key.start = key.end = node->ip; + caller = bsearch(&key, alloc_func_list, nr_alloc_funcs, + sizeof(key), callcmp); + if (!caller) { + /* found */ + if (node->map) + addr = map__unmap_ip(node->map, node->ip); + else + addr = node->ip; + + return addr; + } else + pr_debug3("skipping alloc function: %s\n", caller->name); + + callchain_cursor_advance(&callchain_cursor); + } + +out: + pr_debug2("unknown callsite: %"PRIx64 "\n", sample->ip); + return sample->ip; +} + +struct sort_dimension { + const char name[20]; + sort_fn_t cmp; + struct list_head list; +}; + +static LIST_HEAD(page_alloc_sort_input); +static LIST_HEAD(page_caller_sort_input); + +static struct page_stat * +__page_stat__findnew_page(struct page_stat *pstat, bool create) +{ + struct rb_node **node = &page_live_tree.rb_node; + struct rb_node *parent = NULL; + struct page_stat *data; + + while (*node) { + s64 cmp; + + parent = *node; + data = rb_entry(*node, struct page_stat, node); + + cmp = data->page - pstat->page; + if (cmp < 0) + node = &parent->rb_left; + else if (cmp > 0) + node = &parent->rb_right; + else + return data; + } + + if (!create) + return NULL; + + data = zalloc(sizeof(*data)); + if (data != NULL) { + data->page = pstat->page; + data->order = pstat->order; + data->gfp_flags = pstat->gfp_flags; + data->migrate_type = pstat->migrate_type; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &page_live_tree); + } + + return data; +} + +static struct page_stat *page_stat__find_page(struct page_stat *pstat) +{ + return __page_stat__findnew_page(pstat, false); +} + +static struct page_stat *page_stat__findnew_page(struct page_stat *pstat) +{ + return __page_stat__findnew_page(pstat, true); +} + +static struct page_stat * +__page_stat__findnew_alloc(struct page_stat *pstat, bool create) +{ + struct rb_node **node = &page_alloc_tree.rb_node; + struct rb_node *parent = NULL; + struct page_stat *data; + struct sort_dimension *sort; + + while (*node) { + int cmp = 0; + + parent = *node; + data = rb_entry(*node, struct page_stat, node); + + list_for_each_entry(sort, &page_alloc_sort_input, list) { + cmp = sort->cmp(pstat, data); + if (cmp) + break; + } + + if (cmp < 0) + node = &parent->rb_left; + else if (cmp > 0) + node = &parent->rb_right; + else + return data; + } + + if (!create) + return NULL; + + data = zalloc(sizeof(*data)); + if (data != NULL) { + data->page = pstat->page; + data->order = pstat->order; + data->gfp_flags = pstat->gfp_flags; + data->migrate_type = pstat->migrate_type; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &page_alloc_tree); + } + + return data; +} + +static struct page_stat *page_stat__find_alloc(struct page_stat *pstat) +{ + return __page_stat__findnew_alloc(pstat, false); +} + +static struct page_stat *page_stat__findnew_alloc(struct page_stat *pstat) +{ + return __page_stat__findnew_alloc(pstat, true); +} + +static 
struct page_stat * +__page_stat__findnew_caller(struct page_stat *pstat, bool create) +{ + struct rb_node **node = &page_caller_tree.rb_node; + struct rb_node *parent = NULL; + struct page_stat *data; + struct sort_dimension *sort; + + while (*node) { + int cmp = 0; + + parent = *node; + data = rb_entry(*node, struct page_stat, node); + + list_for_each_entry(sort, &page_caller_sort_input, list) { + cmp = sort->cmp(pstat, data); + if (cmp) + break; + } + + if (cmp < 0) + node = &parent->rb_left; + else if (cmp > 0) + node = &parent->rb_right; + else + return data; + } + + if (!create) + return NULL; + + data = zalloc(sizeof(*data)); + if (data != NULL) { + data->callsite = pstat->callsite; + data->order = pstat->order; + data->gfp_flags = pstat->gfp_flags; + data->migrate_type = pstat->migrate_type; + + rb_link_node(&data->node, parent, node); + rb_insert_color(&data->node, &page_caller_tree); + } + + return data; +} + +static struct page_stat *page_stat__find_caller(struct page_stat *pstat) +{ + return __page_stat__findnew_caller(pstat, false); +} + +static struct page_stat *page_stat__findnew_caller(struct page_stat *pstat) +{ + return __page_stat__findnew_caller(pstat, true); +} + +static bool valid_page(u64 pfn_or_page) +{ + if (use_pfn && pfn_or_page == -1UL) + return false; + if (!use_pfn && pfn_or_page == 0) + return false; + return true; +} + +struct gfp_flag { + unsigned int flags; + char *compact_str; + char *human_readable; +}; + +static struct gfp_flag *gfps; +static int nr_gfps; + +static int gfpcmp(const void *a, const void *b) +{ + const struct gfp_flag *fa = a; + const struct gfp_flag *fb = b; + + return fa->flags - fb->flags; +} + +/* see include/trace/events/gfpflags.h */ +static const struct { + const char *original; + const char *compact; +} gfp_compact_table[] = { + { "GFP_TRANSHUGE", "THP" }, + { "GFP_HIGHUSER_MOVABLE", "HUM" }, + { "GFP_HIGHUSER", "HU" }, + { "GFP_USER", "U" }, + { "GFP_TEMPORARY", "TMP" }, + { "GFP_KERNEL", "K" }, + { "GFP_NOFS", "NF" }, + { "GFP_ATOMIC", "A" }, + { "GFP_NOIO", "NI" }, + { "GFP_HIGH", "H" }, + { "GFP_WAIT", "W" }, + { "GFP_IO", "I" }, + { "GFP_COLD", "CO" }, + { "GFP_NOWARN", "NWR" }, + { "GFP_REPEAT", "R" }, + { "GFP_NOFAIL", "NF" }, + { "GFP_NORETRY", "NR" }, + { "GFP_COMP", "C" }, + { "GFP_ZERO", "Z" }, + { "GFP_NOMEMALLOC", "NMA" }, + { "GFP_MEMALLOC", "MA" }, + { "GFP_HARDWALL", "HW" }, + { "GFP_THISNODE", "TN" }, + { "GFP_RECLAIMABLE", "RC" }, + { "GFP_MOVABLE", "M" }, + { "GFP_NOTRACK", "NT" }, + { "GFP_NO_KSWAPD", "NK" }, + { "GFP_OTHER_NODE", "ON" }, + { "GFP_NOWAIT", "NW" }, +}; + +static size_t max_gfp_len; + +static char *compact_gfp_flags(char *gfp_flags) +{ + char *orig_flags = strdup(gfp_flags); + char *new_flags = NULL; + char *str, *pos = NULL; + size_t len = 0; + + if (orig_flags == NULL) + return NULL; + + str = strtok_r(orig_flags, "|", &pos); + while (str) { + size_t i; + char *new; + const char *cpt; + + for (i = 0; i < ARRAY_SIZE(gfp_compact_table); i++) { + if (strcmp(gfp_compact_table[i].original, str)) + continue; + + cpt = gfp_compact_table[i].compact; + new = realloc(new_flags, len + strlen(cpt) + 2); + if (new == NULL) { + free(new_flags); + return NULL; + } + + new_flags = new; + + if (!len) { + strcpy(new_flags, cpt); + } else { + strcat(new_flags, "|"); + strcat(new_flags, cpt); + len++; + } + + len += strlen(cpt); + } + + str = strtok_r(NULL, "|", &pos); + } + + if (max_gfp_len < len) + max_gfp_len = len; + + free(orig_flags); + return new_flags; +} + +static char *compact_gfp_string(unsigned long 
gfp_flags) +{ + struct gfp_flag key = { + .flags = gfp_flags, + }; + struct gfp_flag *gfp; + + gfp = bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp); + if (gfp) + return gfp->compact_str; + + return NULL; +} + +static int parse_gfp_flags(struct perf_evsel *evsel, struct perf_sample *sample, + unsigned int gfp_flags) +{ + struct pevent_record record = { + .cpu = sample->cpu, + .data = sample->raw_data, + .size = sample->raw_size, + }; + struct trace_seq seq; + char *str, *pos = NULL; + + if (nr_gfps) { + struct gfp_flag key = { + .flags = gfp_flags, + }; + + if (bsearch(&key, gfps, nr_gfps, sizeof(*gfps), gfpcmp)) + return 0; + } + + trace_seq_init(&seq); + pevent_event_info(&seq, evsel->tp_format, &record); + + str = strtok_r(seq.buffer, " ", &pos); + while (str) { + if (!strncmp(str, "gfp_flags=", 10)) { + struct gfp_flag *new; + + new = realloc(gfps, (nr_gfps + 1) * sizeof(*gfps)); + if (new == NULL) + return -ENOMEM; + + gfps = new; + new += nr_gfps++; + + new->flags = gfp_flags; + new->human_readable = strdup(str + 10); + new->compact_str = compact_gfp_flags(str + 10); + if (!new->human_readable || !new->compact_str) + return -ENOMEM; + + qsort(gfps, nr_gfps, sizeof(*gfps), gfpcmp); + } + + str = strtok_r(NULL, " ", &pos); + } + + trace_seq_destroy(&seq); + return 0; +} + +static int perf_evsel__process_page_alloc_event(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 page; + unsigned int order = perf_evsel__intval(evsel, sample, "order"); + unsigned int gfp_flags = perf_evsel__intval(evsel, sample, "gfp_flags"); + unsigned int migrate_type = perf_evsel__intval(evsel, sample, + "migratetype"); + u64 bytes = kmem_page_size << order; + u64 callsite; + struct page_stat *pstat; + struct page_stat this = { + .order = order, + .gfp_flags = gfp_flags, + .migrate_type = migrate_type, + }; + + if (use_pfn) + page = perf_evsel__intval(evsel, sample, "pfn"); + else + page = perf_evsel__intval(evsel, sample, "page"); + + nr_page_allocs++; + total_page_alloc_bytes += bytes; + + if (!valid_page(page)) { + nr_page_fails++; + total_page_fail_bytes += bytes; + + return 0; + } + + if (parse_gfp_flags(evsel, sample, gfp_flags) < 0) + return -1; + + callsite = find_callsite(evsel, sample); + + /* + * This is to find the current page (with correct gfp flags and + * migrate type) at free event. 
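Note: the gfp-flag handling added above parses the human-readable "gfp_flags=..." string once per unique flag combination, caches it, and builds the compact column by splitting on '|' and translating each token through gfp_compact_table. The sketch below reproduces only that tokenize-and-translate step with strtok_r() and a small illustrative subset of the table; it is a sketch of the approach, not the perf code, and unknown tokens simply pass through unchanged.

#include <stdio.h>
#include <stdlib.h>
#include <string.h>

static const struct {
        const char *original;
        const char *compact;
} table[] = {
        { "GFP_KERNEL", "K"   },
        { "GFP_ATOMIC", "A"   },
        { "GFP_NOWARN", "NWR" },
        { "GFP_ZERO",   "Z"   },
};

/* Build a compact "K|NWR|Z" style string from a "GFP_X|GFP_Y" input. */
static void compact_flags(const char *flags, char *out, size_t outsz)
{
        char *dup = strdup(flags);
        char *save = NULL;
        char *tok;
        size_t i;

        out[0] = '\0';
        if (!dup)
                return;

        for (tok = strtok_r(dup, "|", &save); tok;
             tok = strtok_r(NULL, "|", &save)) {
                const char *cpt = tok;  /* fall back to the original token */

                for (i = 0; i < sizeof(table) / sizeof(table[0]); i++) {
                        if (!strcmp(table[i].original, tok)) {
                                cpt = table[i].compact;
                                break;
                        }
                }
                if (out[0])
                        strncat(out, "|", outsz - strlen(out) - 1);
                strncat(out, cpt, outsz - strlen(out) - 1);
        }
        free(dup);
}

int main(void)
{
        char buf[64];

        compact_flags("GFP_KERNEL|GFP_NOWARN|GFP_ZERO", buf, sizeof(buf));
        printf("%s\n", buf);    /* K|NWR|Z */
        return 0;
}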
+ */ + this.page = page; + pstat = page_stat__findnew_page(&this); + if (pstat == NULL) + return -ENOMEM; + + pstat->nr_alloc++; + pstat->alloc_bytes += bytes; + pstat->callsite = callsite; + + if (!live_page) { + pstat = page_stat__findnew_alloc(&this); + if (pstat == NULL) + return -ENOMEM; + + pstat->nr_alloc++; + pstat->alloc_bytes += bytes; + pstat->callsite = callsite; + } + + this.callsite = callsite; + pstat = page_stat__findnew_caller(&this); + if (pstat == NULL) + return -ENOMEM; + + pstat->nr_alloc++; + pstat->alloc_bytes += bytes; + + order_stats[order][migrate_type]++; + + return 0; +} + +static int perf_evsel__process_page_free_event(struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 page; + unsigned int order = perf_evsel__intval(evsel, sample, "order"); + u64 bytes = kmem_page_size << order; + struct page_stat *pstat; + struct page_stat this = { + .order = order, + }; + + if (use_pfn) + page = perf_evsel__intval(evsel, sample, "pfn"); + else + page = perf_evsel__intval(evsel, sample, "page"); + + nr_page_frees++; + total_page_free_bytes += bytes; + + this.page = page; + pstat = page_stat__find_page(&this); + if (pstat == NULL) { + pr_debug2("missing free at page %"PRIx64" (order: %d)\n", + page, order); + + nr_page_nomatch++; + total_page_nomatch_bytes += bytes; + + return 0; + } + + this.gfp_flags = pstat->gfp_flags; + this.migrate_type = pstat->migrate_type; + this.callsite = pstat->callsite; + + rb_erase(&pstat->node, &page_live_tree); + free(pstat); + + if (live_page) { + order_stats[this.order][this.migrate_type]--; + } else { + pstat = page_stat__find_alloc(&this); + if (pstat == NULL) + return -ENOMEM; + + pstat->nr_free++; + pstat->free_bytes += bytes; + } + + pstat = page_stat__find_caller(&this); + if (pstat == NULL) + return -ENOENT; + + pstat->nr_free++; + pstat->free_bytes += bytes; + + if (live_page) { + pstat->nr_alloc--; + pstat->alloc_bytes -= bytes; + + if (pstat->nr_alloc == 0) { + rb_erase(&pstat->node, &page_caller_tree); + free(pstat); + } + } + + return 0; +} + typedef int (*tracepoint_handler)(struct perf_evsel *evsel, struct perf_sample *sample); @@ -234,6 +906,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct perf_evsel *evsel, struct machine *machine) { + int err = 0; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -247,10 +920,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (evsel->handler != NULL) { tracepoint_handler f = evsel->handler; - return f(evsel, sample); + err = f(evsel, sample); } - return 0; + thread__put(thread); + + return err; } static struct perf_tool perf_kmem = { @@ -269,16 +944,17 @@ static double fragmentation(unsigned long n_req, unsigned long n_alloc) return 100.0 - (100.0 * n_req / n_alloc); } -static void __print_result(struct rb_root *root, struct perf_session *session, - int n_lines, int is_caller) +static void __print_slab_result(struct rb_root *root, + struct perf_session *session, + int n_lines, int is_caller) { struct rb_node *next; struct machine *machine = &session->machines.host; - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); printf(" %-34s |", is_caller ? 
"Callsite": "Alloc Ptr"); printf(" Total_alloc/Per | Total_req/Per | Hit | Ping-pong | Frag\n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); next = rb_first(root); @@ -304,7 +980,7 @@ static void __print_result(struct rb_root *root, struct perf_session *session, snprintf(buf, sizeof(buf), "%#" PRIx64 "", addr); printf(" %-34s |", buf); - printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %8lu | %6.3f%%\n", + printf(" %9llu/%-5lu | %9llu/%-5lu | %8lu | %9lu | %6.3f%%\n", (unsigned long long)data->bytes_alloc, (unsigned long)data->bytes_alloc / data->hit, (unsigned long long)data->bytes_req, @@ -317,43 +993,220 @@ static void __print_result(struct rb_root *root, struct perf_session *session, } if (n_lines == -1) - printf(" ... | ... | ... | ... | ... | ... \n"); + printf(" ... | ... | ... | ... | ... | ... \n"); - printf("%.102s\n", graph_dotted_line); + printf("%.105s\n", graph_dotted_line); } -static void print_summary(void) +static const char * const migrate_type_str[] = { + "UNMOVABL", + "RECLAIM", + "MOVABLE", + "RESERVED", + "CMA/ISLT", + "UNKNOWN", +}; + +static void __print_page_alloc_result(struct perf_session *session, int n_lines) { - printf("\nSUMMARY\n=======\n"); - printf("Total bytes requested: %lu\n", total_requested); - printf("Total bytes allocated: %lu\n", total_allocated); - printf("Total bytes wasted on internal fragmentation: %lu\n", + struct rb_node *next = rb_first(&page_alloc_sorted); + struct machine *machine = &session->machines.host; + const char *format; + int gfp_len = max(strlen("GFP flags"), max_gfp_len); + + printf("\n%.105s\n", graph_dotted_line); + printf(" %-16s | %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n", + use_pfn ? "PFN" : "Page", live_page ? "Live" : "Total", + gfp_len, "GFP flags"); + printf("%.105s\n", graph_dotted_line); + + if (use_pfn) + format = " %16llu | %'16llu | %'9d | %5d | %8s | %-*s | %s\n"; + else + format = " %016llx | %'16llu | %'9d | %5d | %8s | %-*s | %s\n"; + + while (next && n_lines--) { + struct page_stat *data; + struct symbol *sym; + struct map *map; + char buf[32]; + char *caller = buf; + + data = rb_entry(next, struct page_stat, node); + sym = machine__find_kernel_function(machine, data->callsite, + &map, NULL); + if (sym && sym->name) + caller = sym->name; + else + scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); + + printf(format, (unsigned long long)data->page, + (unsigned long long)data->alloc_bytes / 1024, + data->nr_alloc, data->order, + migrate_type_str[data->migrate_type], + gfp_len, compact_gfp_string(data->gfp_flags), caller); + + next = rb_next(next); + } + + if (n_lines == -1) { + printf(" ... | ... | ... | ... | ... | %-*s | ...\n", + gfp_len, "..."); + } + + printf("%.105s\n", graph_dotted_line); +} + +static void __print_page_caller_result(struct perf_session *session, int n_lines) +{ + struct rb_node *next = rb_first(&page_caller_sorted); + struct machine *machine = &session->machines.host; + int gfp_len = max(strlen("GFP flags"), max_gfp_len); + + printf("\n%.105s\n", graph_dotted_line); + printf(" %5s alloc (KB) | Hits | Order | Mig.type | %-*s | Callsite\n", + live_page ? 
"Live" : "Total", gfp_len, "GFP flags"); + printf("%.105s\n", graph_dotted_line); + + while (next && n_lines--) { + struct page_stat *data; + struct symbol *sym; + struct map *map; + char buf[32]; + char *caller = buf; + + data = rb_entry(next, struct page_stat, node); + sym = machine__find_kernel_function(machine, data->callsite, + &map, NULL); + if (sym && sym->name) + caller = sym->name; + else + scnprintf(buf, sizeof(buf), "%"PRIx64, data->callsite); + + printf(" %'16llu | %'9d | %5d | %8s | %-*s | %s\n", + (unsigned long long)data->alloc_bytes / 1024, + data->nr_alloc, data->order, + migrate_type_str[data->migrate_type], + gfp_len, compact_gfp_string(data->gfp_flags), caller); + + next = rb_next(next); + } + + if (n_lines == -1) { + printf(" ... | ... | ... | ... | %-*s | ...\n", + gfp_len, "..."); + } + + printf("%.105s\n", graph_dotted_line); +} + +static void print_gfp_flags(void) +{ + int i; + + printf("#\n"); + printf("# GFP flags\n"); + printf("# ---------\n"); + for (i = 0; i < nr_gfps; i++) { + printf("# %08x: %*s: %s\n", gfps[i].flags, + (int) max_gfp_len, gfps[i].compact_str, + gfps[i].human_readable); + } +} + +static void print_slab_summary(void) +{ + printf("\nSUMMARY (SLAB allocator)"); + printf("\n========================\n"); + printf("Total bytes requested: %'lu\n", total_requested); + printf("Total bytes allocated: %'lu\n", total_allocated); + printf("Total bytes wasted on internal fragmentation: %'lu\n", total_allocated - total_requested); printf("Internal fragmentation: %f%%\n", fragmentation(total_requested, total_allocated)); - printf("Cross CPU allocations: %lu/%lu\n", nr_cross_allocs, nr_allocs); + printf("Cross CPU allocations: %'lu/%'lu\n", nr_cross_allocs, nr_allocs); } -static void print_result(struct perf_session *session) +static void print_page_summary(void) +{ + int o, m; + u64 nr_alloc_freed = nr_page_frees - nr_page_nomatch; + u64 total_alloc_freed_bytes = total_page_free_bytes - total_page_nomatch_bytes; + + printf("\nSUMMARY (page allocator)"); + printf("\n========================\n"); + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation requests", + nr_page_allocs, total_page_alloc_bytes / 1024); + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free requests", + nr_page_frees, total_page_free_bytes / 1024); + printf("\n"); + + printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc+freed requests", + nr_alloc_freed, (total_alloc_freed_bytes) / 1024); + printf("%-30s: %'16"PRIu64" [ %'16"PRIu64" KB ]\n", "Total alloc-only requests", + nr_page_allocs - nr_alloc_freed, + (total_page_alloc_bytes - total_alloc_freed_bytes) / 1024); + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total free-only requests", + nr_page_nomatch, total_page_nomatch_bytes / 1024); + printf("\n"); + + printf("%-30s: %'16lu [ %'16"PRIu64" KB ]\n", "Total allocation failures", + nr_page_fails, total_page_fail_bytes / 1024); + printf("\n"); + + printf("%5s %12s %12s %12s %12s %12s\n", "Order", "Unmovable", + "Reclaimable", "Movable", "Reserved", "CMA/Isolated"); + printf("%.5s %.12s %.12s %.12s %.12s %.12s\n", graph_dotted_line, + graph_dotted_line, graph_dotted_line, graph_dotted_line, + graph_dotted_line, graph_dotted_line); + + for (o = 0; o < MAX_PAGE_ORDER; o++) { + printf("%5d", o); + for (m = 0; m < MAX_MIGRATE_TYPES - 1; m++) { + if (order_stats[o][m]) + printf(" %'12d", order_stats[o][m]); + else + printf(" %12c", '.'); + } + printf("\n"); + } +} + +static void print_slab_result(struct perf_session *session) { if (caller_flag) - 
__print_result(&root_caller_sorted, session, caller_lines, 1); + __print_slab_result(&root_caller_sorted, session, caller_lines, 1); if (alloc_flag) - __print_result(&root_alloc_sorted, session, alloc_lines, 0); - print_summary(); + __print_slab_result(&root_alloc_sorted, session, alloc_lines, 0); + print_slab_summary(); } -struct sort_dimension { - const char name[20]; - sort_fn_t cmp; - struct list_head list; -}; +static void print_page_result(struct perf_session *session) +{ + if (caller_flag || alloc_flag) + print_gfp_flags(); + if (caller_flag) + __print_page_caller_result(session, caller_lines); + if (alloc_flag) + __print_page_alloc_result(session, alloc_lines); + print_page_summary(); +} + +static void print_result(struct perf_session *session) +{ + if (kmem_slab) + print_slab_result(session); + if (kmem_page) + print_page_result(session); +} -static LIST_HEAD(caller_sort); -static LIST_HEAD(alloc_sort); +static LIST_HEAD(slab_caller_sort); +static LIST_HEAD(slab_alloc_sort); +static LIST_HEAD(page_caller_sort); +static LIST_HEAD(page_alloc_sort); -static void sort_insert(struct rb_root *root, struct alloc_stat *data, - struct list_head *sort_list) +static void sort_slab_insert(struct rb_root *root, struct alloc_stat *data, + struct list_head *sort_list) { struct rb_node **new = &(root->rb_node); struct rb_node *parent = NULL; @@ -382,8 +1235,8 @@ static void sort_insert(struct rb_root *root, struct alloc_stat *data, rb_insert_color(&data->node, root); } -static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, - struct list_head *sort_list) +static void __sort_slab_result(struct rb_root *root, struct rb_root *root_sorted, + struct list_head *sort_list) { struct rb_node *node; struct alloc_stat *data; @@ -395,26 +1248,93 @@ static void __sort_result(struct rb_root *root, struct rb_root *root_sorted, rb_erase(node, root); data = rb_entry(node, struct alloc_stat, node); - sort_insert(root_sorted, data, sort_list); + sort_slab_insert(root_sorted, data, sort_list); + } +} + +static void sort_page_insert(struct rb_root *root, struct page_stat *data, + struct list_head *sort_list) +{ + struct rb_node **new = &root->rb_node; + struct rb_node *parent = NULL; + struct sort_dimension *sort; + + while (*new) { + struct page_stat *this; + int cmp = 0; + + this = rb_entry(*new, struct page_stat, node); + parent = *new; + + list_for_each_entry(sort, sort_list, list) { + cmp = sort->cmp(data, this); + if (cmp) + break; + } + + if (cmp > 0) + new = &parent->rb_left; + else + new = &parent->rb_right; + } + + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void __sort_page_result(struct rb_root *root, struct rb_root *root_sorted, + struct list_head *sort_list) +{ + struct rb_node *node; + struct page_stat *data; + + for (;;) { + node = rb_first(root); + if (!node) + break; + + rb_erase(node, root); + data = rb_entry(node, struct page_stat, node); + sort_page_insert(root_sorted, data, sort_list); } } static void sort_result(void) { - __sort_result(&root_alloc_stat, &root_alloc_sorted, &alloc_sort); - __sort_result(&root_caller_stat, &root_caller_sorted, &caller_sort); + if (kmem_slab) { + __sort_slab_result(&root_alloc_stat, &root_alloc_sorted, + &slab_alloc_sort); + __sort_slab_result(&root_caller_stat, &root_caller_sorted, + &slab_caller_sort); + } + if (kmem_page) { + if (live_page) + __sort_page_result(&page_live_tree, &page_alloc_sorted, + &page_alloc_sort); + else + __sort_page_result(&page_alloc_tree, &page_alloc_sorted, + 
&page_alloc_sort); + + __sort_page_result(&page_caller_tree, &page_caller_sorted, + &page_caller_sort); + } } static int __cmd_kmem(struct perf_session *session) { int err = -EINVAL; + struct perf_evsel *evsel; const struct perf_evsel_str_handler kmem_tracepoints[] = { + /* slab allocator */ { "kmem:kmalloc", perf_evsel__process_alloc_event, }, { "kmem:kmem_cache_alloc", perf_evsel__process_alloc_event, }, { "kmem:kmalloc_node", perf_evsel__process_alloc_node_event, }, { "kmem:kmem_cache_alloc_node", perf_evsel__process_alloc_node_event, }, { "kmem:kfree", perf_evsel__process_free_event, }, { "kmem:kmem_cache_free", perf_evsel__process_free_event, }, + /* page allocator */ + { "kmem:mm_page_alloc", perf_evsel__process_page_alloc_event, }, + { "kmem:mm_page_free", perf_evsel__process_page_free_event, }, }; if (!perf_session__has_traces(session, "kmem record")) @@ -425,18 +1345,32 @@ static int __cmd_kmem(struct perf_session *session) goto out; } + evlist__for_each(session->evlist, evsel) { + if (!strcmp(perf_evsel__name(evsel), "kmem:mm_page_alloc") && + perf_evsel__field(evsel, "pfn")) { + use_pfn = true; + break; + } + } + setup_pager(); - err = perf_session__process_events(session, &perf_kmem); - if (err != 0) + err = perf_session__process_events(session); + if (err != 0) { + pr_err("error during process events: %d\n", err); goto out; + } sort_result(); print_result(session); out: return err; } -static int ptr_cmp(struct alloc_stat *l, struct alloc_stat *r) +/* slab sort keys */ +static int ptr_cmp(void *a, void *b) { + struct alloc_stat *l = a; + struct alloc_stat *r = b; + if (l->ptr < r->ptr) return -1; else if (l->ptr > r->ptr) @@ -449,8 +1383,11 @@ static struct sort_dimension ptr_sort_dimension = { .cmp = ptr_cmp, }; -static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) +static int slab_callsite_cmp(void *a, void *b) { + struct alloc_stat *l = a; + struct alloc_stat *r = b; + if (l->call_site < r->call_site) return -1; else if (l->call_site > r->call_site) @@ -460,11 +1397,14 @@ static int callsite_cmp(struct alloc_stat *l, struct alloc_stat *r) static struct sort_dimension callsite_sort_dimension = { .name = "callsite", - .cmp = callsite_cmp, + .cmp = slab_callsite_cmp, }; -static int hit_cmp(struct alloc_stat *l, struct alloc_stat *r) +static int hit_cmp(void *a, void *b) { + struct alloc_stat *l = a; + struct alloc_stat *r = b; + if (l->hit < r->hit) return -1; else if (l->hit > r->hit) @@ -477,8 +1417,11 @@ static struct sort_dimension hit_sort_dimension = { .cmp = hit_cmp, }; -static int bytes_cmp(struct alloc_stat *l, struct alloc_stat *r) +static int bytes_cmp(void *a, void *b) { + struct alloc_stat *l = a; + struct alloc_stat *r = b; + if (l->bytes_alloc < r->bytes_alloc) return -1; else if (l->bytes_alloc > r->bytes_alloc) @@ -491,9 +1434,11 @@ static struct sort_dimension bytes_sort_dimension = { .cmp = bytes_cmp, }; -static int frag_cmp(struct alloc_stat *l, struct alloc_stat *r) +static int frag_cmp(void *a, void *b) { double x, y; + struct alloc_stat *l = a; + struct alloc_stat *r = b; x = fragmentation(l->bytes_req, l->bytes_alloc); y = fragmentation(r->bytes_req, r->bytes_alloc); @@ -510,8 +1455,11 @@ static struct sort_dimension frag_sort_dimension = { .cmp = frag_cmp, }; -static int pingpong_cmp(struct alloc_stat *l, struct alloc_stat *r) +static int pingpong_cmp(void *a, void *b) { + struct alloc_stat *l = a; + struct alloc_stat *r = b; + if (l->pingpong < r->pingpong) return -1; else if (l->pingpong > r->pingpong) @@ -524,7 +1472,135 @@ static 
struct sort_dimension pingpong_sort_dimension = { .cmp = pingpong_cmp, }; -static struct sort_dimension *avail_sorts[] = { +/* page sort keys */ +static int page_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + if (l->page < r->page) + return -1; + else if (l->page > r->page) + return 1; + return 0; +} + +static struct sort_dimension page_sort_dimension = { + .name = "page", + .cmp = page_cmp, +}; + +static int page_callsite_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + if (l->callsite < r->callsite) + return -1; + else if (l->callsite > r->callsite) + return 1; + return 0; +} + +static struct sort_dimension page_callsite_sort_dimension = { + .name = "callsite", + .cmp = page_callsite_cmp, +}; + +static int page_hit_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + if (l->nr_alloc < r->nr_alloc) + return -1; + else if (l->nr_alloc > r->nr_alloc) + return 1; + return 0; +} + +static struct sort_dimension page_hit_sort_dimension = { + .name = "hit", + .cmp = page_hit_cmp, +}; + +static int page_bytes_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + if (l->alloc_bytes < r->alloc_bytes) + return -1; + else if (l->alloc_bytes > r->alloc_bytes) + return 1; + return 0; +} + +static struct sort_dimension page_bytes_sort_dimension = { + .name = "bytes", + .cmp = page_bytes_cmp, +}; + +static int page_order_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + if (l->order < r->order) + return -1; + else if (l->order > r->order) + return 1; + return 0; +} + +static struct sort_dimension page_order_sort_dimension = { + .name = "order", + .cmp = page_order_cmp, +}; + +static int migrate_type_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + /* for internal use to find free'd page */ + if (l->migrate_type == -1U) + return 0; + + if (l->migrate_type < r->migrate_type) + return -1; + else if (l->migrate_type > r->migrate_type) + return 1; + return 0; +} + +static struct sort_dimension migrate_type_sort_dimension = { + .name = "migtype", + .cmp = migrate_type_cmp, +}; + +static int gfp_flags_cmp(void *a, void *b) +{ + struct page_stat *l = a; + struct page_stat *r = b; + + /* for internal use to find free'd page */ + if (l->gfp_flags == -1U) + return 0; + + if (l->gfp_flags < r->gfp_flags) + return -1; + else if (l->gfp_flags > r->gfp_flags) + return 1; + return 0; +} + +static struct sort_dimension gfp_flags_sort_dimension = { + .name = "gfp", + .cmp = gfp_flags_cmp, +}; + +static struct sort_dimension *slab_sorts[] = { &ptr_sort_dimension, &callsite_sort_dimension, &hit_sort_dimension, @@ -533,16 +1609,24 @@ static struct sort_dimension *avail_sorts[] = { &pingpong_sort_dimension, }; -#define NUM_AVAIL_SORTS ((int)ARRAY_SIZE(avail_sorts)) +static struct sort_dimension *page_sorts[] = { + &page_sort_dimension, + &page_callsite_sort_dimension, + &page_hit_sort_dimension, + &page_bytes_sort_dimension, + &page_order_sort_dimension, + &migrate_type_sort_dimension, + &gfp_flags_sort_dimension, +}; -static int sort_dimension__add(const char *tok, struct list_head *list) +static int slab_sort_dimension__add(const char *tok, struct list_head *list) { struct sort_dimension *sort; int i; - for (i = 0; i < NUM_AVAIL_SORTS; i++) { - if (!strcmp(avail_sorts[i]->name, tok)) { - sort = memdup(avail_sorts[i], sizeof(*avail_sorts[i])); + for (i = 0; i < (int)ARRAY_SIZE(slab_sorts); i++) { + if (!strcmp(slab_sorts[i]->name, 
tok)) { + sort = memdup(slab_sorts[i], sizeof(*slab_sorts[i])); if (!sort) { pr_err("%s: memdup failed\n", __func__); return -1; @@ -555,10 +1639,57 @@ static int sort_dimension__add(const char *tok, struct list_head *list) return -1; } -static int setup_sorting(struct list_head *sort_list, const char *arg) +static int page_sort_dimension__add(const char *tok, struct list_head *list) +{ + struct sort_dimension *sort; + int i; + + for (i = 0; i < (int)ARRAY_SIZE(page_sorts); i++) { + if (!strcmp(page_sorts[i]->name, tok)) { + sort = memdup(page_sorts[i], sizeof(*page_sorts[i])); + if (!sort) { + pr_err("%s: memdup failed\n", __func__); + return -1; + } + list_add_tail(&sort->list, list); + return 0; + } + } + + return -1; +} + +static int setup_slab_sorting(struct list_head *sort_list, const char *arg) +{ + char *tok; + char *str = strdup(arg); + char *pos = str; + + if (!str) { + pr_err("%s: strdup failed\n", __func__); + return -1; + } + + while (true) { + tok = strsep(&pos, ","); + if (!tok) + break; + if (slab_sort_dimension__add(tok, sort_list) < 0) { + error("Unknown slab --sort key: '%s'", tok); + free(str); + return -1; + } + } + + free(str); + return 0; +} + +static int setup_page_sorting(struct list_head *sort_list, const char *arg) { char *tok; char *str = strdup(arg); + char *pos = str; if (!str) { pr_err("%s: strdup failed\n", __func__); @@ -566,11 +1697,11 @@ static int setup_sorting(struct list_head *sort_list, const char *arg) } while (true) { - tok = strsep(&str, ","); + tok = strsep(&pos, ","); if (!tok) break; - if (sort_dimension__add(tok, sort_list) < 0) { - error("Unknown --sort key: '%s'", tok); + if (page_sort_dimension__add(tok, sort_list) < 0) { + error("Unknown page --sort key: '%s'", tok); free(str); return -1; } @@ -586,10 +1717,18 @@ static int parse_sort_opt(const struct option *opt __maybe_unused, if (!arg) return -1; - if (caller_flag > alloc_flag) - return setup_sorting(&caller_sort, arg); - else - return setup_sorting(&alloc_sort, arg); + if (kmem_page > kmem_slab || + (kmem_page == 0 && kmem_slab == 0 && kmem_default == KMEM_PAGE)) { + if (caller_flag > alloc_flag) + return setup_page_sorting(&page_caller_sort, arg); + else + return setup_page_sorting(&page_alloc_sort, arg); + } else { + if (caller_flag > alloc_flag) + return setup_slab_sorting(&slab_caller_sort, arg); + else + return setup_slab_sorting(&slab_alloc_sort, arg); + } return 0; } @@ -610,6 +1749,22 @@ static int parse_alloc_opt(const struct option *opt __maybe_unused, return 0; } +static int parse_slab_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + kmem_slab = (kmem_page + 1); + return 0; +} + +static int parse_page_opt(const struct option *opt __maybe_unused, + const char *arg __maybe_unused, + int unset __maybe_unused) +{ + kmem_page = (kmem_slab + 1); + return 0; +} + static int parse_line_opt(const struct option *opt __maybe_unused, const char *arg, int unset __maybe_unused) { @@ -632,6 +1787,8 @@ static int __cmd_record(int argc, const char **argv) { const char * const record_args[] = { "record", "-a", "-R", "-c", "1", + }; + const char * const slab_events[] = { "-e", "kmem:kmalloc", "-e", "kmem:kmalloc_node", "-e", "kmem:kfree", @@ -639,10 +1796,19 @@ static int __cmd_record(int argc, const char **argv) "-e", "kmem:kmem_cache_alloc_node", "-e", "kmem:kmem_cache_free", }; + const char * const page_events[] = { + "-e", "kmem:mm_page_alloc", + "-e", "kmem:mm_page_free", + }; unsigned int rec_argc, i, j; const char **rec_argv; 
rec_argc = ARRAY_SIZE(record_args) + argc - 1; + if (kmem_slab) + rec_argc += ARRAY_SIZE(slab_events); + if (kmem_page) + rec_argc += ARRAY_SIZE(page_events) + 1; /* for -g */ + rec_argv = calloc(rec_argc + 1, sizeof(char *)); if (rec_argv == NULL) @@ -651,26 +1817,65 @@ static int __cmd_record(int argc, const char **argv) for (i = 0; i < ARRAY_SIZE(record_args); i++) rec_argv[i] = strdup(record_args[i]); + if (kmem_slab) { + for (j = 0; j < ARRAY_SIZE(slab_events); j++, i++) + rec_argv[i] = strdup(slab_events[j]); + } + if (kmem_page) { + rec_argv[i++] = strdup("-g"); + + for (j = 0; j < ARRAY_SIZE(page_events); j++, i++) + rec_argv[i] = strdup(page_events[j]); + } + for (j = 1; j < (unsigned int)argc; j++, i++) rec_argv[i] = argv[j]; return cmd_record(i, rec_argv, NULL); } +static int kmem_config(const char *var, const char *value, void *cb) +{ + if (!strcmp(var, "kmem.default")) { + if (!strcmp(value, "slab")) + kmem_default = KMEM_SLAB; + else if (!strcmp(value, "page")) + kmem_default = KMEM_PAGE; + else + pr_err("invalid default value ('slab' or 'page' required): %s\n", + value); + return 0; + } + + return perf_default_config(var, value, cb); +} + int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const default_sort_order = "frag,hit,bytes"; + const char * const default_slab_sort = "frag,hit,bytes"; + const char * const default_page_sort = "bytes,hit"; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; const struct option kmem_options[] = { OPT_STRING('i', "input", &input_name, "file", "input file name"), + OPT_INCR('v', "verbose", &verbose, + "be more verbose (show symbol address, etc)"), OPT_CALLBACK_NOOPT(0, "caller", NULL, NULL, "show per-callsite statistics", parse_caller_opt), OPT_CALLBACK_NOOPT(0, "alloc", NULL, NULL, "show per-allocation statistics", parse_alloc_opt), OPT_CALLBACK('s', "sort", NULL, "key[,key2...]", - "sort by keys: ptr, call_site, bytes, hit, pingpong, frag", - parse_sort_opt), + "sort by keys: ptr, callsite, bytes, hit, pingpong, frag, " + "page, order, migtype, gfp", parse_sort_opt), OPT_CALLBACK('l', "line", NULL, "num", "show n lines", parse_line_opt), OPT_BOOLEAN(0, "raw-ip", &raw_ip, "show raw ip instead of symbol"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_CALLBACK_NOOPT(0, "slab", NULL, NULL, "Analyze slab allocator", + parse_slab_opt), + OPT_CALLBACK_NOOPT(0, "page", NULL, NULL, "Analyze page allocator", + parse_page_opt), + OPT_BOOLEAN(0, "live", &live_page, "Show live page stat"), OPT_END() }; const char *const kmem_subcommands[] = { "record", "stat", NULL }; @@ -679,38 +1884,79 @@ int cmd_kmem(int argc, const char **argv, const char *prefix __maybe_unused) NULL }; struct perf_session *session; - struct perf_data_file file = { - .path = input_name, - .mode = PERF_DATA_MODE_READ, - }; int ret = -1; + const char errmsg[] = "No %s allocation events found. 
Have you run 'perf kmem record --%s'?\n"; + perf_config(kmem_config, NULL); argc = parse_options_subcommand(argc, argv, kmem_options, kmem_subcommands, kmem_usage, 0); if (!argc) usage_with_options(kmem_usage, kmem_options); + if (kmem_slab == 0 && kmem_page == 0) { + if (kmem_default == KMEM_SLAB) + kmem_slab = 1; + else + kmem_page = 1; + } + if (!strncmp(argv[0], "rec", 3)) { symbol__init(NULL); return __cmd_record(argc, argv); } - session = perf_session__new(&file, false, &perf_kmem); + file.path = input_name; + + kmem_session = session = perf_session__new(&file, false, &perf_kmem); if (session == NULL) return -1; + if (kmem_slab) { + if (!perf_evlist__find_tracepoint_by_name(session->evlist, + "kmem:kmalloc")) { + pr_err(errmsg, "slab", "slab"); + goto out_delete; + } + } + + if (kmem_page) { + struct perf_evsel *evsel; + + evsel = perf_evlist__find_tracepoint_by_name(session->evlist, + "kmem:mm_page_alloc"); + if (evsel == NULL) { + pr_err(errmsg, "page", "page"); + goto out_delete; + } + + kmem_page_size = pevent_get_page_size(evsel->tp_format->pevent); + symbol_conf.use_callchain = true; + } + symbol__init(&session->header.env); if (!strcmp(argv[0], "stat")) { + setlocale(LC_ALL, ""); + if (cpu__setup_cpunode_map()) goto out_delete; - if (list_empty(&caller_sort)) - setup_sorting(&caller_sort, default_sort_order); - if (list_empty(&alloc_sort)) - setup_sorting(&alloc_sort, default_sort_order); - + if (list_empty(&slab_caller_sort)) + setup_slab_sorting(&slab_caller_sort, default_slab_sort); + if (list_empty(&slab_alloc_sort)) + setup_slab_sorting(&slab_alloc_sort, default_slab_sort); + if (list_empty(&page_caller_sort)) + setup_page_sorting(&page_caller_sort, default_page_sort); + if (list_empty(&page_alloc_sort)) + setup_page_sorting(&page_alloc_sort, default_page_sort); + + if (kmem_page) { + setup_page_sorting(&page_alloc_sort_input, + "page,order,migtype,gfp"); + setup_page_sorting(&page_caller_sort_input, + "callsite,order,migtype,gfp"); + } ret = __cmd_kmem(session); } else usage_with_options(kmem_usage, kmem_options); diff --git a/tools/perf/builtin-kvm.c b/tools/perf/builtin-kvm.c index 0894a817f67e..fc1cffb1b7a2 100644 --- a/tools/perf/builtin-kvm.c +++ b/tools/perf/builtin-kvm.c @@ -18,6 +18,7 @@ #include "util/stat.h" #include "util/top.h" #include "util/data.h" +#include "util/ordered-events.h" #include <sys/prctl.h> #ifdef HAVE_TIMERFD_SUPPORT @@ -650,6 +651,7 @@ static int process_sample_event(struct perf_tool *tool, struct perf_evsel *evsel, struct machine *machine) { + int err = 0; struct thread *thread; struct perf_kvm_stat *kvm = container_of(tool, struct perf_kvm_stat, tool); @@ -665,9 +667,10 @@ static int process_sample_event(struct perf_tool *tool, } if (!handle_kvm_event(kvm, thread, evsel, sample)) - return -1; + err = -1; - return 0; + thread__put(thread); + return err; } static int cpu_isa_config(struct perf_kvm_stat *kvm) @@ -730,9 +733,9 @@ static s64 perf_kvm__mmap_read_idx(struct perf_kvm_stat *kvm, int idx, return -1; } - err = perf_session_queue_event(kvm->session, event, &kvm->tool, &sample, 0); + err = perf_session__queue_event(kvm->session, event, &sample, 0); /* - * FIXME: Here we can't consume the event, as perf_session_queue_event will + * FIXME: Here we can't consume the event, as perf_session__queue_event will * point to it, and it'll get possibly overwritten by the kernel. 
*/ perf_evlist__mmap_consume(kvm->evlist, idx); @@ -783,8 +786,10 @@ static int perf_kvm__mmap_read(struct perf_kvm_stat *kvm) /* flush queue after each round in which we processed events */ if (ntotal) { - kvm->session->ordered_events.next_flush = flush_time; - err = kvm->tool.finished_round(&kvm->tool, NULL, kvm->session); + struct ordered_events *oe = &kvm->session->ordered_events; + + oe->next_flush = flush_time; + err = ordered_events__flush(oe, OE_FLUSH__ROUND); if (err) { if (kvm->lost_events) pr_info("\nLost events: %" PRIu64 "\n\n", @@ -1044,6 +1049,7 @@ static int read_events(struct perf_kvm_stat *kvm) struct perf_data_file file = { .path = kvm->file_name, .mode = PERF_DATA_MODE_READ, + .force = kvm->force, }; kvm->tool = eops; @@ -1055,8 +1061,10 @@ static int read_events(struct perf_kvm_stat *kvm) symbol__init(&kvm->session->header.env); - if (!perf_session__has_traces(kvm->session, "kvm record")) - return -EINVAL; + if (!perf_session__has_traces(kvm->session, "kvm record")) { + ret = -EINVAL; + goto out_delete; + } /* * Do not use 'isa' recorded in kvm_exit tracepoint since it is not @@ -1064,9 +1072,13 @@ static int read_events(struct perf_kvm_stat *kvm) */ ret = cpu_isa_config(kvm); if (ret < 0) - return ret; + goto out_delete; - return perf_session__process_events(kvm->session, &kvm->tool); + ret = perf_session__process_events(kvm->session); + +out_delete: + perf_session__delete(kvm->session); + return ret; } static int parse_target_str(struct perf_kvm_stat *kvm) @@ -1201,6 +1213,7 @@ kvm_events_report(struct perf_kvm_stat *kvm, int argc, const char **argv) " time (sort by avg time)"), OPT_STRING('p', "pid", &kvm->opts.target.pid, "pid", "analyze events only for given process id(s)"), + OPT_BOOLEAN('f', "force", &kvm->force, "don't complain, do it"), OPT_END() }; @@ -1304,6 +1317,8 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, "show events other than" " HLT (x86 only) or Wait state (s390 only)" " that take longer than duration usecs"), + OPT_UINTEGER(0, "proc-map-timeout", &kvm->opts.proc_map_timeout, + "per thread proc mmap processing timeout in ms"), OPT_END() }; const char * const live_usage[] = { @@ -1331,6 +1346,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, kvm->opts.target.uses_mmap = false; kvm->opts.target.uid_str = NULL; kvm->opts.target.uid = UINT_MAX; + kvm->opts.proc_map_timeout = 500; symbol__init(NULL); disable_buildid_cache(); @@ -1386,7 +1402,7 @@ static int kvm_events_live(struct perf_kvm_stat *kvm, perf_session__set_id_hdr_size(kvm->session); ordered_events__set_copy_on_queue(&kvm->session->ordered_events, true); machine__synthesize_threads(&kvm->session->machines.host, &kvm->opts.target, - kvm->evlist->threads, false); + kvm->evlist->threads, false, kvm->opts.proc_map_timeout); err = kvm_live_open_events(kvm); if (err) goto out; diff --git a/tools/perf/builtin-list.c b/tools/perf/builtin-list.c index 198f3c3aff95..af5bd0514108 100644 --- a/tools/perf/builtin-list.c +++ b/tools/perf/builtin-list.c @@ -36,38 +36,36 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) setup_pager(); - if (raw_dump) { - print_events(NULL, true); - return 0; - } + if (!raw_dump) + printf("\nList of pre-defined events (to be used in -e):\n\n"); if (argc == 0) { - print_events(NULL, false); + print_events(NULL, raw_dump); return 0; } for (i = 0; i < argc; ++i) { - if (i) - putchar('\n'); - if (strncmp(argv[i], "tracepoint", 10) == 0) - print_tracepoint_events(NULL, NULL, false); + if (strcmp(argv[i], "tracepoint") == 0) + 
print_tracepoint_events(NULL, NULL, raw_dump); else if (strcmp(argv[i], "hw") == 0 || strcmp(argv[i], "hardware") == 0) - print_events_type(PERF_TYPE_HARDWARE); + print_symbol_events(NULL, PERF_TYPE_HARDWARE, + event_symbols_hw, PERF_COUNT_HW_MAX, raw_dump); else if (strcmp(argv[i], "sw") == 0 || strcmp(argv[i], "software") == 0) - print_events_type(PERF_TYPE_SOFTWARE); + print_symbol_events(NULL, PERF_TYPE_SOFTWARE, + event_symbols_sw, PERF_COUNT_SW_MAX, raw_dump); else if (strcmp(argv[i], "cache") == 0 || strcmp(argv[i], "hwcache") == 0) - print_hwcache_events(NULL, false); + print_hwcache_events(NULL, raw_dump); else if (strcmp(argv[i], "pmu") == 0) - print_pmu_events(NULL, false); + print_pmu_events(NULL, raw_dump); else { char *sep = strchr(argv[i], ':'), *s; int sep_idx; if (sep == NULL) { - print_events(argv[i], false); + print_events(argv[i], raw_dump); continue; } sep_idx = sep - argv[i]; @@ -76,7 +74,7 @@ int cmd_list(int argc, const char **argv, const char *prefix __maybe_unused) return -1; s[sep_idx] = '\0'; - print_tracepoint_events(s, s + sep_idx + 1, false); + print_tracepoint_events(s, s + sep_idx + 1, raw_dump); free(s); } } diff --git a/tools/perf/builtin-lock.c b/tools/perf/builtin-lock.c index e7ec71589da6..de16aaed516e 100644 --- a/tools/perf/builtin-lock.c +++ b/tools/perf/builtin-lock.c @@ -769,6 +769,7 @@ static void dump_threads(void) t = perf_session__findnew(session, st->tid); pr_info("%10d: %s\n", st->tid, thread__comm_str(t)); node = rb_next(node); + thread__put(t); }; } @@ -810,6 +811,7 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct perf_evsel *evsel, struct machine *machine) { + int err = 0; struct thread *thread = machine__findnew_thread(machine, sample->pid, sample->tid); @@ -821,10 +823,12 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, if (evsel->handler != NULL) { tracepoint_handler f = evsel->handler; - return f(evsel, sample); + err = f(evsel, sample); } - return 0; + thread__put(thread); + + return err; } static void sort_result(void) @@ -846,6 +850,8 @@ static const struct perf_evsel_str_handler lock_tracepoints[] = { { "lock:lock_release", perf_evsel__process_lock_release, }, /* CONFIG_LOCKDEP */ }; +static bool force; + static int __cmd_report(bool display_info) { int err = -EINVAL; @@ -857,6 +863,7 @@ static int __cmd_report(bool display_info) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = force, }; session = perf_session__new(&file, false, &eops); @@ -878,7 +885,7 @@ static int __cmd_report(bool display_info) if (select_key()) goto out_delete; - err = perf_session__process_events(session, &eops); + err = perf_session__process_events(session); if (err) goto out_delete; @@ -945,6 +952,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) "dump thread list in perf.data"), OPT_BOOLEAN('m', "map", &info_map, "map of lock instances (address:name table)"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), OPT_END() }; const struct option lock_options[] = { @@ -956,6 +964,7 @@ int cmd_lock(int argc, const char **argv, const char *prefix __maybe_unused) const struct option report_options[] = { OPT_STRING('k', "key", &sort_key, "acquired", "key for sorting (acquired / contended / avg_wait / wait_total / wait_max / wait_min)"), + OPT_BOOLEAN('f', "force", &force, "don't complain, do it"), /* TODO: type */ OPT_END() }; diff --git a/tools/perf/builtin-mem.c b/tools/perf/builtin-mem.c index 9b5663950a4d..80170aace5d4 
100644 --- a/tools/perf/builtin-mem.c +++ b/tools/perf/builtin-mem.c @@ -15,6 +15,7 @@ struct perf_mem { char const *input_name; bool hide_unresolved; bool dump_raw; + bool force; int operation; const char *cpu_list; DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -73,7 +74,7 @@ dump_raw_samples(struct perf_tool *tool, } if (al.filtered || (mem->hide_unresolved && al.sym == NULL)) - return 0; + goto out_put; if (al.map != NULL) al.map->dso->hit = 1; @@ -102,7 +103,8 @@ dump_raw_samples(struct perf_tool *tool, symbol_conf.field_sep, al.map ? (al.map->dso ? al.map->dso->long_name : "???") : "???", al.sym ? al.sym->name : "???"); - +out_put: + addr_location__put(&al); return 0; } @@ -120,8 +122,8 @@ static int report_raw_events(struct perf_mem *mem) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = mem->force, }; - int err = -EINVAL; int ret; struct perf_session *session = perf_session__new(&file, false, &mem->tool); @@ -132,24 +134,21 @@ static int report_raw_events(struct perf_mem *mem) if (mem->cpu_list) { ret = perf_session__cpu_bitmap(session, mem->cpu_list, mem->cpu_bitmap); - if (ret) + if (ret < 0) goto out_delete; } - if (symbol__init(&session->header.env) < 0) - return -1; + ret = symbol__init(&session->header.env); + if (ret < 0) + goto out_delete; printf("# PID, TID, IP, ADDR, LOCAL WEIGHT, DSRC, SYMBOL\n"); - err = perf_session__process_events(session, &mem->tool); - if (err) - return err; - - return 0; + ret = perf_session__process_events(session); out_delete: perf_session__delete(session); - return err; + return ret; } static int report_events(int argc, const char **argv, struct perf_mem *mem) @@ -286,10 +285,11 @@ int cmd_mem(int argc, const char **argv, const char *prefix __maybe_unused) "input file name"), OPT_STRING('C', "cpu", &mem.cpu_list, "cpu", "list of cpus to profile"), - OPT_STRING('x', "field-separator", &symbol_conf.field_sep, + OPT_STRING_NOEMPTY('x', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added" " between columns '.' 
is reserved."), + OPT_BOOLEAN('f', "force", &mem.force, "don't complain, do it"), OPT_END() }; const char *const mem_subcommands[] = { "record", "report", NULL }; diff --git a/tools/perf/builtin-probe.c b/tools/perf/builtin-probe.c index 921bb6942503..1272559fa22d 100644 --- a/tools/perf/builtin-probe.c +++ b/tools/perf/builtin-probe.c @@ -44,24 +44,19 @@ #define DEFAULT_VAR_FILTER "!__k???tab_* & !__crc_*" #define DEFAULT_FUNC_FILTER "!_*" +#define DEFAULT_LIST_FILTER "*:*" /* Session management structure */ static struct { + int command; /* Command short_name */ bool list_events; - bool force_add; - bool show_lines; - bool show_vars; - bool show_ext_vars; - bool show_funcs; - bool mod_events; bool uprobes; bool quiet; + bool target_used; int nevents; struct perf_probe_event events[MAX_PROBES]; - struct strlist *dellist; struct line_range line_range; char *target; - int max_probe_points; struct strfilter *filter; } params; @@ -78,6 +73,12 @@ static int parse_probe_event(const char *str) } pev->uprobes = params.uprobes; + if (params.target) { + pev->target = strdup(params.target); + if (!pev->target) + return -ENOMEM; + params.target_used = true; + } /* Parse a perf-probe command into event */ ret = parse_perf_probe_command(str, pev); @@ -86,6 +87,28 @@ static int parse_probe_event(const char *str) return ret; } +static int params_add_filter(const char *str) +{ + const char *err = NULL; + int ret = 0; + + pr_debug2("Add filter: %s\n", str); + if (!params.filter) { + params.filter = strfilter__new(str, &err); + if (!params.filter) + ret = err ? -EINVAL : -ENOMEM; + } else + ret = strfilter__or(params.filter, str, &err); + + if (ret == -EINVAL) { + pr_err("Filter parse error at %td.\n", err - str + 1); + pr_err("Source: \"%s\"\n", str); + pr_err(" %*c\n", (int)(err - str + 1), '^'); + } + + return ret; +} + static int set_target(const char *ptr) { int found = 0; @@ -102,6 +125,7 @@ static int set_target(const char *ptr) params.target = strdup(ptr); if (!params.target) return -ENOMEM; + params.target_used = false; found = 1; buf = ptr + (strlen(ptr) - 3); @@ -144,41 +168,18 @@ static int parse_probe_event_argv(int argc, const char **argv) len += sprintf(&buf[len], "%s ", argv[i]); } - params.mod_events = true; ret = parse_probe_event(buf); free(buf); return ret; } -static int opt_add_probe_event(const struct option *opt __maybe_unused, - const char *str, int unset __maybe_unused) -{ - if (str) { - params.mod_events = true; - return parse_probe_event(str); - } else - return 0; -} - -static int opt_del_probe_event(const struct option *opt __maybe_unused, - const char *str, int unset __maybe_unused) -{ - if (str) { - params.mod_events = true; - if (!params.dellist) - params.dellist = strlist__new(true, NULL); - strlist__add(params.dellist, str); - } - return 0; -} - static int opt_set_target(const struct option *opt, const char *str, int unset __maybe_unused) { int ret = -ENOENT; char *tmp; - if (str && !params.target) { + if (str) { if (!strcmp(opt->long_name, "exec")) params.uprobes = true; #ifdef HAVE_DWARF_SUPPORT @@ -200,15 +201,19 @@ static int opt_set_target(const struct option *opt, const char *str, if (!tmp) return -ENOMEM; } + free(params.target); params.target = tmp; + params.target_used = false; ret = 0; } return ret; } +/* Command option callbacks */ + #ifdef HAVE_DWARF_SUPPORT -static int opt_show_lines(const struct option *opt __maybe_unused, +static int opt_show_lines(const struct option *opt, const char *str, int unset __maybe_unused) { int ret = 0; @@ -216,19 +221,19 @@ static 
int opt_show_lines(const struct option *opt __maybe_unused, if (!str) return 0; - if (params.show_lines) { + if (params.command == 'L') { pr_warning("Warning: more than one --line options are" " detected. Only the first one is valid.\n"); return 0; } - params.show_lines = true; + params.command = opt->short_name; ret = parse_line_range_desc(str, ¶ms.line_range); return ret; } -static int opt_show_vars(const struct option *opt __maybe_unused, +static int opt_show_vars(const struct option *opt, const char *str, int unset __maybe_unused) { struct perf_probe_event *pev = ¶ms.events[params.nevents]; @@ -242,29 +247,39 @@ static int opt_show_vars(const struct option *opt __maybe_unused, pr_err(" Error: '--vars' doesn't accept arguments.\n"); return -EINVAL; } - params.show_vars = true; + params.command = opt->short_name; return ret; } #endif +static int opt_add_probe_event(const struct option *opt, + const char *str, int unset __maybe_unused) +{ + if (str) { + params.command = opt->short_name; + return parse_probe_event(str); + } + + return 0; +} + +static int opt_set_filter_with_command(const struct option *opt, + const char *str, int unset) +{ + if (!unset) + params.command = opt->short_name; + + if (str) + return params_add_filter(str); + + return 0; +} static int opt_set_filter(const struct option *opt __maybe_unused, const char *str, int unset __maybe_unused) { - const char *err; - - if (str) { - pr_debug2("Set filter: %s\n", str); - if (params.filter) - strfilter__delete(params.filter); - params.filter = strfilter__new(str, &err); - if (!params.filter) { - pr_err("Filter parse error at %td.\n", err - str + 1); - pr_err("Source: \"%s\"\n", str); - pr_err(" %*c\n", (int)(err - str + 1), '^'); - return -EINVAL; - } - } + if (str) + return params_add_filter(str); return 0; } @@ -280,8 +295,6 @@ static void cleanup_params(void) for (i = 0; i < params.nevents; i++) clear_perf_probe_event(params.events + i); - if (params.dellist) - strlist__delete(params.dellist); line_range__clear(¶ms.line_range); free(params.target); if (params.filter) @@ -306,22 +319,24 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "perf probe [<options>] 'PROBEDEF' ['PROBEDEF' ...]", "perf probe [<options>] --add 'PROBEDEF' [--add 'PROBEDEF' ...]", "perf probe [<options>] --del '[GROUP:]EVENT' ...", - "perf probe --list", + "perf probe --list [GROUP:]EVENT ...", #ifdef HAVE_DWARF_SUPPORT "perf probe [<options>] --line 'LINEDESC'", "perf probe [<options>] --vars 'PROBEPOINT'", #endif + "perf probe [<options>] --funcs", NULL -}; + }; struct option options[] = { OPT_INCR('v', "verbose", &verbose, "be more verbose (show parsed arguments, etc)"), OPT_BOOLEAN('q', "quiet", ¶ms.quiet, "be quiet (do not show any mesages)"), - OPT_BOOLEAN('l', "list", ¶ms.list_events, - "list up current probe events"), + OPT_CALLBACK_DEFAULT('l', "list", NULL, "[GROUP:]EVENT", + "list up probe events", + opt_set_filter_with_command, DEFAULT_LIST_FILTER), OPT_CALLBACK('d', "del", NULL, "[GROUP:]EVENT", "delete a probe event.", - opt_del_probe_event), + opt_set_filter_with_command), OPT_CALLBACK('a', "add", NULL, #ifdef HAVE_DWARF_SUPPORT "[EVENT=]FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT" @@ -346,7 +361,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) "\t\tARG:\tProbe argument (kprobe-tracer argument format.)\n", #endif opt_add_probe_event), - OPT_BOOLEAN('f', "force", ¶ms.force_add, "forcibly add events" + OPT_BOOLEAN('f', "force", &probe_conf.force_add, "forcibly add events" " with 
existing name"), #ifdef HAVE_DWARF_SUPPORT OPT_CALLBACK('L', "line", NULL, @@ -355,8 +370,10 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK('V', "vars", NULL, "FUNC[@SRC][+OFF|%return|:RL|;PT]|SRC:AL|SRC;PT", "Show accessible variables on PROBEDEF", opt_show_vars), - OPT_BOOLEAN('\0', "externs", ¶ms.show_ext_vars, + OPT_BOOLEAN('\0', "externs", &probe_conf.show_ext_vars, "Show external variables too (with --vars only)"), + OPT_BOOLEAN('\0', "range", &probe_conf.show_location_range, + "Show variables location range in scope (with --vars only)"), OPT_STRING('k', "vmlinux", &symbol_conf.vmlinux_name, "file", "vmlinux pathname"), OPT_STRING('s', "source", &symbol_conf.source_prefix, @@ -364,12 +381,15 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK('m', "module", NULL, "modname|path", "target module name (for online) or path (for offline)", opt_set_target), + OPT_BOOLEAN('\0', "no-inlines", &probe_conf.no_inlines, + "Don't search inlined functions"), #endif OPT__DRY_RUN(&probe_event_dry_run), - OPT_INTEGER('\0', "max-probes", ¶ms.max_probe_points, + OPT_INTEGER('\0', "max-probes", &probe_conf.max_probes, "Set how many probe points can be found for a probe."), - OPT_BOOLEAN('F', "funcs", ¶ms.show_funcs, - "Show potential probe-able functions."), + OPT_CALLBACK_DEFAULT('F', "funcs", NULL, "[FILTER]", + "Show potential probe-able functions.", + opt_set_filter_with_command, DEFAULT_FUNC_FILTER), OPT_CALLBACK('\0', "filter", NULL, "[!]FILTER", "Set a filter (with --vars/funcs only)\n" "\t\t\t(default: \"" DEFAULT_VAR_FILTER "\" for --vars,\n" @@ -392,6 +412,7 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) set_option_flag(options, 'L', "line", PARSE_OPT_EXCLUSIVE); set_option_flag(options, 'V', "vars", PARSE_OPT_EXCLUSIVE); #endif + set_option_flag(options, 'F', "funcs", PARSE_OPT_EXCLUSIVE); argc = parse_options(argc, argv, options, probe_usage, PARSE_OPT_STOP_AT_NON_OPTION); @@ -400,11 +421,16 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) pr_warning(" Error: '-' is not supported.\n"); usage_with_options(probe_usage, options); } + if (params.command && params.command != 'a') { + pr_warning(" Error: another command except --add is set.\n"); + usage_with_options(probe_usage, options); + } ret = parse_probe_event_argv(argc, argv); if (ret < 0) { pr_err_with_code(" Error: Command Parse Error.", ret); return ret; } + params.command = 'a'; } if (params.quiet) { @@ -415,84 +441,70 @@ __cmd_probe(int argc, const char **argv, const char *prefix __maybe_unused) verbose = -1; } - if (params.max_probe_points == 0) - params.max_probe_points = MAX_PROBES; - - if ((!params.nevents && !params.dellist && !params.list_events && - !params.show_lines && !params.show_funcs)) - usage_with_options(probe_usage, options); + if (probe_conf.max_probes == 0) + probe_conf.max_probes = MAX_PROBES; /* * Only consider the user's kernel image path if given. 
*/ symbol_conf.try_vmlinux_path = (symbol_conf.vmlinux_name == NULL); - if (params.list_events) { + switch (params.command) { + case 'l': if (params.uprobes) { pr_warning(" Error: Don't use --list with --exec.\n"); usage_with_options(probe_usage, options); } - ret = show_perf_probe_events(); + ret = show_perf_probe_events(params.filter); if (ret < 0) pr_err_with_code(" Error: Failed to show event list.", ret); return ret; - } - if (params.show_funcs) { - if (!params.filter) - params.filter = strfilter__new(DEFAULT_FUNC_FILTER, - NULL); + case 'F': ret = show_available_funcs(params.target, params.filter, params.uprobes); - strfilter__delete(params.filter); - params.filter = NULL; if (ret < 0) pr_err_with_code(" Error: Failed to show functions.", ret); return ret; - } - #ifdef HAVE_DWARF_SUPPORT - if (params.show_lines) { + case 'L': ret = show_line_range(¶ms.line_range, params.target, params.uprobes); if (ret < 0) pr_err_with_code(" Error: Failed to show lines.", ret); return ret; - } - if (params.show_vars) { + case 'V': if (!params.filter) params.filter = strfilter__new(DEFAULT_VAR_FILTER, NULL); ret = show_available_vars(params.events, params.nevents, - params.max_probe_points, - params.target, - params.filter, - params.show_ext_vars); - strfilter__delete(params.filter); - params.filter = NULL; + params.filter); if (ret < 0) pr_err_with_code(" Error: Failed to show vars.", ret); return ret; - } #endif - - if (params.dellist) { - ret = del_perf_probe_events(params.dellist); + case 'd': + ret = del_perf_probe_events(params.filter); if (ret < 0) { pr_err_with_code(" Error: Failed to delete events.", ret); return ret; } - } + break; + case 'a': + /* Ensure the last given target is used */ + if (params.target && !params.target_used) { + pr_warning(" Error: -x/-m must follow the probe definitions.\n"); + usage_with_options(probe_usage, options); + } - if (params.nevents) { - ret = add_perf_probe_events(params.events, params.nevents, - params.max_probe_points, - params.target, - params.force_add); + ret = add_perf_probe_events(params.events, params.nevents); if (ret < 0) { pr_err_with_code(" Error: Failed to add events.", ret); return ret; } + break; + default: + usage_with_options(probe_usage, options); } return 0; } @@ -507,5 +519,5 @@ int cmd_probe(int argc, const char **argv, const char *prefix) cleanup_params(); } - return ret; + return ret < 0 ? 
ret : 0; } diff --git a/tools/perf/builtin-record.c b/tools/perf/builtin-record.c index 404ab3434052..de165a1b9240 100644 --- a/tools/perf/builtin-record.c +++ b/tools/perf/builtin-record.c @@ -27,6 +27,8 @@ #include "util/cpumap.h" #include "util/thread_map.h" #include "util/data.h" +#include "util/auxtrace.h" +#include "util/parse-branch-options.h" #include <unistd.h> #include <sched.h> @@ -38,6 +40,7 @@ struct record { struct record_opts opts; u64 bytes_written; struct perf_data_file file; + struct auxtrace_record *itr; struct perf_evlist *evlist; struct perf_session *session; const char *progname; @@ -70,8 +73,8 @@ static int process_synthesized_event(struct perf_tool *tool, static int record__mmap_read(struct record *rec, int idx) { struct perf_mmap *md = &rec->evlist->mmap[idx]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; + u64 head = perf_mmap__read_head(md); + u64 old = md->prev; unsigned char *data = md->base + page_size; unsigned long size; void *buf; @@ -110,9 +113,12 @@ out: return rc; } -static volatile int done = 0; +static volatile int done; static volatile int signr = -1; -static volatile int child_finished = 0; +static volatile int child_finished; +static volatile int auxtrace_snapshot_enabled; +static volatile int auxtrace_snapshot_err; +static volatile int auxtrace_record__snapshot_started; static void sig_handler(int sig) { @@ -133,6 +139,133 @@ static void record__sig_exit(void) raise(signr); } +#ifdef HAVE_AUXTRACE_SUPPORT + +static int record__process_auxtrace(struct perf_tool *tool, + union perf_event *event, void *data1, + size_t len1, void *data2, size_t len2) +{ + struct record *rec = container_of(tool, struct record, tool); + struct perf_data_file *file = &rec->file; + size_t padding; + u8 pad[8] = {0}; + + if (!perf_data_file__is_pipe(file)) { + off_t file_offset; + int fd = perf_data_file__fd(file); + int err; + + file_offset = lseek(fd, 0, SEEK_CUR); + if (file_offset == -1) + return -1; + err = auxtrace_index__auxtrace_event(&rec->session->auxtrace_index, + event, file_offset); + if (err) + return err; + } + + /* event.auxtrace.size includes padding, see __auxtrace_mmap__read() */ + padding = (len1 + len2) & 7; + if (padding) + padding = 8 - padding; + + record__write(rec, event, event->header.size); + record__write(rec, data1, len1); + if (len2) + record__write(rec, data2, len2); + record__write(rec, &pad, padding); + + return 0; +} + +static int record__auxtrace_mmap_read(struct record *rec, + struct auxtrace_mmap *mm) +{ + int ret; + + ret = auxtrace_mmap__read(mm, rec->itr, &rec->tool, + record__process_auxtrace); + if (ret < 0) + return ret; + + if (ret) + rec->samples++; + + return 0; +} + +static int record__auxtrace_mmap_read_snapshot(struct record *rec, + struct auxtrace_mmap *mm) +{ + int ret; + + ret = auxtrace_mmap__read_snapshot(mm, rec->itr, &rec->tool, + record__process_auxtrace, + rec->opts.auxtrace_snapshot_size); + if (ret < 0) + return ret; + + if (ret) + rec->samples++; + + return 0; +} + +static int record__auxtrace_read_snapshot_all(struct record *rec) +{ + int i; + int rc = 0; + + for (i = 0; i < rec->evlist->nr_mmaps; i++) { + struct auxtrace_mmap *mm = + &rec->evlist->mmap[i].auxtrace_mmap; + + if (!mm->base) + continue; + + if (record__auxtrace_mmap_read_snapshot(rec, mm) != 0) { + rc = -1; + goto out; + } + } +out: + return rc; +} + +static void record__read_auxtrace_snapshot(struct record *rec) +{ + pr_debug("Recording AUX area tracing snapshot\n"); + if (record__auxtrace_read_snapshot_all(rec) < 
0) { + auxtrace_snapshot_err = -1; + } else { + auxtrace_snapshot_err = auxtrace_record__snapshot_finish(rec->itr); + if (!auxtrace_snapshot_err) + auxtrace_snapshot_enabled = 1; + } +} + +#else + +static inline +int record__auxtrace_mmap_read(struct record *rec __maybe_unused, + struct auxtrace_mmap *mm __maybe_unused) +{ + return 0; +} + +static inline +void record__read_auxtrace_snapshot(struct record *rec __maybe_unused) +{ +} + +static inline +int auxtrace_record__snapshot_start(struct auxtrace_record *itr __maybe_unused) +{ + return 0; +} + +#endif + static int record__open(struct record *rec) { char msg[512]; @@ -161,20 +294,24 @@ try_again: } } - if (perf_evlist__apply_filters(evlist)) { - error("failed to set filter with %d (%s)\n", errno, + if (perf_evlist__apply_filters(evlist, &pos)) { + error("failed to set filter \"%s\" on event %s with %d (%s)\n", + pos->filter, perf_evsel__name(pos), errno, strerror_r(errno, msg, sizeof(msg))); rc = -1; goto out; } - if (perf_evlist__mmap(evlist, opts->mmap_pages, false) < 0) { + if (perf_evlist__mmap_ex(evlist, opts->mmap_pages, false, + opts->auxtrace_mmap_pages, + opts->auxtrace_snapshot_mode) < 0) { if (errno == EPERM) { pr_err("Permission error mapping pages.\n" "Consider increasing " "/proc/sys/kernel/perf_event_mlock_kb,\n" "or try again with a smaller value of -m/--mmap_pages.\n" - "(current value: %u)\n", opts->mmap_pages); + "(current value: %u,%u)\n", + opts->mmap_pages, opts->auxtrace_mmap_pages); rc = -errno; } else { pr_err("failed to mmap with %d (%s)\n", errno, @@ -208,12 +345,9 @@ static int process_buildids(struct record *rec) struct perf_data_file *file = &rec->file; struct perf_session *session = rec->session; - u64 size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - if (size == 0) + if (file->size == 0) return 0; - file->size = size; - /* * During this process, it'll load kernel map and replace the * dso->long_name to a real pathname it found. 
In this case @@ -225,7 +359,7 @@ static int process_buildids(struct record *rec) */ symbol_conf.ignore_vmlinux_buildid = true; - return perf_session__process_events(session, &rec->tool); + return perf_session__process_events(session); } static void perf_event__synthesize_guest_os(struct machine *machine, void *data) @@ -269,12 +403,20 @@ static int record__mmap_read_all(struct record *rec) int rc = 0; for (i = 0; i < rec->evlist->nr_mmaps; i++) { + struct auxtrace_mmap *mm = &rec->evlist->mmap[i].auxtrace_mmap; + if (rec->evlist->mmap[i].base) { if (record__mmap_read(rec, i) != 0) { rc = -1; goto out; } } + + if (mm->base && !rec->opts.auxtrace_snapshot_mode && + record__auxtrace_mmap_read(rec, mm) != 0) { + rc = -1; + goto out; + } } /* @@ -304,6 +446,9 @@ static void record__init_features(struct record *rec) if (!rec->opts.branch_stack) perf_header__clear_feat(&session->header, HEADER_BRANCH_STACK); + + if (!rec->opts.full_auxtrace) + perf_header__clear_feat(&session->header, HEADER_AUXTRACE); } static volatile int workload_exec_errno; @@ -322,6 +467,8 @@ static void workload_exec_failed_signal(int signo __maybe_unused, child_finished = 1; } +static void snapshot_sig_handler(int sig); + static int __cmd_record(struct record *rec, int argc, const char **argv) { int err; @@ -342,8 +489,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) signal(SIGCHLD, sig_handler); signal(SIGINT, sig_handler); signal(SIGTERM, sig_handler); + if (rec->opts.auxtrace_snapshot_mode) + signal(SIGUSR2, snapshot_sig_handler); + else + signal(SIGUSR2, SIG_IGN); - session = perf_session__new(file, false, NULL); + session = perf_session__new(file, false, tool); if (session == NULL) { pr_err("Perf session creation failed.\n"); return -1; @@ -420,6 +571,13 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } } + if (rec->opts.full_auxtrace) { + err = perf_event__synthesize_auxtrace_info(rec->itr, tool, + session, process_synthesized_event); + if (err) + goto out_delete_session; + } + err = perf_event__synthesize_kernel_mmap(tool, process_synthesized_event, machine); if (err < 0) @@ -440,7 +598,8 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) } err = __machine__synthesize_threads(machine, tool, &opts->target, rec->evlist->threads, - process_synthesized_event, opts->sample_address); + process_synthesized_event, opts->sample_address, + opts->proc_map_timeout); if (err != 0) goto out_child; @@ -474,14 +633,27 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) perf_evlist__enable(rec->evlist); } + auxtrace_snapshot_enabled = 1; for (;;) { int hits = rec->samples; if (record__mmap_read_all(rec) < 0) { + auxtrace_snapshot_enabled = 0; err = -1; goto out_child; } + if (auxtrace_record__snapshot_started) { + auxtrace_record__snapshot_started = 0; + if (!auxtrace_snapshot_err) + record__read_auxtrace_snapshot(rec); + if (auxtrace_snapshot_err) { + pr_err("AUX area tracing snapshot failed\n"); + err = -1; + goto out_child; + } + } + if (hits == rec->samples) { if (done || draining) break; @@ -504,10 +676,12 @@ static int __cmd_record(struct record *rec, int argc, const char **argv) * disable events in this case. 
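
The record loop above treats SIGUSR2 as the snapshot trigger: snapshot_sig_handler() only flips volatile flags, and the mmap-reading loop notices them, drains the AUX area buffers and re-arms the trigger, so no real work happens in signal context. A minimal standalone sketch of that handshake, assuming nothing beyond POSIX signals; all names are illustrative, and the real handler additionally calls auxtrace_record__snapshot_start(), which this sketch omits:

/*
 * Toy model of the SIGUSR2-driven snapshot handshake: the handler only
 * sets flags, the main loop does the actual work.
 */
#include <signal.h>
#include <stdio.h>
#include <unistd.h>

static volatile sig_atomic_t snapshot_armed;    /* may we react to SIGUSR2? */
static volatile sig_atomic_t snapshot_pending;  /* main loop must act */

static void snapshot_sig_handler(int sig)
{
        (void)sig;
        if (!snapshot_armed)
                return;                 /* ignore signals while busy */
        snapshot_armed = 0;
        snapshot_pending = 1;
}

static void take_snapshot(void)
{
        /* stand-in for draining the AUX area mmap buffers */
        printf("snapshot taken\n");
}

int main(void)
{
        signal(SIGUSR2, snapshot_sig_handler);
        snapshot_armed = 1;

        for (int i = 0; i < 30; i++) {  /* stand-in for the mmap-read loop */
                if (snapshot_pending) {
                        snapshot_pending = 0;
                        take_snapshot();
                        snapshot_armed = 1;     /* re-arm for the next signal */
                }
                sleep(1);
        }
        return 0;
}

Sending SIGUSR2 to such a process (kill -USR2 <pid>) produces one snapshot per signal, which mirrors how perf record's AUX snapshot mode is driven externally.
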
*/ if (done && !disabled && !target__none(&opts->target)) { + auxtrace_snapshot_enabled = 0; perf_evlist__disable(rec->evlist); disabled = true; } } + auxtrace_snapshot_enabled = 0; if (forks && workload_exec_errno) { char msg[STRERR_BUFSIZE]; @@ -543,16 +717,25 @@ out_child: if (!err && !file->is_pipe) { rec->session->header.data_size += rec->bytes_written; + file->size = lseek(perf_data_file__fd(file), 0, SEEK_CUR); - if (!rec->no_buildid) + if (!rec->no_buildid) { process_buildids(rec); + /* + * We take all buildids when the file contains + * AUX area tracing data because we do not decode the + * trace because it would take too long. + */ + if (rec->opts.full_auxtrace) + dsos__hit_all(rec->session); + } perf_session__write_header(rec->session, rec->evlist, fd, true); } if (!err && !quiet) { char samples[128]; - if (rec->samples) + if (rec->samples && !rec->opts.full_auxtrace) scnprintf(samples, sizeof(samples), " (%" PRIu64 " samples)", rec->samples); else @@ -568,97 +751,9 @@ out_delete_session: return status; } -#define BRANCH_OPT(n, m) \ - { .name = n, .mode = (m) } - -#define BRANCH_END { .name = NULL } - -struct branch_mode { - const char *name; - int mode; -}; - -static const struct branch_mode branch_modes[] = { - BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), - BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), - BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), - BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), - BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), - BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), - BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), - BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), - BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), - BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), - BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), - BRANCH_END -}; - -static int -parse_branch_stack(const struct option *opt, const char *str, int unset) -{ -#define ONLY_PLM \ - (PERF_SAMPLE_BRANCH_USER |\ - PERF_SAMPLE_BRANCH_KERNEL |\ - PERF_SAMPLE_BRANCH_HV) - - uint64_t *mode = (uint64_t *)opt->value; - const struct branch_mode *br; - char *s, *os = NULL, *p; - int ret = -1; - - if (unset) - return 0; - - /* - * cannot set it twice, -b + --branch-filter for instance - */ - if (*mode) - return -1; - - /* str may be NULL in case no arg is passed to -b */ - if (str) { - /* because str is read-only */ - s = os = strdup(str); - if (!s) - return -1; - - for (;;) { - p = strchr(s, ','); - if (p) - *p = '\0'; - - for (br = branch_modes; br->name; br++) { - if (!strcasecmp(s, br->name)) - break; - } - if (!br->name) { - ui__warning("unknown branch filter %s," - " check man page\n", s); - goto error; - } - - *mode |= br->mode; - - if (!p) - break; - - s = p + 1; - } - } - ret = 0; - - /* default to any branch */ - if ((*mode & ~ONLY_PLM) == 0) { - *mode = PERF_SAMPLE_BRANCH_ANY; - } -error: - free(os); - return ret; -} - static void callchain_debug(void) { - static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF" }; + static const char *str[CALLCHAIN_MAX] = { "NONE", "FP", "DWARF", "LBR" }; pr_debug("callchain: type %s\n", str[callchain_param.record_mode]); @@ -710,6 +805,133 @@ static int perf_record_config(const char *var, const char *value, void *cb) return perf_default_config(var, value, cb); } +struct clockid_map { + const char *name; + int clockid; +}; + +#define CLOCKID_MAP(n, c) \ + { .name = n, .clockid = (c), } + +#define CLOCKID_END { .name = NULL, } + + +/* + * Add the missing ones, we need to build on many distros... 
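
parse_clockid() in the hunk below resolves the new -k/--clockid argument in three steps: accept a plain numeric id, strip an optional "CLOCK_" prefix, then match the remainder case-insensitively against a name table (with fallback #defines so the tool still builds where older headers lack some clocks). A standalone sketch of that lookup, with an abbreviated table and illustrative names:

/* Sketch of a clock-name lookup: number, optional CLOCK_ prefix, name table. */
#include <stdio.h>
#include <string.h>
#include <strings.h>
#include <time.h>

struct clock_name { const char *name; clockid_t id; };

static const struct clock_name clock_names[] = {
        { "monotonic", CLOCK_MONOTONIC },
        { "realtime",  CLOCK_REALTIME  },
        { NULL, 0 },
};

static int lookup_clockid(const char *str, clockid_t *out)
{
        int num;

        if (sscanf(str, "%d", &num) == 1) {     /* plain numeric id */
                *out = num;
                return 0;
        }
        if (!strncasecmp(str, "CLOCK_", 6))     /* allow the CLOCK_ prefix */
                str += 6;
        for (const struct clock_name *c = clock_names; c->name; c++) {
                if (!strcasecmp(str, c->name)) {
                        *out = c->id;
                        return 0;
                }
        }
        return -1;                              /* unknown clock name */
}

int main(void)
{
        clockid_t id;

        if (!lookup_clockid("CLOCK_monotonic", &id))
                printf("clockid %d\n", (int)id);
        return 0;
}

The real table also carries shorthand aliases such as "mono" and "raw", and parse_clockid() records whether the option was set at all so an unknown name can be rejected with a warning.
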
+ */ +#ifndef CLOCK_MONOTONIC_RAW +#define CLOCK_MONOTONIC_RAW 4 +#endif +#ifndef CLOCK_BOOTTIME +#define CLOCK_BOOTTIME 7 +#endif +#ifndef CLOCK_TAI +#define CLOCK_TAI 11 +#endif + +static const struct clockid_map clockids[] = { + /* available for all events, NMI safe */ + CLOCKID_MAP("monotonic", CLOCK_MONOTONIC), + CLOCKID_MAP("monotonic_raw", CLOCK_MONOTONIC_RAW), + + /* available for some events */ + CLOCKID_MAP("realtime", CLOCK_REALTIME), + CLOCKID_MAP("boottime", CLOCK_BOOTTIME), + CLOCKID_MAP("tai", CLOCK_TAI), + + /* available for the lazy */ + CLOCKID_MAP("mono", CLOCK_MONOTONIC), + CLOCKID_MAP("raw", CLOCK_MONOTONIC_RAW), + CLOCKID_MAP("real", CLOCK_REALTIME), + CLOCKID_MAP("boot", CLOCK_BOOTTIME), + + CLOCKID_END, +}; + +static int parse_clockid(const struct option *opt, const char *str, int unset) +{ + struct record_opts *opts = (struct record_opts *)opt->value; + const struct clockid_map *cm; + const char *ostr = str; + + if (unset) { + opts->use_clockid = 0; + return 0; + } + + /* no arg passed */ + if (!str) + return 0; + + /* no setting it twice */ + if (opts->use_clockid) + return -1; + + opts->use_clockid = true; + + /* if its a number, we're done */ + if (sscanf(str, "%d", &opts->clockid) == 1) + return 0; + + /* allow a "CLOCK_" prefix to the name */ + if (!strncasecmp(str, "CLOCK_", 6)) + str += 6; + + for (cm = clockids; cm->name; cm++) { + if (!strcasecmp(str, cm->name)) { + opts->clockid = cm->clockid; + return 0; + } + } + + opts->use_clockid = false; + ui__warning("unknown clockid %s, check man page\n", ostr); + return -1; +} + +static int record__parse_mmap_pages(const struct option *opt, + const char *str, + int unset __maybe_unused) +{ + struct record_opts *opts = opt->value; + char *s, *p; + unsigned int mmap_pages; + int ret; + + if (!str) + return -EINVAL; + + s = strdup(str); + if (!s) + return -ENOMEM; + + p = strchr(s, ','); + if (p) + *p = '\0'; + + if (*s) { + ret = __perf_evlist__parse_mmap_pages(&mmap_pages, s); + if (ret) + goto out_free; + opts->mmap_pages = mmap_pages; + } + + if (!p) { + ret = 0; + goto out_free; + } + + ret = __perf_evlist__parse_mmap_pages(&mmap_pages, p + 1); + if (ret) + goto out_free; + + opts->auxtrace_mmap_pages = mmap_pages; + +out_free: + free(s); + return ret; +} + static const char * const __record_usage[] = { "perf record [<options>] [<command>]", "perf record [<options>] -- <command> [<options>]", @@ -738,6 +960,7 @@ static struct record record = { .uses_mmap = true, .default_per_cpu = true, }, + .proc_map_timeout = 500, }, .tool = { .sample = process_sample_event, @@ -751,9 +974,9 @@ static struct record record = { #define CALLCHAIN_HELP "setup and enables call-graph (stack chain/backtrace) recording: " #ifdef HAVE_DWARF_UNWIND_SUPPORT -const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp dwarf lbr"; #else -const char record_callchain_help[] = CALLCHAIN_HELP "fp"; +const char record_callchain_help[] = CALLCHAIN_HELP "fp lbr"; #endif /* @@ -790,9 +1013,9 @@ struct option __record_options[] = { &record.opts.no_inherit_set, "child tasks do not inherit counters"), OPT_UINTEGER('F', "freq", &record.opts.user_freq, "profile at this frequency"), - OPT_CALLBACK('m', "mmap-pages", &record.opts.mmap_pages, "pages", - "number of mmap data pages", - perf_evlist__parse_mmap_pages), + OPT_CALLBACK('m', "mmap-pages", &record.opts, "pages[,pages]", + "number of mmap data pages and AUX area tracing mmap pages", + record__parse_mmap_pages), OPT_BOOLEAN(0, "group", 
&record.opts.group, "put the counters into a counter group"), OPT_CALLBACK_NOOPT('g', NULL, &record.opts, @@ -806,10 +1029,9 @@ struct option __record_options[] = { OPT_BOOLEAN('q', "quiet", &quiet, "don't print any message"), OPT_BOOLEAN('s', "stat", &record.opts.inherit_stat, "per thread counts"), - OPT_BOOLEAN('d', "data", &record.opts.sample_address, - "Sample addresses"), - OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Sample timestamps"), - OPT_BOOLEAN('P', "period", &record.opts.period, "Sample period"), + OPT_BOOLEAN('d', "data", &record.opts.sample_address, "Record the sample addresses"), + OPT_BOOLEAN('T', "timestamp", &record.opts.sample_time, "Record the sample timestamps"), + OPT_BOOLEAN('P', "period", &record.opts.period, "Record the sample period"), OPT_BOOLEAN('n', "no-samples", &record.opts.no_samples, "don't sample"), OPT_BOOLEAN('N', "no-buildid-cache", &record.no_buildid_cache, @@ -839,6 +1061,15 @@ struct option __record_options[] = { "use per-thread mmaps"), OPT_BOOLEAN('I', "intr-regs", &record.opts.sample_intr_regs, "Sample machine registers on interrupt"), + OPT_BOOLEAN(0, "running-time", &record.opts.running_time, + "Record running/enabled time of read (:S) events"), + OPT_CALLBACK('k', "clockid", &record.opts, + "clockid", "clockid to use for events, see clock_gettime()", + parse_clockid), + OPT_STRING_OPTARG('S', "snapshot", &record.opts.auxtrace_snapshot_opts, + "opts", "AUX area tracing Snapshot Mode", ""), + OPT_UINTEGER(0, "proc-map-timeout", &record.opts.proc_map_timeout, + "per thread proc mmap processing timeout in ms"), OPT_END() }; @@ -846,7 +1077,7 @@ struct option *record_options = __record_options; int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) { - int err = -ENOMEM; + int err; struct record *rec = &record; char errbuf[BUFSIZ]; @@ -867,6 +1098,19 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) usage_with_options(record_usage, record_options); } + if (!rec->itr) { + rec->itr = auxtrace_record__init(rec->evlist, &err); + if (err) + return err; + } + + err = auxtrace_parse_snapshot_options(rec->itr, &rec->opts, + rec->opts.auxtrace_snapshot_opts); + if (err) + return err; + + err = -ENOMEM; + symbol__init(NULL); if (symbol_conf.kptr_restrict) @@ -912,6 +1156,10 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) if (perf_evlist__create_maps(rec->evlist, &rec->opts.target) < 0) usage_with_options(record_usage, record_options); + err = auxtrace_record__options(rec->itr, rec->evlist, &rec->opts); + if (err) + goto out_symbol_exit; + if (record_opts__config(&rec->opts)) { err = -EINVAL; goto out_symbol_exit; @@ -921,5 +1169,15 @@ int cmd_record(int argc, const char **argv, const char *prefix __maybe_unused) out_symbol_exit: perf_evlist__delete(rec->evlist); symbol__exit(); + auxtrace_record__free(rec->itr); return err; } + +static void snapshot_sig_handler(int sig __maybe_unused) +{ + if (!auxtrace_snapshot_enabled) + return; + auxtrace_snapshot_enabled = 0; + auxtrace_snapshot_err = auxtrace_record__snapshot_start(record.itr); + auxtrace_record__snapshot_started = 1; +} diff --git a/tools/perf/builtin-report.c b/tools/perf/builtin-report.c index 2f91094e228b..95a47719aec3 100644 --- a/tools/perf/builtin-report.c +++ b/tools/perf/builtin-report.c @@ -36,6 +36,8 @@ #include "util/data.h" #include "arch/common.h" +#include "util/auxtrace.h" + #include <dlfcn.h> #include <linux/bitmap.h> @@ -137,10 +139,12 @@ static int process_sample_event(struct 
perf_tool *tool, struct report *rep = container_of(tool, struct report, tool); struct addr_location al; struct hist_entry_iter iter = { - .hide_unresolved = rep->hide_unresolved, - .add_entry_cb = hist_iter__report_callback, + .evsel = evsel, + .sample = sample, + .hide_unresolved = rep->hide_unresolved, + .add_entry_cb = hist_iter__report_callback, }; - int ret; + int ret = 0; if (perf_event__preprocess_sample(event, machine, &al, sample) < 0) { pr_debug("problem processing %d event, skipping it.\n", @@ -149,10 +153,10 @@ static int process_sample_event(struct perf_tool *tool, } if (rep->hide_unresolved && al.sym == NULL) - return 0; + goto out_put; if (rep->cpu_list && !test_bit(sample->cpu, rep->cpu_bitmap)) - return 0; + goto out_put; if (sort__mode == SORT_MODE__BRANCH) iter.ops = &hist_iter_branch; @@ -166,11 +170,11 @@ static int process_sample_event(struct perf_tool *tool, if (al.map != NULL) al.map->dso->hit = 1; - ret = hist_entry_iter__add(&iter, &al, evsel, sample, rep->max_stack, - rep); + ret = hist_entry_iter__add(&iter, &al, rep->max_stack, rep); if (ret < 0) pr_debug("problem adding hist entry, skipping event\n"); - +out_put: + addr_location__put(&al); return ret; } @@ -249,6 +253,8 @@ static int report__setup_sample_type(struct report *rep) if ((sample_type & PERF_SAMPLE_REGS_USER) && (sample_type & PERF_SAMPLE_STACK_USER)) callchain_param.record_mode = CALLCHAIN_DWARF; + else if (sample_type & PERF_SAMPLE_BRANCH_STACK) + callchain_param.record_mode = CALLCHAIN_LBR; else callchain_param.record_mode = CALLCHAIN_FP; } @@ -302,7 +308,7 @@ static size_t hists__fprintf_nr_sample_events(struct hists *hists, struct report if (rep->mem_mode) { ret += fprintf(fp, "\n# Total weight : %" PRIu64, nr_events); - ret += fprintf(fp, "\n# Sort order : %s", sort_order); + ret += fprintf(fp, "\n# Sort order : %s", sort_order ? : default_mem_sort_order); } else ret += fprintf(fp, "\n# Event count (approx.): %" PRIu64, nr_events); return ret + fprintf(fp, "\n#\n"); @@ -314,6 +320,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, { struct perf_evsel *pos; + fprintf(stdout, "#\n# Total Lost Samples: %" PRIu64 "\n#\n", evlist->stats.total_lost_samples); evlist__for_each(evlist, pos) { struct hists *hists = evsel__hists(pos); const char *evname = perf_evsel__name(pos); @@ -327,16 +334,15 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, fprintf(stdout, "\n\n"); } - if (sort_order == default_sort_order && - parent_pattern == default_parent_pattern) { + if (sort_order == NULL && + parent_pattern == default_parent_pattern) fprintf(stdout, "#\n# (%s)\n#\n", help); - if (rep->show_threads) { - bool style = !strcmp(rep->pretty_printing_style, "raw"); - perf_read_values_display(stdout, &rep->show_threads_values, - style); - perf_read_values_destroy(&rep->show_threads_values); - } + if (rep->show_threads) { + bool style = !strcmp(rep->pretty_printing_style, "raw"); + perf_read_values_display(stdout, &rep->show_threads_values, + style); + perf_read_values_destroy(&rep->show_threads_values); } return 0; @@ -345,7 +351,7 @@ static int perf_evlist__tty_browse_hists(struct perf_evlist *evlist, static void report__warn_kptr_restrict(const struct report *rep) { struct map *kernel_map = rep->session->machines.host.vmlinux_maps[MAP__FUNCTION]; - struct kmap *kernel_kmap = map__kmap(kernel_map); + struct kmap *kernel_kmap = kernel_map ? 
map__kmap(kernel_map) : NULL; if (kernel_map == NULL || (kernel_map->dso->hit && @@ -480,7 +486,7 @@ static int __cmd_report(struct report *rep) if (ret) return ret; - ret = perf_session__process_events(session, &rep->tool); + ret = perf_session__process_events(session); if (ret) return ret; @@ -583,6 +589,7 @@ parse_percent_limit(const struct option *opt, const char *str, int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) { struct perf_session *session; + struct itrace_synth_opts itrace_synth_opts = { .set = 0, }; struct stat st; bool has_br_stack = false; int branch_mode = -1; @@ -605,6 +612,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) .attr = perf_event__process_attr, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, + .id_index = perf_event__process_id_index, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, .ordered_events = true, .ordering_requires_timestamps = true, }, @@ -667,6 +677,10 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "only consider symbols in these dsos"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only consider symbols in these comms"), + OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "only consider symbols in these pids"), + OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "only consider symbols in these tids"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", "only consider these symbols"), OPT_STRING(0, "symbol-filter", &report.symbol_filter_str, "filter", @@ -674,7 +688,7 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), - OPT_STRING('t', "field-separator", &symbol_conf.field_sep, "separator", + OPT_STRING_NOEMPTY('t', "field-separator", &symbol_conf.field_sep, "separator", "separator for columns, no spaces will be added between " "columns '.' is reserved."), OPT_BOOLEAN('U', "hide-unresolved", &report.hide_unresolved, @@ -711,6 +725,9 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) "Don't show entries under that percent", parse_percent_limit), OPT_CALLBACK(0, "percentage", NULL, "relative|absolute", "how to display percentage of filtered entries", parse_filter_percentage), + OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", + "Instruction Tracing options", + itrace_parse_synth_opts), OPT_END() }; struct perf_data_file file = { @@ -725,6 +742,17 @@ int cmd_report(int argc, const char **argv, const char *prefix __maybe_unused) argc = parse_options(argc, argv, options, report_usage, 0); + if (symbol_conf.vmlinux_name && + access(symbol_conf.vmlinux_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.vmlinux_name); + return -EINVAL; + } + if (symbol_conf.kallsyms_name && + access(symbol_conf.kallsyms_name, R_OK)) { + pr_err("Invalid file: %s\n", symbol_conf.kallsyms_name); + return -EINVAL; + } + if (report.use_stdio) use_browser = 0; else if (report.use_tui) @@ -755,6 +783,8 @@ repeat: report.queue_size); } + session->itrace_synth_opts = &itrace_synth_opts; + report.session = session; has_br_stack = perf_header__has_feat(&session->header, @@ -766,7 +796,7 @@ repeat: * 0/1 means the user chose a mode. 
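
The comment closed just below spells out a common tri-state idiom: branch_mode starts at -1 ("not set"), and only an explicit 0 or 1 from the command line overrides what the perf.data file itself suggests, here whether it was recorded with branch stacks. A tiny sketch of that decision, with illustrative names:

/* Tri-state option: -1 follows the data, 0/1 are explicit user choices. */
#include <stdbool.h>
#include <stdio.h>

static bool use_branch_view(int branch_mode, bool file_has_branch_stack)
{
        if (branch_mode == 1)                   /* user forced it on */
                return true;
        if (branch_mode == 0)                   /* user forced it off */
                return false;
        return file_has_branch_stack;           /* -1: follow the data */
}

int main(void)
{
        printf("%d\n", use_branch_view(-1, true));      /* 1: data decides */
        printf("%d\n", use_branch_view(0, true));       /* 0: user wins */
        return 0;
}

The same idiom keeps the report's default in step with what was actually recorded, without requiring extra flags.
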
*/ if (((branch_mode == -1 && has_br_stack) || branch_mode == 1) && - branch_call_mode == -1) { + !branch_call_mode) { sort__mode = SORT_MODE__BRANCH; symbol_conf.cumulate_callchain = false; } @@ -797,8 +827,8 @@ repeat: goto error; } - /* Force tty output for header output. */ - if (report.header || report.header_only) + /* Force tty output for header output and per-thread stat. */ + if (report.header || report.header_only || report.show_threads) use_browser = 0; if (strcmp(input_name, "-") != 0) @@ -809,8 +839,10 @@ repeat: if (report.header || report.header_only) { perf_session__fprintf_info(session, stdout, report.show_full_info); - if (report.header_only) - return 0; + if (report.header_only) { + ret = 0; + goto error; + } } else if (use_browser == 0) { fputs("# To display the perf.data header info, please use --header/--header-only options.\n#\n", stdout); diff --git a/tools/perf/builtin-sched.c b/tools/perf/builtin-sched.c index 891c3930080e..33962612a5e9 100644 --- a/tools/perf/builtin-sched.c +++ b/tools/perf/builtin-sched.c @@ -23,12 +23,13 @@ #include <semaphore.h> #include <pthread.h> #include <math.h> +#include <api/fs/fs.h> #define PR_SET_NAME 15 /* Set process name */ #define MAX_CPUS 4096 #define COMM_LEN 20 #define SYM_LEN 129 -#define MAX_PID 65536 +#define MAX_PID 1024000 struct sched_atom; @@ -94,6 +95,7 @@ struct work_atoms { u64 total_lat; u64 nb_atoms; u64 total_runtime; + int num_merged; }; typedef int (*sort_fn_t)(struct work_atoms *, struct work_atoms *); @@ -124,7 +126,7 @@ struct perf_sched { struct perf_tool tool; const char *sort_order; unsigned long nr_tasks; - struct task_desc *pid_to_task[MAX_PID]; + struct task_desc **pid_to_task; struct task_desc **tasks; const struct trace_sched_handler *tp_handler; pthread_mutex_t start_work_mutex; @@ -167,8 +169,10 @@ struct perf_sched { u64 all_runtime; u64 all_count; u64 cpu_last_switched[MAX_CPUS]; - struct rb_root atom_root, sorted_atom_root; + struct rb_root atom_root, sorted_atom_root, merged_atom_root; struct list_head sort_list, cmp_pid; + bool force; + bool skip_merge; }; static u64 get_nsecs(void) @@ -326,8 +330,19 @@ static struct task_desc *register_pid(struct perf_sched *sched, unsigned long pid, const char *comm) { struct task_desc *task; + static int pid_max; - BUG_ON(pid >= MAX_PID); + if (sched->pid_to_task == NULL) { + if (sysctl__read_int("kernel/pid_max", &pid_max) < 0) + pid_max = MAX_PID; + BUG_ON((sched->pid_to_task = calloc(pid_max, sizeof(struct task_desc *))) == NULL); + } + if (pid >= (unsigned long)pid_max) { + BUG_ON((sched->pid_to_task = realloc(sched->pid_to_task, (pid + 1) * + sizeof(struct task_desc *))) == NULL); + while (pid >= (unsigned long)pid_max) + sched->pid_to_task[pid_max++] = NULL; + } task = sched->pid_to_task[pid]; @@ -346,7 +361,7 @@ static struct task_desc *register_pid(struct perf_sched *sched, sched->pid_to_task[pid] = task; sched->nr_tasks++; - sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_task *)); + sched->tasks = realloc(sched->tasks, sched->nr_tasks * sizeof(struct task_desc *)); BUG_ON(!sched->tasks); sched->tasks[task->nr] = task; @@ -425,24 +440,45 @@ static u64 get_cpu_usage_nsec_parent(void) return sum; } -static int self_open_counters(void) +static int self_open_counters(struct perf_sched *sched, unsigned long cur_task) { struct perf_event_attr attr; - char sbuf[STRERR_BUFSIZE]; + char sbuf[STRERR_BUFSIZE], info[STRERR_BUFSIZE]; int fd; + struct rlimit limit; + bool need_privilege = false; memset(&attr, 0, sizeof(attr)); attr.type 
= PERF_TYPE_SOFTWARE; attr.config = PERF_COUNT_SW_TASK_CLOCK; +force_again: fd = sys_perf_event_open(&attr, 0, -1, -1, perf_event_open_cloexec_flag()); - if (fd < 0) + if (fd < 0) { + if (errno == EMFILE) { + if (sched->force) { + BUG_ON(getrlimit(RLIMIT_NOFILE, &limit) == -1); + limit.rlim_cur += sched->nr_tasks - cur_task; + if (limit.rlim_cur > limit.rlim_max) { + limit.rlim_max = limit.rlim_cur; + need_privilege = true; + } + if (setrlimit(RLIMIT_NOFILE, &limit) == -1) { + if (need_privilege && errno == EPERM) + strcpy(info, "Need privilege\n"); + } else + goto force_again; + } else + strcpy(info, "Have a try with -f option\n"); + } pr_err("Error: sys_perf_event_open() syscall returned " - "with %d (%s)\n", fd, - strerror_r(errno, sbuf, sizeof(sbuf))); + "with %d (%s)\n%s", fd, + strerror_r(errno, sbuf, sizeof(sbuf)), info); + exit(EXIT_FAILURE); + } return fd; } @@ -460,6 +496,7 @@ static u64 get_cpu_usage_nsec_self(int fd) struct sched_thread_parms { struct task_desc *task; struct perf_sched *sched; + int fd; }; static void *thread_func(void *ctx) @@ -470,13 +507,12 @@ static void *thread_func(void *ctx) u64 cpu_usage_0, cpu_usage_1; unsigned long i, ret; char comm2[22]; - int fd; + int fd = parms->fd; zfree(&parms); sprintf(comm2, ":%s", this_task->comm); prctl(PR_SET_NAME, comm2); - fd = self_open_counters(); if (fd < 0) return NULL; again: @@ -528,6 +564,7 @@ static void create_tasks(struct perf_sched *sched) BUG_ON(parms == NULL); parms->task = task = sched->tasks[i]; parms->sched = sched; + parms->fd = self_open_counters(sched, i); sem_init(&task->sleep_sem, 0, 0); sem_init(&task->ready_for_work, 0, 0); sem_init(&task->work_done_sem, 0, 0); @@ -572,13 +609,13 @@ static void wait_for_tasks(struct perf_sched *sched) cpu_usage_1 = get_cpu_usage_nsec_parent(); if (!sched->runavg_cpu_usage) sched->runavg_cpu_usage = sched->cpu_usage; - sched->runavg_cpu_usage = (sched->runavg_cpu_usage * 9 + sched->cpu_usage) / 10; + sched->runavg_cpu_usage = (sched->runavg_cpu_usage * (sched->replay_repeat - 1) + sched->cpu_usage) / sched->replay_repeat; sched->parent_cpu_usage = cpu_usage_1 - cpu_usage_0; if (!sched->runavg_parent_cpu_usage) sched->runavg_parent_cpu_usage = sched->parent_cpu_usage; - sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * 9 + - sched->parent_cpu_usage)/10; + sched->runavg_parent_cpu_usage = (sched->runavg_parent_cpu_usage * (sched->replay_repeat - 1) + + sched->parent_cpu_usage)/sched->replay_repeat; ret = pthread_mutex_lock(&sched->start_work_mutex); BUG_ON(ret); @@ -610,7 +647,7 @@ static void run_one_test(struct perf_sched *sched) sched->sum_fluct += fluct; if (!sched->run_avg) sched->run_avg = delta; - sched->run_avg = (sched->run_avg * 9 + delta) / 10; + sched->run_avg = (sched->run_avg * (sched->replay_repeat - 1) + delta) / sched->replay_repeat; printf("#%-3ld: %0.3f, ", sched->nr_runs, (double)delta / 1000000.0); @@ -735,7 +772,7 @@ static int replay_fork_event(struct perf_sched *sched, if (child == NULL || parent == NULL) { pr_debug("thread does not exist on fork event: child %p, parent %p\n", child, parent); - return 0; + goto out_put; } if (verbose) { @@ -746,6 +783,9 @@ static int replay_fork_event(struct perf_sched *sched, register_pid(sched, parent->tid, thread__comm_str(parent)); register_pid(sched, child->tid, thread__comm_str(child)); +out_put: + thread__put(child); + thread__put(parent); return 0; } @@ -831,7 +871,7 @@ static int thread_atoms_insert(struct perf_sched *sched, struct thread *thread) return -1; } - atoms->thread = thread; 
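
Throughout builtin-sched.c this change converts bare struct thread pointers into counted references: the threads returned by machine__findnew_thread() are now dropped with thread__put() on every path, and anything that stores the pointer, such as the work_atoms updated in the hunk continuing below or curr_thread[] in map_switch_event(), takes its own reference with thread__get(). A toy sketch of that get/put discipline, using an illustrative object rather than perf's struct thread:

/* Toy refcounting: lookups return a reference, stores take their own. */
#include <stdio.h>
#include <stdlib.h>

struct obj {
        int refcnt;
        int tid;
};

static struct obj *obj__get(struct obj *o)
{
        if (o)
                o->refcnt++;
        return o;
}

static void obj__put(struct obj *o)
{
        if (o && --o->refcnt == 0)
                free(o);
}

static struct obj *obj__findnew(int tid)        /* returns a counted reference */
{
        struct obj *o = calloc(1, sizeof(*o));

        if (o) {
                o->tid = tid;
                o->refcnt = 1;
        }
        return o;
}

struct holder { struct obj *owner; };

int main(void)
{
        struct obj *o = obj__findnew(42);
        struct holder h;

        if (!o)
                return 1;
        h.owner = obj__get(o);  /* storing the pointer takes a reference */
        obj__put(o);            /* the lookup reference is no longer needed */
        printf("tid %d still reachable via holder\n", h.owner->tid);
        obj__put(h.owner);      /* last reference dropped, object is freed */
        return 0;
}

Keeping every findnew/get balanced by exactly one put is what later lets the latency output release each thread with thread__zput() once its work list has been printed.
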
+ atoms->thread = thread__get(thread); INIT_LIST_HEAD(&atoms->work_list); __thread_latency_insert(&sched->atom_root, atoms, &sched->cmp_pid); return 0; @@ -922,7 +962,7 @@ static int latency_switch_event(struct perf_sched *sched, struct work_atoms *out_events, *in_events; struct thread *sched_out, *sched_in; u64 timestamp0, timestamp = sample->time; - int cpu = sample->cpu; + int cpu = sample->cpu, err = -1; s64 delta; BUG_ON(cpu >= MAX_CPUS || cpu < 0); @@ -941,15 +981,17 @@ static int latency_switch_event(struct perf_sched *sched, sched_out = machine__findnew_thread(machine, -1, prev_pid); sched_in = machine__findnew_thread(machine, -1, next_pid); + if (sched_out == NULL || sched_in == NULL) + goto out_put; out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); if (!out_events) { if (thread_atoms_insert(sched, sched_out)) - return -1; + goto out_put; out_events = thread_atoms_search(&sched->atom_root, sched_out, &sched->cmp_pid); if (!out_events) { pr_err("out-event: Internal tree error"); - return -1; + goto out_put; } } if (add_sched_out_event(out_events, sched_out_state(prev_state), timestamp)) @@ -958,22 +1000,25 @@ static int latency_switch_event(struct perf_sched *sched, in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); if (!in_events) { if (thread_atoms_insert(sched, sched_in)) - return -1; + goto out_put; in_events = thread_atoms_search(&sched->atom_root, sched_in, &sched->cmp_pid); if (!in_events) { pr_err("in-event: Internal tree error"); - return -1; + goto out_put; } /* * Take came in we have not heard about yet, * add in an initial atom in runnable state: */ if (add_sched_out_event(in_events, 'R', timestamp)) - return -1; + goto out_put; } add_sched_in_event(in_events, timestamp); - - return 0; + err = 0; +out_put: + thread__put(sched_out); + thread__put(sched_in); + return err; } static int latency_runtime_event(struct perf_sched *sched, @@ -986,23 +1031,29 @@ static int latency_runtime_event(struct perf_sched *sched, struct thread *thread = machine__findnew_thread(machine, -1, pid); struct work_atoms *atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); u64 timestamp = sample->time; - int cpu = sample->cpu; + int cpu = sample->cpu, err = -1; + + if (thread == NULL) + return -1; BUG_ON(cpu >= MAX_CPUS || cpu < 0); if (!atoms) { if (thread_atoms_insert(sched, thread)) - return -1; + goto out_put; atoms = thread_atoms_search(&sched->atom_root, thread, &sched->cmp_pid); if (!atoms) { pr_err("in-event: Internal tree error"); - return -1; + goto out_put; } if (add_sched_out_event(atoms, 'R', timestamp)) - return -1; + goto out_put; } add_runtime_event(atoms, runtime, timestamp); - return 0; + err = 0; +out_put: + thread__put(thread); + return err; } static int latency_wakeup_event(struct perf_sched *sched, @@ -1015,19 +1066,22 @@ static int latency_wakeup_event(struct perf_sched *sched, struct work_atom *atom; struct thread *wakee; u64 timestamp = sample->time; + int err = -1; wakee = machine__findnew_thread(machine, -1, pid); + if (wakee == NULL) + return -1; atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { if (thread_atoms_insert(sched, wakee)) - return -1; + goto out_put; atoms = thread_atoms_search(&sched->atom_root, wakee, &sched->cmp_pid); if (!atoms) { pr_err("wakeup-event: Internal tree error"); - return -1; + goto out_put; } if (add_sched_out_event(atoms, 'S', timestamp)) - return -1; + goto out_put; } BUG_ON(list_empty(&atoms->work_list)); @@ -1046,17 
+1100,21 @@ static int latency_wakeup_event(struct perf_sched *sched, * skip in this case. */ if (sched->profile_cpu == -1 && atom->state != THREAD_SLEEPING) - return 0; + goto out_ok; sched->nr_timestamps++; if (atom->sched_out_time > timestamp) { sched->nr_unordered_timestamps++; - return 0; + goto out_ok; } atom->state = THREAD_WAIT_CPU; atom->wake_up_time = timestamp; - return 0; +out_ok: + err = 0; +out_put: + thread__put(wakee); + return err; } static int latency_migrate_task_event(struct perf_sched *sched, @@ -1069,6 +1127,7 @@ static int latency_migrate_task_event(struct perf_sched *sched, struct work_atoms *atoms; struct work_atom *atom; struct thread *migrant; + int err = -1; /* * Only need to worry about migration when profiling one CPU. @@ -1077,18 +1136,20 @@ static int latency_migrate_task_event(struct perf_sched *sched, return 0; migrant = machine__findnew_thread(machine, -1, pid); + if (migrant == NULL) + return -1; atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { if (thread_atoms_insert(sched, migrant)) - return -1; + goto out_put; register_pid(sched, migrant->tid, thread__comm_str(migrant)); atoms = thread_atoms_search(&sched->atom_root, migrant, &sched->cmp_pid); if (!atoms) { pr_err("migration-event: Internal tree error"); - return -1; + goto out_put; } if (add_sched_out_event(atoms, 'R', timestamp)) - return -1; + goto out_put; } BUG_ON(list_empty(&atoms->work_list)); @@ -1100,8 +1161,10 @@ static int latency_migrate_task_event(struct perf_sched *sched, if (atom->sched_out_time > timestamp) sched->nr_unordered_timestamps++; - - return 0; + err = 0; +out_put: + thread__put(migrant); + return err; } static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_list) @@ -1121,7 +1184,10 @@ static void output_lat_thread(struct perf_sched *sched, struct work_atoms *work_ sched->all_runtime += work_list->total_runtime; sched->all_count += work_list->nb_atoms; - ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); + if (work_list->num_merged > 1) + ret = printf(" %s:(%d) ", thread__comm_str(work_list->thread), work_list->num_merged); + else + ret = printf(" %s:%d ", thread__comm_str(work_list->thread), work_list->thread->tid); for (i = 0; i < 24 - ret; i++) printf(" "); @@ -1241,17 +1307,22 @@ static int sort_dimension__add(const char *tok, struct list_head *list) static void perf_sched__sort_lat(struct perf_sched *sched) { struct rb_node *node; - + struct rb_root *root = &sched->atom_root; +again: for (;;) { struct work_atoms *data; - node = rb_first(&sched->atom_root); + node = rb_first(root); if (!node) break; - rb_erase(node, &sched->atom_root); + rb_erase(node, root); data = rb_entry(node, struct work_atoms, node); __thread_latency_insert(&sched->sorted_atom_root, data, &sched->sort_list); } + if (root == &sched->atom_root) { + root = &sched->merged_atom_root; + goto again; + } } static int process_sched_wakeup_event(struct perf_tool *tool, @@ -1295,8 +1366,10 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, } sched_in = machine__findnew_thread(machine, -1, next_pid); + if (sched_in == NULL) + return -1; - sched->curr_thread[this_cpu] = sched_in; + sched->curr_thread[this_cpu] = thread__get(sched_in); printf(" "); @@ -1346,6 +1419,8 @@ static int map_switch_event(struct perf_sched *sched, struct perf_evsel *evsel, printf("\n"); } + thread__put(sched_in); + return 0; } @@ -1439,8 +1514,7 @@ static int perf_sched__process_tracepoint_sample(struct 
perf_tool *tool __maybe_ return err; } -static int perf_sched__read_events(struct perf_sched *sched, - struct perf_session **psession) +static int perf_sched__read_events(struct perf_sched *sched) { const struct perf_evsel_str_handler handlers[] = { { "sched:sched_switch", process_sched_switch_event, }, @@ -1453,7 +1527,9 @@ static int perf_sched__read_events(struct perf_sched *sched, struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = sched->force, }; + int rc = -1; session = perf_session__new(&file, false, &sched->tool); if (session == NULL) { @@ -1467,27 +1543,21 @@ static int perf_sched__read_events(struct perf_sched *sched, goto out_delete; if (perf_session__has_traces(session, "record -R")) { - int err = perf_session__process_events(session, &sched->tool); + int err = perf_session__process_events(session); if (err) { pr_err("Failed to process events, error %d", err); goto out_delete; } - sched->nr_events = session->stats.nr_events[0]; - sched->nr_lost_events = session->stats.total_lost; - sched->nr_lost_chunks = session->stats.nr_events[PERF_RECORD_LOST]; + sched->nr_events = session->evlist->stats.nr_events[0]; + sched->nr_lost_events = session->evlist->stats.total_lost; + sched->nr_lost_chunks = session->evlist->stats.nr_events[PERF_RECORD_LOST]; } - if (psession) - *psession = session; - else - perf_session__delete(session); - - return 0; - + rc = 0; out_delete: perf_session__delete(session); - return -1; + return rc; } static void print_bad_events(struct perf_sched *sched) @@ -1512,17 +1582,69 @@ static void print_bad_events(struct perf_sched *sched) } } +static void __merge_work_atoms(struct rb_root *root, struct work_atoms *data) +{ + struct rb_node **new = &(root->rb_node), *parent = NULL; + struct work_atoms *this; + const char *comm = thread__comm_str(data->thread), *this_comm; + + while (*new) { + int cmp; + + this = container_of(*new, struct work_atoms, node); + parent = *new; + + this_comm = thread__comm_str(this->thread); + cmp = strcmp(comm, this_comm); + if (cmp > 0) { + new = &((*new)->rb_left); + } else if (cmp < 0) { + new = &((*new)->rb_right); + } else { + this->num_merged++; + this->total_runtime += data->total_runtime; + this->nb_atoms += data->nb_atoms; + this->total_lat += data->total_lat; + list_splice(&data->work_list, &this->work_list); + if (this->max_lat < data->max_lat) { + this->max_lat = data->max_lat; + this->max_lat_at = data->max_lat_at; + } + zfree(&data); + return; + } + } + + data->num_merged++; + rb_link_node(&data->node, parent, new); + rb_insert_color(&data->node, root); +} + +static void perf_sched__merge_lat(struct perf_sched *sched) +{ + struct work_atoms *data; + struct rb_node *node; + + if (sched->skip_merge) + return; + + while ((node = rb_first(&sched->atom_root))) { + rb_erase(node, &sched->atom_root); + data = rb_entry(node, struct work_atoms, node); + __merge_work_atoms(&sched->merged_atom_root, data); + } +} + static int perf_sched__lat(struct perf_sched *sched) { struct rb_node *next; - struct perf_session *session; setup_pager(); - /* save session -- references to threads are held in work_list */ - if (perf_sched__read_events(sched, &session)) + if (perf_sched__read_events(sched)) return -1; + perf_sched__merge_lat(sched); perf_sched__sort_lat(sched); printf("\n -----------------------------------------------------------------------------------------------------------------\n"); @@ -1537,6 +1659,7 @@ static int perf_sched__lat(struct perf_sched *sched) work_list = rb_entry(next, struct 
work_atoms, node); output_lat_thread(sched, work_list); next = rb_next(next); + thread__zput(work_list->thread); } printf(" -----------------------------------------------------------------------------------------------------------------\n"); @@ -1548,7 +1671,6 @@ static int perf_sched__lat(struct perf_sched *sched) print_bad_events(sched); printf("\n"); - perf_session__delete(session); return 0; } @@ -1557,7 +1679,7 @@ static int perf_sched__map(struct perf_sched *sched) sched->max_cpu = sysconf(_SC_NPROCESSORS_CONF); setup_pager(); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; print_bad_events(sched); return 0; @@ -1572,7 +1694,7 @@ static int perf_sched__replay(struct perf_sched *sched) test_calibrations(sched); - if (perf_sched__read_events(sched, NULL)) + if (perf_sched__read_events(sched)) return -1; printf("nr_run_events: %ld\n", sched->nr_run_events); @@ -1674,6 +1796,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) .profile_cpu = -1, .next_shortname1 = 'A', .next_shortname2 = '0', + .skip_merge = 0, }; const struct option latency_options[] = { OPT_STRING('s', "sort", &sched.sort_order, "key[,key2...]", @@ -1684,6 +1807,8 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "CPU to profile on"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('p', "pids", &sched.skip_merge, + "latency stats per pid instead of per comm"), OPT_END() }; const struct option replay_options[] = { @@ -1693,6 +1818,7 @@ int cmd_sched(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), + OPT_BOOLEAN('f', "force", &sched.force, "don't complain, do it"), OPT_END() }; const struct option sched_options[] = { diff --git a/tools/perf/builtin-script.c b/tools/perf/builtin-script.c index ce304dfd962a..24809787369f 100644 --- a/tools/perf/builtin-script.c +++ b/tools/perf/builtin-script.c @@ -16,6 +16,7 @@ #include "util/evsel.h" #include "util/sort.h" #include "util/data.h" +#include "util/auxtrace.h" #include <linux/bitmap.h> static char const *script_name; @@ -26,6 +27,7 @@ static u64 nr_unordered; static bool no_callchain; static bool latency_format; static bool system_wide; +static bool print_flags; static const char *cpu_list; static DECLARE_BITMAP(cpu_bitmap, MAX_NR_CPUS); @@ -146,9 +148,10 @@ static const char *output_field2str(enum perf_output_field field) #define PRINT_FIELD(x) (output[attr->type].fields & PERF_OUTPUT_##x) -static int perf_evsel__check_stype(struct perf_evsel *evsel, - u64 sample_type, const char *sample_msg, - enum perf_output_field field) +static int perf_evsel__do_check_stype(struct perf_evsel *evsel, + u64 sample_type, const char *sample_msg, + enum perf_output_field field, + bool allow_user_set) { struct perf_event_attr *attr = &evsel->attr; int type = attr->type; @@ -158,6 +161,8 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel, return 0; if (output[type].user_set) { + if (allow_user_set) + return 0; evname = perf_evsel__name(evsel); pr_err("Samples for '%s' event do not have %s attribute set. 
" "Cannot print '%s' field.\n", @@ -175,10 +180,22 @@ static int perf_evsel__check_stype(struct perf_evsel *evsel, return 0; } +static int perf_evsel__check_stype(struct perf_evsel *evsel, + u64 sample_type, const char *sample_msg, + enum perf_output_field field) +{ + return perf_evsel__do_check_stype(evsel, sample_type, sample_msg, field, + false); +} + static int perf_evsel__check_attr(struct perf_evsel *evsel, struct perf_session *session) { struct perf_event_attr *attr = &evsel->attr; + bool allow_user_set; + + allow_user_set = perf_header__has_feat(&session->header, + HEADER_AUXTRACE); if (PRINT_FIELD(TRACE) && !perf_session__has_traces(session, "record -R")) @@ -191,8 +208,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, } if (PRINT_FIELD(ADDR) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", - PERF_OUTPUT_ADDR)) + perf_evsel__do_check_stype(evsel, PERF_SAMPLE_ADDR, "ADDR", + PERF_OUTPUT_ADDR, allow_user_set)) return -EINVAL; if (PRINT_FIELD(SYM) && !PRINT_FIELD(IP) && !PRINT_FIELD(ADDR)) { @@ -229,8 +246,8 @@ static int perf_evsel__check_attr(struct perf_evsel *evsel, return -EINVAL; if (PRINT_FIELD(CPU) && - perf_evsel__check_stype(evsel, PERF_SAMPLE_CPU, "CPU", - PERF_OUTPUT_CPU)) + perf_evsel__do_check_stype(evsel, PERF_SAMPLE_CPU, "CPU", + PERF_OUTPUT_CPU, allow_user_set)) return -EINVAL; if (PRINT_FIELD(PERIOD) && @@ -445,10 +462,29 @@ static void print_sample_bts(union perf_event *event, printf("\n"); } +static void print_sample_flags(u32 flags) +{ + const char *chars = PERF_IP_FLAG_CHARS; + const int n = strlen(PERF_IP_FLAG_CHARS); + char str[33]; + int i, pos = 0; + + for (i = 0; i < n; i++, flags >>= 1) { + if (flags & 1) + str[pos++] = chars[i]; + } + for (; i < 32; i++, flags >>= 1) { + if (flags & 1) + str[pos++] = '?'; + } + str[pos] = 0; + printf(" %-4s ", str); +} + static void process_event(union perf_event *event, struct perf_sample *sample, - struct perf_evsel *evsel, struct thread *thread, - struct addr_location *al) + struct perf_evsel *evsel, struct addr_location *al) { + struct thread *thread = al->thread; struct perf_event_attr *attr = &evsel->attr; if (output[attr->type].fields == 0) @@ -464,6 +500,9 @@ static void process_event(union perf_event *event, struct perf_sample *sample, printf("%s: ", evname ? 
evname : "[unknown]"); } + if (print_flags) + print_sample_flags(sample->flags); + if (is_bts_event(attr)) { print_sample_bts(event, sample, evsel, thread, al); return; @@ -549,14 +588,6 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, struct machine *machine) { struct addr_location al; - struct thread *thread = machine__findnew_thread(machine, sample->pid, - sample->tid); - - if (thread == NULL) { - pr_debug("problem processing %d event, skipping it.\n", - event->header.type); - return -1; - } if (debug_mode) { if (sample->time < last_timestamp) { @@ -576,13 +607,14 @@ static int process_sample_event(struct perf_tool *tool __maybe_unused, } if (al.filtered) - return 0; + goto out_put; if (cpu_list && !test_bit(sample->cpu, cpu_bitmap)) - return 0; - - scripting_ops->process_event(event, sample, evsel, thread, &al); + goto out_put; + scripting_ops->process_event(event, sample, evsel, &al); +out_put: + addr_location__put(&al); return 0; } @@ -650,8 +682,8 @@ static int process_comm_event(struct perf_tool *tool, print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); ret = 0; - out: + thread__put(thread); return ret; } @@ -682,6 +714,7 @@ static int process_fork_event(struct perf_tool *tool, } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); + thread__put(thread); return 0; } @@ -690,6 +723,7 @@ static int process_exit_event(struct perf_tool *tool, struct perf_sample *sample, struct machine *machine) { + int err = 0; struct thread *thread; struct perf_script *script = container_of(tool, struct perf_script, tool); struct perf_session *session = script->session; @@ -711,9 +745,10 @@ static int process_exit_event(struct perf_tool *tool, perf_event__fprintf(event, stdout); if (perf_event__process_exit(tool, event, sample, machine) < 0) - return -1; + err = -1; - return 0; + thread__put(thread); + return err; } static int process_mmap_event(struct perf_tool *tool, @@ -743,7 +778,7 @@ static int process_mmap_event(struct perf_tool *tool, } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); - + thread__put(thread); return 0; } @@ -774,7 +809,7 @@ static int process_mmap2_event(struct perf_tool *tool, } print_sample_start(sample, thread, evsel); perf_event__fprintf(event, stdout); - + thread__put(thread); return 0; } @@ -800,7 +835,7 @@ static int __cmd_script(struct perf_script *script) script->tool.mmap2 = process_mmap2_event; } - ret = perf_session__process_events(script->session, &script->tool); + ret = perf_session__process_events(script->session); if (debug_mode) pr_err("Misordered timestamps: %" PRIu64 "\n", nr_unordered); @@ -1007,12 +1042,15 @@ static int parse_output_fields(const struct option *opt __maybe_unused, } } - tok = strtok(tok, ","); - while (tok) { + for (tok = strtok(tok, ","); tok; tok = strtok(NULL, ",")) { for (i = 0; i < imax; ++i) { if (strcmp(tok, all_output_options[i].str) == 0) break; } + if (i == imax && strcmp(tok, "flags") == 0) { + print_flags = true; + continue; + } if (i == imax) { fprintf(stderr, "Invalid field requested.\n"); rc = -EINVAL; @@ -1040,8 +1078,6 @@ static int parse_output_fields(const struct option *opt __maybe_unused, } output[type].fields |= all_output_options[i].field; } - - tok = strtok(NULL, ","); } if (type >= 0) { @@ -1505,6 +1541,7 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) char *rec_script_path = NULL; char *rep_script_path = NULL; struct perf_session *session; + struct itrace_synth_opts 
itrace_synth_opts = { .set = false, }; char *script_path = NULL; const char **__argv; int i, j, err = 0; @@ -1519,10 +1556,17 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) .attr = process_attr, .tracing_data = perf_event__process_tracing_data, .build_id = perf_event__process_build_id, + .id_index = perf_event__process_id_index, + .auxtrace_info = perf_event__process_auxtrace_info, + .auxtrace = perf_event__process_auxtrace, + .auxtrace_error = perf_event__process_auxtrace_error, .ordered_events = true, .ordering_requires_timestamps = true, }, }; + struct perf_data_file file = { + .mode = PERF_DATA_MODE_READ, + }; const struct option options[] = { OPT_BOOLEAN('D', "dump-raw-trace", &dump_trace, "dump raw trace in ASCII"), @@ -1550,11 +1594,11 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "When printing symbols do not display call chain"), OPT_STRING(0, "symfs", &symbol_conf.symfs, "directory", "Look for files with symbols relative to this directory"), - OPT_CALLBACK('f', "fields", NULL, "str", + OPT_CALLBACK('F', "fields", NULL, "str", "comma separated output fields prepend with 'type:'. " "Valid types: hw,sw,trace,raw. " "Fields: comm,tid,pid,time,cpu,event,trace,ip,sym,dso," - "addr,symoff,period", parse_output_fields), + "addr,symoff,period,flags", parse_output_fields), OPT_BOOLEAN('a', "all-cpus", &system_wide, "system-wide collection from all CPUs"), OPT_STRING('S', "symbols", &symbol_conf.sym_list_str, "symbol[,symbol...]", @@ -1562,6 +1606,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('C', "cpu", &cpu_list, "cpu", "list of cpus to profile"), OPT_STRING('c', "comms", &symbol_conf.comm_list_str, "comm[,comm...]", "only display events for these comms"), + OPT_STRING(0, "pid", &symbol_conf.pid_list_str, "pid[,pid...]", + "only consider symbols in these pids"), + OPT_STRING(0, "tid", &symbol_conf.tid_list_str, "tid[,tid...]", + "only consider symbols in these tids"), OPT_BOOLEAN('I', "show-info", &show_full_info, "display extended information from perf.data file"), OPT_BOOLEAN('\0', "show-kernel-path", &symbol_conf.show_kernel_path, @@ -1570,9 +1618,14 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "Show the fork/comm/exit events"), OPT_BOOLEAN('\0', "show-mmap-events", &script.show_mmap_events, "Show the mmap events"), + OPT_BOOLEAN('f', "force", &file.force, "don't complain, do it"), + OPT_CALLBACK_OPTARG(0, "itrace", &itrace_synth_opts, NULL, "opts", + "Instruction Tracing options", + itrace_parse_synth_opts), OPT_END() }; - const char * const script_usage[] = { + const char * const script_subcommands[] = { "record", "report", NULL }; + const char *script_usage[] = { "perf script [<options>]", "perf script [<options>] record <script> [<record-options>] <command>", "perf script [<options>] report <script> [script-args]", @@ -1580,13 +1633,10 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) "perf script [<options>] <top-script> [script-args]", NULL }; - struct perf_data_file file = { - .mode = PERF_DATA_MODE_READ, - }; setup_scripting(); - argc = parse_options(argc, argv, options, script_usage, + argc = parse_options_subcommand(argc, argv, options, script_subcommands, script_usage, PARSE_OPT_STOP_AT_NON_OPTION); file.path = input_name; @@ -1767,6 +1817,8 @@ int cmd_script(int argc, const char **argv, const char *prefix __maybe_unused) script.session = session; + session->itrace_synth_opts = 
&itrace_synth_opts; + if (cpu_list) { err = perf_session__cpu_bitmap(session, cpu_list, cpu_bitmap); if (err < 0) diff --git a/tools/perf/builtin-stat.c b/tools/perf/builtin-stat.c index e598e4e98170..d99d850e1444 100644 --- a/tools/perf/builtin-stat.c +++ b/tools/perf/builtin-stat.c @@ -67,14 +67,11 @@ #define CNTR_NOT_SUPPORTED "<not supported>" #define CNTR_NOT_COUNTED "<not counted>" -static void print_stat(int argc, const char **argv); -static void print_counter_aggr(struct perf_evsel *counter, char *prefix); -static void print_counter(struct perf_evsel *counter, char *prefix); -static void print_aggr(char *prefix); +static void print_counters(struct timespec *ts, int argc, const char **argv); /* Default events used for perf stat -T */ -static const char * const transaction_attrs[] = { - "task-clock", +static const char *transaction_attrs = { + "task-clock," "{" "instructions," "cycles," @@ -86,8 +83,8 @@ static const char * const transaction_attrs[] = { }; /* More limited version when the CPU does not have all events. */ -static const char * const transaction_limited_attrs[] = { - "task-clock", +static const char * transaction_limited_attrs = { + "task-clock," "{" "instructions," "cycles," @@ -96,30 +93,12 @@ static const char * const transaction_limited_attrs[] = { "}" }; -/* must match transaction_attrs and the beginning limited_attrs */ -enum { - T_TASK_CLOCK, - T_INSTRUCTIONS, - T_CYCLES, - T_CYCLES_IN_TX, - T_TRANSACTION_START, - T_ELISION_START, - T_CYCLES_IN_TX_CP, -}; - static struct perf_evlist *evsel_list; static struct target target = { .uid = UINT_MAX, }; -enum aggr_mode { - AGGR_NONE, - AGGR_GLOBAL, - AGGR_SOCKET, - AGGR_CORE, -}; - static int run_count = 1; static bool no_inherit = false; static bool scale = true; @@ -147,10 +126,6 @@ static int (*aggr_get_id)(struct cpu_map *m, int cpu); static volatile int done = 0; -struct perf_stat { - struct stats res_stats[3]; -}; - static inline void diff_timespec(struct timespec *r, struct timespec *a, struct timespec *b) { @@ -163,132 +138,10 @@ static inline void diff_timespec(struct timespec *r, struct timespec *a, } } -static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) -{ - return (evsel->cpus && !target.cpu_list) ? 
evsel->cpus : evsel_list->cpus; -} - -static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) -{ - return perf_evsel__cpus(evsel)->nr; -} - -static void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) -{ - int i; - struct perf_stat *ps = evsel->priv; - - for (i = 0; i < 3; i++) - init_stats(&ps->res_stats[i]); -} - -static int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) -{ - evsel->priv = zalloc(sizeof(struct perf_stat)); - if (evsel->priv == NULL) - return -ENOMEM; - perf_evsel__reset_stat_priv(evsel); - return 0; -} - -static void perf_evsel__free_stat_priv(struct perf_evsel *evsel) -{ - zfree(&evsel->priv); -} - -static int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel) -{ - void *addr; - size_t sz; - - sz = sizeof(*evsel->counts) + - (perf_evsel__nr_cpus(evsel) * sizeof(struct perf_counts_values)); - - addr = zalloc(sz); - if (!addr) - return -ENOMEM; - - evsel->prev_raw_counts = addr; - - return 0; -} - -static void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) -{ - zfree(&evsel->prev_raw_counts); -} - -static void perf_evlist__free_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__free_stat_priv(evsel); - perf_evsel__free_counts(evsel); - perf_evsel__free_prev_raw_counts(evsel); - } -} - -static int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) +static void perf_stat__reset_stats(void) { - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - if (perf_evsel__alloc_stat_priv(evsel) < 0 || - perf_evsel__alloc_counts(evsel, perf_evsel__nr_cpus(evsel)) < 0 || - (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel) < 0)) - goto out_free; - } - - return 0; - -out_free: - perf_evlist__free_stats(evlist); - return -1; -} - -static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_stats[MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_front_stats[MAX_NR_CPUS]; -static struct stats runtime_stalled_cycles_back_stats[MAX_NR_CPUS]; -static struct stats runtime_branches_stats[MAX_NR_CPUS]; -static struct stats runtime_cacherefs_stats[MAX_NR_CPUS]; -static struct stats runtime_l1_dcache_stats[MAX_NR_CPUS]; -static struct stats runtime_l1_icache_stats[MAX_NR_CPUS]; -static struct stats runtime_ll_cache_stats[MAX_NR_CPUS]; -static struct stats runtime_itlb_cache_stats[MAX_NR_CPUS]; -static struct stats runtime_dtlb_cache_stats[MAX_NR_CPUS]; -static struct stats runtime_cycles_in_tx_stats[MAX_NR_CPUS]; -static struct stats walltime_nsecs_stats; -static struct stats runtime_transaction_stats[MAX_NR_CPUS]; -static struct stats runtime_elision_stats[MAX_NR_CPUS]; - -static void perf_stat__reset_stats(struct perf_evlist *evlist) -{ - struct perf_evsel *evsel; - - evlist__for_each(evlist, evsel) { - perf_evsel__reset_stat_priv(evsel); - perf_evsel__reset_counts(evsel, perf_evsel__nr_cpus(evsel)); - } - - memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); - memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); - memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); - memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); - memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); - memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); - memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); - memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); - 
memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); - memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); - memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); - memset(runtime_cycles_in_tx_stats, 0, - sizeof(runtime_cycles_in_tx_stats)); - memset(runtime_transaction_stats, 0, - sizeof(runtime_transaction_stats)); - memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); - memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); + perf_evlist__reset_stats(evsel_list); + perf_stat__reset_shadow_stats(); } static int create_perf_stat_counter(struct perf_evsel *evsel) @@ -325,69 +178,6 @@ static inline int nsec_counter(struct perf_evsel *evsel) return 0; } -static struct perf_evsel *nth_evsel(int n) -{ - static struct perf_evsel **array; - static int array_len; - struct perf_evsel *ev; - int j; - - /* Assumes this only called when evsel_list does not change anymore. */ - if (!array) { - evlist__for_each(evsel_list, ev) - array_len++; - array = malloc(array_len * sizeof(void *)); - if (!array) - exit(ENOMEM); - j = 0; - evlist__for_each(evsel_list, ev) - array[j++] = ev; - } - if (n < array_len) - return array[n]; - return NULL; -} - -/* - * Update various tracking values we maintain to print - * more semantic information such as miss/hit ratios, - * instruction rates, etc: - */ -static void update_shadow_stats(struct perf_evsel *counter, u64 *count) -{ - if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) - update_stats(&runtime_nsecs_stats[0], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) - update_stats(&runtime_cycles_stats[0], count[0]); - else if (transaction_run && - perf_evsel__cmp(counter, nth_evsel(T_CYCLES_IN_TX))) - update_stats(&runtime_cycles_in_tx_stats[0], count[0]); - else if (transaction_run && - perf_evsel__cmp(counter, nth_evsel(T_TRANSACTION_START))) - update_stats(&runtime_transaction_stats[0], count[0]); - else if (transaction_run && - perf_evsel__cmp(counter, nth_evsel(T_ELISION_START))) - update_stats(&runtime_elision_stats[0], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) - update_stats(&runtime_stalled_cycles_front_stats[0], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) - update_stats(&runtime_stalled_cycles_back_stats[0], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) - update_stats(&runtime_branches_stats[0], count[0]); - else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) - update_stats(&runtime_cacherefs_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) - update_stats(&runtime_l1_dcache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) - update_stats(&runtime_l1_icache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) - update_stats(&runtime_ll_cache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) - update_stats(&runtime_dtlb_cache_stats[0], count[0]); - else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) - update_stats(&runtime_itlb_cache_stats[0], count[0]); -} - static void zero_per_pkg(struct perf_evsel *counter) { if (counter->per_pkg_mask) @@ -424,8 +214,9 @@ static int check_per_pkg(struct perf_evsel *counter, int cpu, bool *skip) return 0; } -static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, - struct perf_counts_values *count) +static int 
+process_counter_values(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { struct perf_counts_values *aggr = &evsel->counts->aggr; static struct perf_counts_values zero; @@ -440,14 +231,15 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, count = &zero; switch (aggr_mode) { + case AGGR_THREAD: case AGGR_CORE: case AGGR_SOCKET: case AGGR_NONE: if (!evsel->snapshot) - perf_evsel__compute_deltas(evsel, cpu, count); + perf_evsel__compute_deltas(evsel, cpu, thread, count); perf_counts_values__scale(count, scale, NULL); - evsel->counts->cpu[cpu] = *count; - update_shadow_stats(evsel, count->values); + if (aggr_mode == AGGR_NONE) + perf_stat__update_shadow_stats(evsel, count->values, cpu); break; case AGGR_GLOBAL: aggr->val += count->val; @@ -462,26 +254,48 @@ static int read_cb(struct perf_evsel *evsel, int cpu, int thread __maybe_unused, return 0; } -static int read_counter(struct perf_evsel *counter); +static int process_counter_maps(struct perf_evsel *counter) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = perf_evsel__nr_cpus(counter); + int cpu, thread; -/* - * Read out the results of a single counter: - * aggregate counts across CPUs in system-wide mode - */ -static int read_counter_aggr(struct perf_evsel *counter) + if (counter->system_wide) + nthreads = 1; + + for (thread = 0; thread < nthreads; thread++) { + for (cpu = 0; cpu < ncpus; cpu++) { + if (process_counter_values(counter, cpu, thread, + perf_counts(counter->counts, cpu, thread))) + return -1; + } + } + + return 0; +} + +static int process_counter(struct perf_evsel *counter) { struct perf_counts_values *aggr = &counter->counts->aggr; struct perf_stat *ps = counter->priv; u64 *count = counter->counts->aggr.values; - int i; + int i, ret; aggr->val = aggr->ena = aggr->run = 0; + init_stats(ps->res_stats); - if (read_counter(counter)) - return -1; + if (counter->per_pkg) + zero_per_pkg(counter); + + ret = process_counter_maps(counter); + if (ret) + return ret; + + if (aggr_mode != AGGR_GLOBAL) + return 0; if (!counter->snapshot) - perf_evsel__compute_deltas(counter, -1, aggr); + perf_evsel__compute_deltas(counter, -1, -1, aggr); perf_counts_values__scale(aggr, scale, &counter->counts->scaled); for (i = 0; i < 3; i++) @@ -495,7 +309,7 @@ static int read_counter_aggr(struct perf_evsel *counter) /* * Save the full runtime - to allow normalization during printout: */ - update_shadow_stats(counter, count); + perf_stat__update_shadow_stats(counter, count, 0); return 0; } @@ -510,15 +324,18 @@ static int read_counter(struct perf_evsel *counter) int ncpus = perf_evsel__nr_cpus(counter); int cpu, thread; + if (!counter->supported) + return -ENOENT; + if (counter->system_wide) nthreads = 1; - if (counter->per_pkg) - zero_per_pkg(counter); - for (thread = 0; thread < nthreads; thread++) { for (cpu = 0; cpu < ncpus; cpu++) { - if (perf_evsel__read_cb(counter, cpu, thread, read_cb)) + struct perf_counts_values *count; + + count = perf_counts(counter->counts, cpu, thread); + if (perf_evsel__read(counter, cpu, thread, count)) return -1; } } @@ -526,68 +343,34 @@ static int read_counter(struct perf_evsel *counter) return 0; } -static void print_interval(void) +static void read_counters(bool close_counters) { - static int num_print_interval; struct perf_evsel *counter; - struct perf_stat *ps; - struct timespec ts, rs; - char prefix[64]; - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, 
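/*
 * process_counter_values() feeds each (val, ena, run) triple through
 * perf_counts_values__scale(), and AGGR_GLOBAL sums the triples first.
 * For multiplexed counters the usual extrapolation is val * ena / run,
 * and print_running() later reports run/ena as a percentage; a standalone
 * sketch of that arithmetic (illustrative only, not the helper's code):
 */
#include <inttypes.h>
#include <stdio.h>

static uint64_t scale_count(uint64_t val, uint64_t ena, uint64_t run)
{
	if (!run || !ena)
		return 0;	/* counter never ran */
	return (uint64_t)((double)val * ena / run);
}

int main(void)
{
	/* counted 1,000,000 events while scheduled in for 25% of the run */
	uint64_t val = 1000000, ena = 4000000000ULL, run = 1000000000ULL;

	printf("scaled: %" PRIu64 "\n", scale_count(val, ena, run)); /* 4000000 */
	printf("running: (%.2f%%)\n", 100.0 * run / ena);            /* (25.00%) */
	return 0;
}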
sizeof(ps->res_stats)); - read_counter_aggr(counter); - } - } else { - evlist__for_each(evsel_list, counter) { - ps = counter->priv; - memset(ps->res_stats, 0, sizeof(ps->res_stats)); - read_counter(counter); - } - } + evlist__for_each(evsel_list, counter) { + if (read_counter(counter)) + pr_warning("failed to read counter %s\n", counter->name); - clock_gettime(CLOCK_MONOTONIC, &ts); - diff_timespec(&rs, &ts, &ref_time); - sprintf(prefix, "%6lu.%09lu%s", rs.tv_sec, rs.tv_nsec, csv_sep); + if (process_counter(counter)) + pr_warning("failed to process counter %s\n", counter->name); - if (num_print_interval == 0 && !csv_output) { - switch (aggr_mode) { - case AGGR_SOCKET: - fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_CORE: - fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); - break; - case AGGR_NONE: - fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); - break; - case AGGR_GLOBAL: - default: - fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + if (close_counters) { + perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), + thread_map__nr(evsel_list->threads)); } } +} - if (++num_print_interval == 25) - num_print_interval = 0; +static void process_interval(void) +{ + struct timespec ts, rs; - switch (aggr_mode) { - case AGGR_CORE: - case AGGR_SOCKET: - print_aggr(prefix); - break; - case AGGR_NONE: - evlist__for_each(evsel_list, counter) - print_counter(counter, prefix); - break; - case AGGR_GLOBAL: - default: - evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, prefix); - } + read_counters(false); - fflush(output); + clock_gettime(CLOCK_MONOTONIC, &ts); + diff_timespec(&rs, &ts, &ref_time); + + print_counters(&rs, 0, NULL); } static void handle_initial_delay(void) @@ -660,7 +443,10 @@ static int __run_perf_stat(int argc, const char **argv) ui__warning("%s event is not supported by the kernel.\n", perf_evsel__name(counter)); counter->supported = false; - continue; + + if ((counter->leader != counter) || + !(counter->leader->nr_members > 1)) + continue; } perf_evsel__open_strerror(counter, &target, @@ -679,8 +465,9 @@ static int __run_perf_stat(int argc, const char **argv) unit_width = l; } - if (perf_evlist__apply_filters(evsel_list)) { - error("failed to set filter with %d (%s)\n", errno, + if (perf_evlist__apply_filters(evsel_list, &counter)) { + error("failed to set filter \"%s\" on event %s with %d (%s)\n", + counter->filter, perf_evsel__name(counter), errno, strerror_r(errno, msg, sizeof(msg))); return -1; } @@ -698,7 +485,7 @@ static int __run_perf_stat(int argc, const char **argv) if (interval) { while (!waitpid(child_pid, &status, WNOHANG)) { nanosleep(&ts, NULL); - print_interval(); + process_interval(); } } wait(&status); @@ -716,7 +503,7 @@ static int __run_perf_stat(int argc, const char **argv) while (!done) { nanosleep(&ts, NULL); if (interval) - print_interval(); + process_interval(); } } @@ -724,18 +511,7 @@ static int __run_perf_stat(int argc, const char **argv) update_stats(&walltime_nsecs_stats, t1 - t0); - if (aggr_mode == AGGR_GLOBAL) { - evlist__for_each(evsel_list, counter) { - read_counter_aggr(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), - thread_map__nr(evsel_list->threads)); - } - } else { - evlist__for_each(evsel_list, counter) { - read_counter(counter); - perf_evsel__close_fd(counter, perf_evsel__nr_cpus(counter), 1); - } - } + read_counters(true); return WEXITSTATUS(status); } @@ -766,6 +542,19 @@ 
static int run_perf_stat(int argc, const char **argv) return ret; } +static void print_running(u64 run, u64 ena) +{ + if (csv_output) { + fprintf(output, "%s%" PRIu64 "%s%.2f", + csv_sep, + run, + csv_sep, + ena ? 100.0 * run / ena : 100.0); + } else if (run != ena) { + fprintf(output, " (%.2f%%)", 100.0 * run / ena); + } +} + static void print_noise_pct(double total, double avg) { double pct = rel_stddev_stats(total, avg); @@ -814,6 +603,14 @@ static void aggr_printout(struct perf_evsel *evsel, int id, int nr) csv_output ? 0 : -4, perf_evsel__cpus(evsel)->map[id], csv_sep); break; + case AGGR_THREAD: + fprintf(output, "%*s-%*d%s", + csv_output ? 0 : 16, + thread_map__comm(evsel->threads, id), + csv_output ? 0 : -8, + thread_map__pid(evsel->threads, id), + csv_sep); + break; case AGGR_GLOBAL: default: break; @@ -856,188 +653,8 @@ static void nsec_printout(int id, int nr, struct perf_evsel *evsel, double avg) fprintf(output, " "); } -/* used for get_ratio_color() */ -enum grc_type { - GRC_STALLED_CYCLES_FE, - GRC_STALLED_CYCLES_BE, - GRC_CACHE_MISSES, - GRC_MAX_NR -}; - -static const char *get_ratio_color(enum grc_type type, double ratio) -{ - static const double grc_table[GRC_MAX_NR][3] = { - [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, - [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, - [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, - }; - const char *color = PERF_COLOR_NORMAL; - - if (ratio > grc_table[type][0]) - color = PERF_COLOR_RED; - else if (ratio > grc_table[type][1]) - color = PERF_COLOR_MAGENTA; - else if (ratio > grc_table[type][2]) - color = PERF_COLOR_YELLOW; - - return color; -} - -static void print_stalled_cycles_frontend(int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_cycles_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " frontend cycles idle "); -} - -static void print_stalled_cycles_backend(int cpu, - struct perf_evsel *evsel - __maybe_unused, double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_cycles_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " backend cycles idle "); -} - -static void print_branch_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_branches_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all branches "); -} - -static void print_l1_dcache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_l1_dcache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all L1-dcache hits "); -} - -static void print_l1_icache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = 
avg_stats(&runtime_l1_icache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all L1-icache hits "); -} - -static void print_dtlb_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_dtlb_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all dTLB cache hits "); -} - -static void print_itlb_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_itlb_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all iTLB cache hits "); -} - -static void print_ll_cache_misses(int cpu, - struct perf_evsel *evsel __maybe_unused, - double avg) -{ - double total, ratio = 0.0; - const char *color; - - total = avg_stats(&runtime_ll_cache_stats[cpu]); - - if (total) - ratio = avg / total * 100.0; - - color = get_ratio_color(GRC_CACHE_MISSES, ratio); - - fprintf(output, " # "); - color_fprintf(output, color, "%6.2f%%", ratio); - fprintf(output, " of all LL-cache hits "); -} - static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) { - double total, ratio = 0.0, total2; double sc = evsel->scale; const char *fmt; int cpu = cpu_map__id_to_cpu(id); @@ -1071,134 +688,7 @@ static void abs_printout(int id, int nr, struct perf_evsel *evsel, double avg) if (csv_output || interval) return; - if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { - total = avg_stats(&runtime_cycles_stats[cpu]); - if (total) { - ratio = avg / total; - fprintf(output, " # %5.2f insns per cycle ", ratio); - } - total = avg_stats(&runtime_stalled_cycles_front_stats[cpu]); - total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[cpu])); - - if (total && avg) { - ratio = total / avg; - fprintf(output, "\n"); - if (aggr_mode == AGGR_NONE) - fprintf(output, " "); - fprintf(output, " # %5.2f stalled cycles per insn", ratio); - } - - } else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && - runtime_branches_stats[cpu].n != 0) { - print_branch_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_dcache_stats[cpu].n != 0) { - print_l1_dcache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_l1_icache_stats[cpu].n != 0) { - print_l1_icache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_dtlb_cache_stats[cpu].n != 0) { - print_dtlb_cache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( 
PERF_COUNT_HW_CACHE_ITLB | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_itlb_cache_stats[cpu].n != 0) { - print_itlb_cache_misses(cpu, evsel, avg); - } else if ( - evsel->attr.type == PERF_TYPE_HW_CACHE && - evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | - ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | - ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && - runtime_ll_cache_stats[cpu].n != 0) { - print_ll_cache_misses(cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && - runtime_cacherefs_stats[cpu].n != 0) { - total = avg_stats(&runtime_cacherefs_stats[cpu]); - - if (total) - ratio = avg * 100 / total; - - fprintf(output, " # %8.3f %% of all cache refs ", ratio); - - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { - print_stalled_cycles_frontend(cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { - print_stalled_cycles_backend(cpu, evsel, avg); - } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { - total = avg_stats(&runtime_nsecs_stats[cpu]); - - if (total) { - ratio = avg / total; - fprintf(output, " # %8.3f GHz ", ratio); - } - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX))) { - total = avg_stats(&runtime_cycles_stats[cpu]); - if (total) - fprintf(output, - " # %5.2f%% transactional cycles ", - 100.0 * (avg / total)); - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_CYCLES_IN_TX_CP))) { - total = avg_stats(&runtime_cycles_stats[cpu]); - total2 = avg_stats(&runtime_cycles_in_tx_stats[cpu]); - if (total2 < avg) - total2 = avg; - if (total) - fprintf(output, - " # %5.2f%% aborted cycles ", - 100.0 * ((total2-avg) / total)); - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_TRANSACTION_START)) && - avg > 0 && - runtime_cycles_in_tx_stats[cpu].n != 0) { - total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); - - if (total) - ratio = total / avg; - - fprintf(output, " # %8.0f cycles / transaction ", ratio); - } else if (transaction_run && - perf_evsel__cmp(evsel, nth_evsel(T_ELISION_START)) && - avg > 0 && - runtime_cycles_in_tx_stats[cpu].n != 0) { - total = avg_stats(&runtime_cycles_in_tx_stats[cpu]); - - if (total) - ratio = total / avg; - - fprintf(output, " # %8.0f cycles / elision ", ratio); - } else if (runtime_nsecs_stats[cpu].n != 0) { - char unit = 'M'; - - total = avg_stats(&runtime_nsecs_stats[cpu]); - - if (total) - ratio = 1000.0 * avg / total; - if (ratio < 0.001) { - ratio *= 1000; - unit = 'K'; - } - - fprintf(output, " # %8.3f %c/sec ", ratio, unit); - } else { - fprintf(output, " "); - } + perf_stat__print_shadow_stats(output, evsel, avg, cpu, aggr_mode); } static void print_aggr(char *prefix) @@ -1221,9 +711,9 @@ static void print_aggr(char *prefix) s2 = aggr_get_id(evsel_list->cpus, cpu2); if (s2 != id) continue; - val += counter->counts->cpu[cpu].val; - ena += counter->counts->cpu[cpu].ena; - run += counter->counts->cpu[cpu].run; + val += perf_counts(counter->counts, cpu, 0)->val; + ena += perf_counts(counter->counts, cpu, 0)->ena; + run += perf_counts(counter->counts, cpu, 0)->run; nr++; } if (prefix) @@ -1249,6 +739,7 @@ static void print_aggr(char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1259,18 +750,49 @@ static void print_aggr(char *prefix) else abs_printout(id, nr, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); - 
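/*
 * The removed branches above all compare attr.config against the
 * perf_event_open(2) cache-event encoding:
 *     config = cache_id | (op << 8) | (result << 16)
 * A small sketch that builds such a config value; the PERF_COUNT_* enums
 * come from linux/perf_event.h:
 */
#include <linux/perf_event.h>
#include <stdio.h>

static unsigned long long hw_cache_config(unsigned int id, unsigned int op,
					  unsigned int result)
{
	return id | (op << 8) | (result << 16);
}

int main(void)
{
	/* L1-dcache read misses, i.e. the "L1-dcache-load-misses" event */
	unsigned long long cfg = hw_cache_config(PERF_COUNT_HW_CACHE_L1D,
						 PERF_COUNT_HW_CACHE_OP_READ,
						 PERF_COUNT_HW_CACHE_RESULT_MISS);

	printf("type=PERF_TYPE_HW_CACHE config=%#llx\n", cfg);
	return 0;
}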
if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } + print_running(run, ena); fputc('\n', output); } } } +static void print_aggr_thread(struct perf_evsel *counter, char *prefix) +{ + int nthreads = thread_map__nr(counter->threads); + int ncpus = cpu_map__nr(counter->cpus); + int cpu, thread; + double uval; + + for (thread = 0; thread < nthreads; thread++) { + u64 ena = 0, run = 0, val = 0; + + for (cpu = 0; cpu < ncpus; cpu++) { + val += perf_counts(counter->counts, cpu, thread)->val; + ena += perf_counts(counter->counts, cpu, thread)->ena; + run += perf_counts(counter->counts, cpu, thread)->run; + } + + if (prefix) + fprintf(output, "%s", prefix); + + uval = val * counter->scale; + + if (nsec_counter(counter)) + nsec_printout(thread, 0, counter, uval); + else + abs_printout(thread, 0, counter, uval); + + if (!csv_output) + print_noise(counter, 1.0); + + print_running(run, ena); + fputc('\n', output); + } +} + /* * Print out the results of a single counter: * aggregated counts in system-wide mode @@ -1281,11 +803,15 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) double avg = avg_stats(&ps->res_stats[0]); int scaled = counter->counts->scaled; double uval; + double avg_enabled, avg_running; + + avg_enabled = avg_stats(&ps->res_stats[1]); + avg_running = avg_stats(&ps->res_stats[2]); if (prefix) fprintf(output, "%s", prefix); - if (scaled == -1) { + if (scaled == -1 || !counter->supported) { fprintf(output, "%*s%s", csv_output ? 0 : 18, counter->supported ? CNTR_NOT_COUNTED : CNTR_NOT_SUPPORTED, @@ -1300,6 +826,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) if (counter->cgrp) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(avg_running, avg_enabled); fputc('\n', output); return; } @@ -1313,19 +840,7 @@ static void print_counter_aggr(struct perf_evsel *counter, char *prefix) print_noise(counter, avg); - if (csv_output) { - fputc('\n', output); - return; - } - - if (scaled) { - double avg_enabled, avg_running; - - avg_enabled = avg_stats(&ps->res_stats[1]); - avg_running = avg_stats(&ps->res_stats[2]); - - fprintf(output, " [%5.2f%%]", 100 * avg_running / avg_enabled); - } + print_running(avg_running, avg_enabled); fprintf(output, "\n"); } @@ -1340,9 +855,9 @@ static void print_counter(struct perf_evsel *counter, char *prefix) int cpu; for (cpu = 0; cpu < perf_evsel__nr_cpus(counter); cpu++) { - val = counter->counts->cpu[cpu].val; - ena = counter->counts->cpu[cpu].ena; - run = counter->counts->cpu[cpu].run; + val = perf_counts(counter->counts, cpu, 0)->val; + ena = perf_counts(counter->counts, cpu, 0)->ena; + run = perf_counts(counter->counts, cpu, 0)->run; if (prefix) fprintf(output, "%s", prefix); @@ -1367,6 +882,7 @@ static void print_counter(struct perf_evsel *counter, char *prefix) fprintf(output, "%s%s", csv_sep, counter->cgrp->name); + print_running(run, ena); fputc('\n', output); continue; } @@ -1378,20 +894,46 @@ static void print_counter(struct perf_evsel *counter, char *prefix) else abs_printout(cpu, 0, counter, uval); - if (!csv_output) { + if (!csv_output) print_noise(counter, 1.0); + print_running(run, ena); - if (run != ena) - fprintf(output, " (%.2f%%)", - 100.0 * run / ena); - } fputc('\n', output); } } -static void print_stat(int argc, const char **argv) +static void print_interval(char *prefix, struct timespec *ts) +{ + static int num_print_interval; + + sprintf(prefix, "%6lu.%09lu%s", ts->tv_sec, ts->tv_nsec, csv_sep); + + if (num_print_interval == 0 && !csv_output) { + 
switch (aggr_mode) { + case AGGR_SOCKET: + fprintf(output, "# time socket cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_CORE: + fprintf(output, "# time core cpus counts %*s events\n", unit_width, "unit"); + break; + case AGGR_NONE: + fprintf(output, "# time CPU counts %*s events\n", unit_width, "unit"); + break; + case AGGR_THREAD: + fprintf(output, "# time comm-pid counts %*s events\n", unit_width, "unit"); + break; + case AGGR_GLOBAL: + default: + fprintf(output, "# time counts %*s events\n", unit_width, "unit"); + } + } + + if (++num_print_interval == 25) + num_print_interval = 0; +} + +static void print_header(int argc, const char **argv) { - struct perf_evsel *counter; int i; fflush(stdout); @@ -1417,36 +959,57 @@ static void print_stat(int argc, const char **argv) fprintf(output, " (%d runs)", run_count); fprintf(output, ":\n\n"); } +} + +static void print_footer(void) +{ + if (!null_run) + fprintf(output, "\n"); + fprintf(output, " %17.9f seconds time elapsed", + avg_stats(&walltime_nsecs_stats)/1e9); + if (run_count > 1) { + fprintf(output, " "); + print_noise_pct(stddev_stats(&walltime_nsecs_stats), + avg_stats(&walltime_nsecs_stats)); + } + fprintf(output, "\n\n"); +} + +static void print_counters(struct timespec *ts, int argc, const char **argv) +{ + struct perf_evsel *counter; + char buf[64], *prefix = NULL; + + if (interval) + print_interval(prefix = buf, ts); + else + print_header(argc, argv); switch (aggr_mode) { case AGGR_CORE: case AGGR_SOCKET: - print_aggr(NULL); + print_aggr(prefix); + break; + case AGGR_THREAD: + evlist__for_each(evsel_list, counter) + print_aggr_thread(counter, prefix); break; case AGGR_GLOBAL: evlist__for_each(evsel_list, counter) - print_counter_aggr(counter, NULL); + print_counter_aggr(counter, prefix); break; case AGGR_NONE: evlist__for_each(evsel_list, counter) - print_counter(counter, NULL); + print_counter(counter, prefix); break; default: break; } - if (!csv_output) { - if (!null_run) - fprintf(output, "\n"); - fprintf(output, " %17.9f seconds time elapsed", - avg_stats(&walltime_nsecs_stats)/1e9); - if (run_count > 1) { - fprintf(output, " "); - print_noise_pct(stddev_stats(&walltime_nsecs_stats), - avg_stats(&walltime_nsecs_stats)); - } - fprintf(output, "\n\n"); - } + if (!interval && !csv_output) + print_footer(); + + fflush(output); } static volatile int signr = -1; @@ -1518,23 +1081,13 @@ static int perf_stat_init_aggr_mode(void) break; case AGGR_NONE: case AGGR_GLOBAL: + case AGGR_THREAD: default: break; } return 0; } -static int setup_events(const char * const *attrs, unsigned len) -{ - unsigned i; - - for (i = 0; i < len; i++) { - if (parse_events(evsel_list, attrs[i])) - return -1; - } - return 0; -} - /* * Add default attributes, if there were no attributes specified or * if -d/--detailed, -d -d or -d -d -d is used: @@ -1656,12 +1209,10 @@ static int add_default_attributes(void) int err; if (pmu_have_event("cpu", "cycles-ct") && pmu_have_event("cpu", "el-start")) - err = setup_events(transaction_attrs, - ARRAY_SIZE(transaction_attrs)); + err = parse_events(evsel_list, transaction_attrs, NULL); else - err = setup_events(transaction_limited_attrs, - ARRAY_SIZE(transaction_limited_attrs)); - if (err < 0) { + err = parse_events(evsel_list, transaction_limited_attrs, NULL); + if (err) { fprintf(stderr, "Cannot set up transaction events\n"); return -1; } @@ -1755,6 +1306,8 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) "aggregate counts per processor socket", AGGR_SOCKET), 
OPT_SET_UINT(0, "per-core", &aggr_mode, "aggregate counts per physical processor core", AGGR_CORE), + OPT_SET_UINT(0, "per-thread", &aggr_mode, + "aggregate counts per thread", AGGR_THREAD), OPT_UINTEGER('D', "delay", &initial_delay, "ms to wait before starting measurement after program start"), OPT_END() @@ -1846,8 +1399,19 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) run_count = 1; } - /* no_aggr, cgroup are for system-wide only */ - if ((aggr_mode != AGGR_GLOBAL || nr_cgroups) && + if ((aggr_mode == AGGR_THREAD) && !target__has_task(&target)) { + fprintf(stderr, "The --per-thread option is only available " + "when monitoring via -p -t options.\n"); + parse_options_usage(NULL, options, "p", 1); + parse_options_usage(NULL, options, "t", 1); + goto out; + } + + /* + * no_aggr, cgroup are for system-wide only + * --per-thread is aggregated per thread, we dont mix it with cpu mode + */ + if (((aggr_mode != AGGR_GLOBAL && aggr_mode != AGGR_THREAD) || nr_cgroups) && !target__has_cpu(&target)) { fprintf(stderr, "both cgroup and no-aggregation " "modes only available in system-wide mode\n"); @@ -1875,6 +1439,14 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) } goto out; } + + /* + * Initialize thread_map with comm names, + * so we could print it out on output. + */ + if (aggr_mode == AGGR_THREAD) + thread_map__read_comms(evsel_list->threads); + if (interval && interval < 100) { pr_err("print interval must be >= 100ms\n"); parse_options_usage(stat_usage, options, "I", 1); @@ -1908,13 +1480,13 @@ int cmd_stat(int argc, const char **argv, const char *prefix __maybe_unused) status = run_perf_stat(argc, argv); if (forever && status != -1) { - print_stat(argc, argv); - perf_stat__reset_stats(evsel_list); + print_counters(NULL, argc, argv); + perf_stat__reset_stats(); } } if (!forever && status != -1 && !interval) - print_stat(argc, argv); + print_counters(NULL, argc, argv); perf_evlist__free_stats(evsel_list); out: diff --git a/tools/perf/builtin-timechart.c b/tools/perf/builtin-timechart.c index f3bb1a4bf060..30e59620179d 100644 --- a/tools/perf/builtin-timechart.c +++ b/tools/perf/builtin-timechart.c @@ -61,10 +61,11 @@ struct timechart { tasks_only, with_backtrace, topology; + bool force; /* IO related settings */ - u64 io_events; bool io_only, skip_eagain; + u64 io_events; u64 min_time, merge_dist; }; @@ -522,7 +523,7 @@ static const char *cat_backtrace(union perf_event *event, * Discard all. */ zfree(&p); - goto exit; + goto exit_put; } continue; } @@ -537,7 +538,8 @@ static const char *cat_backtrace(union perf_event *event, else fprintf(f, "..... 
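/*
 * Usage note for the AGGR_THREAD mode added above: --per-thread only works
 * together with -p/-t (the check above enforces this), and
 * thread_map__read_comms() fills in the comm names so each output row can
 * be labelled "comm-pid".  A hypothetical invocation (pid made up):
 *
 *	perf stat --per-thread -p 1234 sleep 5
 *
 * which prints one row per thread of pid 1234 instead of one global total.
 */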
%016" PRIx64 "\n", ip); } - +exit_put: + addr_location__put(&al); exit: fclose(f); @@ -1598,6 +1600,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = tchart->force, }; struct perf_session *session = perf_session__new(&file, false, @@ -1623,7 +1626,7 @@ static int __cmd_timechart(struct timechart *tchart, const char *output_name) goto out_delete; } - ret = perf_session__process_events(session, &tchart->tool); + ret = perf_session__process_events(session); if (ret) goto out_delete; @@ -1956,9 +1959,11 @@ int cmd_timechart(int argc, const char **argv, OPT_CALLBACK(0, "io-merge-dist", &tchart.merge_dist, "time", "merge events that are merge-dist us apart", parse_time), + OPT_BOOLEAN('f', "force", &tchart.force, "don't complain, do it"), OPT_END() }; - const char * const timechart_usage[] = { + const char * const timechart_subcommands[] = { "record", NULL }; + const char *timechart_usage[] = { "perf timechart [<options>] {record}", NULL }; @@ -1976,8 +1981,8 @@ int cmd_timechart(int argc, const char **argv, "perf timechart record [<options>]", NULL }; - argc = parse_options(argc, argv, timechart_options, timechart_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + argc = parse_options_subcommand(argc, argv, timechart_options, timechart_subcommands, + timechart_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (tchart.power_only && tchart.tasks_only) { pr_err("-P and -T options cannot be used at the same time.\n"); diff --git a/tools/perf/builtin-top.c b/tools/perf/builtin-top.c index c4c7eac69de4..ecf319728f25 100644 --- a/tools/perf/builtin-top.c +++ b/tools/perf/builtin-top.c @@ -235,10 +235,13 @@ static void perf_top__show_details(struct perf_top *top) more = symbol__annotate_printf(symbol, he->ms.map, top->sym_evsel, 0, top->sym_pcnt_filter, top->print_entries, 4); - if (top->zero) - symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx); - else - symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx); + + if (top->evlist->enabled) { + if (top->zero) + symbol__annotate_zero_histogram(symbol, top->sym_evsel->idx); + else + symbol__annotate_decay_histogram(symbol, top->sym_evsel->idx); + } if (more != 0) printf("%d lines not displayed, maybe increase display entries [e]\n", more); out_unlock: @@ -276,11 +279,13 @@ static void perf_top__print_sym_table(struct perf_top *top) return; } - if (top->zero) { - hists__delete_entries(hists); - } else { - hists__decay_entries(hists, top->hide_user_symbols, - top->hide_kernel_symbols); + if (top->evlist->enabled) { + if (top->zero) { + hists__delete_entries(hists); + } else { + hists__decay_entries(hists, top->hide_user_symbols, + top->hide_kernel_symbols); + } } hists__collapse_resort(hists, NULL); @@ -545,11 +550,13 @@ static void perf_top__sort_new_samples(void *arg) hists = evsel__hists(t->sym_evsel); - if (t->zero) { - hists__delete_entries(hists); - } else { - hists__decay_entries(hists, t->hide_user_symbols, - t->hide_kernel_symbols); + if (t->evlist->enabled) { + if (t->zero) { + hists__delete_entries(hists); + } else { + hists__decay_entries(hists, t->hide_user_symbols, + t->hide_kernel_symbols); + } } hists__collapse_resort(hists, NULL); @@ -579,7 +586,8 @@ static void *display_thread_tui(void *arg) hists->uid_filter_str = top->record_opts.target.uid_str; } - perf_evlist__tui_browse_hists(top->evlist, help, &hbt, top->min_percent, + perf_evlist__tui_browse_hists(top->evlist, help, &hbt, + top->min_percent, 
&top->session->header.env); done = 1; @@ -716,7 +724,7 @@ static void perf_event__process_sample(struct perf_tool *tool, if (!machine) { pr_err("%u unprocessable samples recorded.\r", - top->session->stats.nr_unprocessable_samples++); + top->session->evlist->stats.nr_unprocessable_samples++); return; } @@ -733,7 +741,7 @@ static void perf_event__process_sample(struct perf_tool *tool, "Kernel address maps (/proc/{kallsyms,modules}) are restricted.\n\n" "Check /proc/sys/kernel/kptr_restrict.\n\n" "Kernel%s samples will not be resolved.\n", - !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? + al.map && !RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION]) ? " modules" : ""); if (use_browser <= 0) sleep(5); @@ -757,8 +765,10 @@ static void perf_event__process_sample(struct perf_tool *tool, al.map == machine->vmlinux_maps[MAP__FUNCTION] && RB_EMPTY_ROOT(&al.map->dso->symbols[MAP__FUNCTION])) { if (symbol_conf.vmlinux_name) { - ui__warning("The %s file can't be used.\n%s", - symbol_conf.vmlinux_name, msg); + char serr[256]; + dso__strerror_load(al.map->dso, serr, sizeof(serr)); + ui__warning("The %s file can't be used: %s\n%s", + symbol_conf.vmlinux_name, serr, msg); } else { ui__warning("A vmlinux file was not found.\n%s", msg); @@ -773,7 +783,9 @@ static void perf_event__process_sample(struct perf_tool *tool, if (al.sym == NULL || !al.sym->ignore) { struct hists *hists = evsel__hists(evsel); struct hist_entry_iter iter = { - .add_entry_cb = hist_iter__top_callback, + .evsel = evsel, + .sample = sample, + .add_entry_cb = hist_iter__top_callback, }; if (symbol_conf.cumulate_callchain) @@ -783,15 +795,14 @@ static void perf_event__process_sample(struct perf_tool *tool, pthread_mutex_lock(&hists->lock); - err = hist_entry_iter__add(&iter, &al, evsel, sample, - top->max_stack, top); + err = hist_entry_iter__add(&iter, &al, top->max_stack, top); if (err < 0) pr_err("Problem incrementing symbol period, skipping event\n"); pthread_mutex_unlock(&hists->lock); } - return; + addr_location__put(&al); } static void perf_top__mmap_read_idx(struct perf_top *top, int idx) @@ -856,7 +867,7 @@ static void perf_top__mmap_read_idx(struct perf_top *top, int idx) hists__inc_nr_events(evsel__hists(evsel), event->header.type); machine__process_event(machine, event, &sample); } else - ++session->stats.nr_unknown_events; + ++session->evlist->stats.nr_unknown_events; next_event: perf_evlist__mmap_consume(top->evlist, idx); } @@ -948,7 +959,7 @@ static int __cmd_top(struct perf_top *top) goto out_delete; machine__synthesize_threads(&top->session->machines.host, &opts->target, - top->evlist->threads, false); + top->evlist->threads, false, opts->proc_map_timeout); ret = perf_top__start_counters(top); if (ret) goto out_delete; @@ -1058,6 +1069,7 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) .target = { .uses_mmap = true, }, + .proc_map_timeout = 500, }, .max_stack = PERF_MAX_STACK_DEPTH, .sym_pcnt_filter = 5, @@ -1157,6 +1169,8 @@ int cmd_top(int argc, const char **argv, const char *prefix __maybe_unused) OPT_STRING('w', "column-widths", &symbol_conf.col_width_list_str, "width[,width...]", "don't try to adjust column width, use these fixed values"), + OPT_UINTEGER(0, "proc-map-timeout", &opts->proc_map_timeout, + "per thread proc mmap processing timeout in ms"), OPT_END() }; const char * const top_usage[] = { diff --git a/tools/perf/builtin-trace.c b/tools/perf/builtin-trace.c index 7e935f1083ec..39ad4d0ca884 100644 --- a/tools/perf/builtin-trace.c +++ b/tools/perf/builtin-trace.c @@ 
-16,7 +16,6 @@ #include <libaudit.h> #include <stdlib.h> -#include <sys/eventfd.h> #include <sys/mman.h> #include <linux/futex.h> @@ -41,6 +40,51 @@ # define EFD_SEMAPHORE 1 #endif +#ifndef EFD_NONBLOCK +# define EFD_NONBLOCK 00004000 +#endif + +#ifndef EFD_CLOEXEC +# define EFD_CLOEXEC 02000000 +#endif + +#ifndef O_CLOEXEC +# define O_CLOEXEC 02000000 +#endif + +#ifndef SOCK_DCCP +# define SOCK_DCCP 6 +#endif + +#ifndef SOCK_CLOEXEC +# define SOCK_CLOEXEC 02000000 +#endif + +#ifndef SOCK_NONBLOCK +# define SOCK_NONBLOCK 00004000 +#endif + +#ifndef MSG_CMSG_CLOEXEC +# define MSG_CMSG_CLOEXEC 0x40000000 +#endif + +#ifndef PERF_FLAG_FD_NO_GROUP +# define PERF_FLAG_FD_NO_GROUP (1UL << 0) +#endif + +#ifndef PERF_FLAG_FD_OUTPUT +# define PERF_FLAG_FD_OUTPUT (1UL << 1) +#endif + +#ifndef PERF_FLAG_PID_CGROUP +# define PERF_FLAG_PID_CGROUP (1UL << 2) /* pid=cgroup id, per-cpu mode only */ +#endif + +#ifndef PERF_FLAG_FD_CLOEXEC +# define PERF_FLAG_FD_CLOEXEC (1UL << 3) /* O_CLOEXEC */ +#endif + + struct tp_field { int offset; union { @@ -52,7 +96,9 @@ struct tp_field { #define TP_UINT_FIELD(bits) \ static u64 tp_field__u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - return *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ + return value; \ } TP_UINT_FIELD(8); @@ -63,7 +109,8 @@ TP_UINT_FIELD(64); #define TP_UINT_FIELD__SWAPPED(bits) \ static u64 tp_field__swapped_u##bits(struct tp_field *field, struct perf_sample *sample) \ { \ - u##bits value = *(u##bits *)(sample->raw_data + field->offset); \ + u##bits value; \ + memcpy(&value, sample->raw_data + field->offset, sizeof(value)); \ return bswap_##bits(value);\ } @@ -328,6 +375,14 @@ static size_t syscall_arg__scnprintf_hex(char *bf, size_t size, #define SCA_HEX syscall_arg__scnprintf_hex +static size_t syscall_arg__scnprintf_int(char *bf, size_t size, + struct syscall_arg *arg) +{ + return scnprintf(bf, size, "%d", arg->val); +} + +#define SCA_INT syscall_arg__scnprintf_int + static size_t syscall_arg__scnprintf_mmap_prot(char *bf, size_t size, struct syscall_arg *arg) { @@ -780,6 +835,34 @@ static size_t syscall_arg__scnprintf_open_flags(char *bf, size_t size, #define SCA_OPEN_FLAGS syscall_arg__scnprintf_open_flags +static size_t syscall_arg__scnprintf_perf_flags(char *bf, size_t size, + struct syscall_arg *arg) +{ + int printed = 0, flags = arg->val; + + if (flags == 0) + return 0; + +#define P_FLAG(n) \ + if (flags & PERF_FLAG_##n) { \ + printed += scnprintf(bf + printed, size - printed, "%s%s", printed ? "|" : "", #n); \ + flags &= ~PERF_FLAG_##n; \ + } + + P_FLAG(FD_NO_GROUP); + P_FLAG(FD_OUTPUT); + P_FLAG(PID_CGROUP); + P_FLAG(FD_CLOEXEC); +#undef P_FLAG + + if (flags) + printed += scnprintf(bf + printed, size - printed, "%s%#x", printed ? 
"|" : "", flags); + + return printed; +} + +#define SCA_PERF_FLAGS syscall_arg__scnprintf_perf_flags + static size_t syscall_arg__scnprintf_eventfd_flags(char *bf, size_t size, struct syscall_arg *arg) { @@ -1047,6 +1130,11 @@ static struct syscall_fmt { { .name = "openat", .errmsg = true, .arg_scnprintf = { [0] = SCA_FDAT, /* dfd */ [2] = SCA_OPEN_FLAGS, /* flags */ }, }, + { .name = "perf_event_open", .errmsg = true, + .arg_scnprintf = { [1] = SCA_INT, /* pid */ + [2] = SCA_INT, /* cpu */ + [3] = SCA_FD, /* group_fd */ + [4] = SCA_PERF_FLAGS, /* flags */ }, }, { .name = "pipe2", .errmsg = true, .arg_scnprintf = { [1] = SCA_PIPE_FLAGS, /* flags */ }, }, { .name = "poll", .errmsg = true, .timeout = true, }, @@ -1132,6 +1220,8 @@ static struct syscall_fmt *syscall_fmt__find(const char *name) struct syscall { struct event_format *tp_format; + int nr_args; + struct format_field *args; const char *name; bool filtered; bool is_exit; @@ -1219,7 +1309,9 @@ struct trace { struct syscall *table; } syscalls; struct record_opts opts; + struct perf_evlist *evlist; struct machine *host; + struct thread *current; u64 base_time; FILE *output; unsigned long nr_events; @@ -1227,6 +1319,10 @@ struct trace { const char *last_vfs_getname; struct intlist *tid_list; struct intlist *pid_list; + struct { + size_t nr; + pid_t *entries; + } filter_pids; double duration_filter; double runtime_ms; struct { @@ -1243,6 +1339,7 @@ struct trace { bool show_comm; bool show_tool_stats; bool trace_syscalls; + bool force; int trace_pgfaults; }; @@ -1421,7 +1518,8 @@ static int trace__symbols_init(struct trace *trace, struct perf_evlist *evlist) return -ENOMEM; err = __machine__synthesize_threads(trace->host, &trace->tool, &trace->opts.target, - evlist->threads, trace__tool_process, false); + evlist->threads, trace__tool_process, false, + trace->opts.proc_map_timeout); if (err) symbol__exit(); @@ -1433,14 +1531,14 @@ static int syscall__set_arg_fmts(struct syscall *sc) struct format_field *field; int idx = 0; - sc->arg_scnprintf = calloc(sc->tp_format->format.nr_fields - 1, sizeof(void *)); + sc->arg_scnprintf = calloc(sc->nr_args, sizeof(void *)); if (sc->arg_scnprintf == NULL) return -1; if (sc->fmt) sc->arg_parm = sc->fmt->arg_parm; - for (field = sc->tp_format->format.fields->next; field; field = field->next) { + for (field = sc->args; field; field = field->next) { if (sc->fmt && sc->fmt->arg_scnprintf[idx]) sc->arg_scnprintf[idx] = sc->fmt->arg_scnprintf[idx]; else if (field->flags & FIELD_IS_POINTER) @@ -1506,18 +1604,65 @@ static int trace__read_syscall_info(struct trace *trace, int id) if (sc->tp_format == NULL) return -1; + sc->args = sc->tp_format->format.fields; + sc->nr_args = sc->tp_format->format.nr_fields; + /* drop nr field - not relevant here; does not exist on older kernels */ + if (sc->args && strcmp(sc->args->name, "nr") == 0) { + sc->args = sc->args->next; + --sc->nr_args; + } + sc->is_exit = !strcmp(name, "exit_group") || !strcmp(name, "exit"); return syscall__set_arg_fmts(sc); } +static int trace__validate_ev_qualifier(struct trace *trace) +{ + int err = 0; + struct str_node *pos; + + strlist__for_each(pos, trace->ev_qualifier) { + const char *sc = pos->s; + + if (audit_name_to_syscall(sc, trace->audit.machine) < 0) { + if (err == 0) { + fputs("Error:\tInvalid syscall ", trace->output); + err = -EINVAL; + } else { + fputs(", ", trace->output); + } + + fputs(sc, trace->output); + } + } + + if (err < 0) { + fputs("\nHint:\ttry 'perf list syscalls:sys_enter_*'" + "\nHint:\tand: 'man syscalls'\n", 
trace->output); + } + + return err; +} + +/* + * args is to be interpreted as a series of longs but we need to handle + * 8-byte unaligned accesses. args points to raw_data within the event + * and raw_data is guaranteed to be 8-byte unaligned because it is + * preceded by raw_size which is a u32. So we need to copy args to a temp + * variable to read it. Most notably this avoids extended load instructions + * on unaligned addresses + */ + static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, - unsigned long *args, struct trace *trace, + unsigned char *args, struct trace *trace, struct thread *thread) { size_t printed = 0; + unsigned char *p; + unsigned long val; - if (sc->tp_format != NULL) { + if (sc->args != NULL) { struct format_field *field; u8 bit = 1; struct syscall_arg arg = { @@ -1527,16 +1672,21 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, .thread = thread, }; - for (field = sc->tp_format->format.fields->next; field; + for (field = sc->args; field; field = field->next, ++arg.idx, bit <<= 1) { if (arg.mask & bit) continue; + + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * arg.idx; + memcpy(&val, p, sizeof(val)); + /* * Suppress this argument if its value is zero and * and we don't have a string associated in an * strarray for it. */ - if (args[arg.idx] == 0 && + if (val == 0 && !(sc->arg_scnprintf && sc->arg_scnprintf[arg.idx] == SCA_STRARRAY && sc->arg_parm[arg.idx])) @@ -1545,23 +1695,26 @@ static size_t syscall__scnprintf_args(struct syscall *sc, char *bf, size_t size, printed += scnprintf(bf + printed, size - printed, "%s%s: ", printed ? ", " : "", field->name); if (sc->arg_scnprintf && sc->arg_scnprintf[arg.idx]) { - arg.val = args[arg.idx]; + arg.val = val; if (sc->arg_parm) arg.parm = sc->arg_parm[arg.idx]; printed += sc->arg_scnprintf[arg.idx](bf + printed, size - printed, &arg); } else { printed += scnprintf(bf + printed, size - printed, - "%ld", args[arg.idx]); + "%ld", val); } } } else { int i = 0; while (i < 6) { + /* special care for unaligned accesses */ + p = args + sizeof(unsigned long) * i; + memcpy(&val, p, sizeof(val)); printed += scnprintf(bf + printed, size - printed, "%sarg%d: %ld", - printed ? ", " : "", i, args[i]); + printed ? 
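/*
 * Both the TP_UINT_FIELD() change and the memcpy() into "val" above follow
 * the same rule: raw_data can sit on a 4-byte boundary, so wider loads go
 * through memcpy() instead of casting the pointer.  A self-contained sketch
 * of that idiom (the buffer layout here is illustrative):
 */
#include <stdint.h>
#include <stdio.h>
#include <string.h>

int main(void)
{
	/* 4 bytes of "raw_size", then payload: u64 values start misaligned */
	unsigned char buf[4 + 2 * sizeof(uint64_t)] = { 0 };
	uint64_t a = 0x1122334455667788ULL, b = 42, out;
	unsigned char *payload = buf + 4;	/* not 8-byte aligned */

	memcpy(payload, &a, sizeof(a));
	memcpy(payload + sizeof(uint64_t), &b, sizeof(b));

	/* safe wide read: no alignment assumption, compiler picks the insns */
	memcpy(&out, payload, sizeof(out));
	printf("%#llx\n", (unsigned long long)out);	/* 0x1122334455667788 */

	/*
	 * by contrast, *(uint64_t *)payload would be undefined behaviour and
	 * can fault on strict-alignment targets (e.g. some ARM/SPARC parts)
	 */
	return 0;
}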
", " : "", i, val); ++i; } } @@ -1642,6 +1795,29 @@ static void thread__update_stats(struct thread_trace *ttrace, update_stats(stats, duration); } +static int trace__printf_interrupted_entry(struct trace *trace, struct perf_sample *sample) +{ + struct thread_trace *ttrace; + u64 duration; + size_t printed; + + if (trace->current == NULL) + return 0; + + ttrace = thread__priv(trace->current); + + if (!ttrace->entry_pending) + return 0; + + duration = sample->time - ttrace->entry_time; + + printed = trace__fprintf_entry_head(trace, trace->current, duration, sample->time, trace->output); + printed += fprintf(trace->output, "%-70s) ...\n", ttrace->entry_str); + ttrace->entry_pending = false; + + return printed; +} + static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, union perf_event *event __maybe_unused, struct perf_sample *sample) @@ -1650,7 +1826,7 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, void *args; size_t printed = 0; struct thread *thread; - int id = perf_evsel__sc_tp_uint(evsel, id, sample); + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -1663,16 +1839,19 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) - return -1; + goto out_put; args = perf_evsel__sc_tp_ptr(evsel, args, sample); if (ttrace->entry_str == NULL) { ttrace->entry_str = malloc(1024); if (!ttrace->entry_str) - return -1; + goto out_put; } + if (!trace->summary_only) + trace__printf_interrupted_entry(trace, sample); + ttrace->entry_time = sample->time; msg = ttrace->entry_str; printed += scnprintf(msg + printed, 1024 - printed, "%s(", sc->name); @@ -1688,7 +1867,14 @@ static int trace__sys_enter(struct trace *trace, struct perf_evsel *evsel, } else ttrace->entry_pending = true; - return 0; + if (trace->current != thread) { + thread__put(trace->current); + trace->current = thread__get(thread); + } + err = 0; +out_put: + thread__put(thread); + return err; } static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, @@ -1698,7 +1884,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, long ret; u64 duration = 0; struct thread *thread; - int id = perf_evsel__sc_tp_uint(evsel, id, sample); + int id = perf_evsel__sc_tp_uint(evsel, id, sample), err = -1; struct syscall *sc = trace__syscall_info(trace, evsel, id); struct thread_trace *ttrace; @@ -1711,7 +1897,7 @@ static int trace__sys_exit(struct trace *trace, struct perf_evsel *evsel, thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) - return -1; + goto out_put; if (trace->summary) thread__update_stats(ttrace, id, sample); @@ -1765,8 +1951,10 @@ signed_print: fputc('\n', trace->output); out: ttrace->entry_pending = false; - - return 0; + err = 0; +out_put: + thread__put(thread); + return err; } static int trace__vfs_getname(struct trace *trace, struct perf_evsel *evsel, @@ -1793,6 +1981,7 @@ static int trace__sched_stat_runtime(struct trace *trace, struct perf_evsel *evs ttrace->runtime_ms += runtime_ms; trace->runtime_ms += runtime_ms; + thread__put(thread); return 0; out_dump: @@ -1802,6 +1991,29 @@ out_dump: (pid_t)perf_evsel__intval(evsel, sample, "pid"), runtime, perf_evsel__intval(evsel, sample, "vruntime")); + 
thread__put(thread); + return 0; +} + +static int trace__event_handler(struct trace *trace, struct perf_evsel *evsel, + union perf_event *event __maybe_unused, + struct perf_sample *sample) +{ + trace__printf_interrupted_entry(trace, sample); + trace__fprintf_tstamp(trace, sample->time, trace->output); + + if (trace->trace_syscalls) + fprintf(trace->output, "( ): "); + + fprintf(trace->output, "%s:", evsel->name); + + if (evsel->tp_format) { + event_format__fprintf(evsel->tp_format, sample->cpu, + sample->raw_data, sample->raw_size, + trace->output); + } + + fprintf(trace->output, ")\n"); return 0; } @@ -1832,11 +2044,12 @@ static int trace__pgfault(struct trace *trace, struct addr_location al; char map_type = 'd'; struct thread_trace *ttrace; + int err = -1; thread = machine__findnew_thread(trace->host, sample->pid, sample->tid); ttrace = thread__trace(thread, trace->output); if (ttrace == NULL) - return -1; + goto out_put; if (evsel->attr.config == PERF_COUNT_SW_PAGE_FAULTS_MAJ) ttrace->pfmaj++; @@ -1844,7 +2057,7 @@ static int trace__pgfault(struct trace *trace, ttrace->pfmin++; if (trace->summary_only) - return 0; + goto out; thread__find_addr_location(thread, cpumode, MAP__FUNCTION, sample->ip, &al); @@ -1875,8 +2088,11 @@ static int trace__pgfault(struct trace *trace, print_location(trace->output, sample, &al, true, false); fprintf(trace->output, " (%c%c)\n", map_type, al.level); - - return 0; +out: + err = 0; +out_put: + thread__put(thread); + return err; } static bool skip_sample(struct trace *trace, struct perf_sample *sample) @@ -2037,10 +2253,39 @@ static int perf_evlist__add_pgfault(struct perf_evlist *evlist, return 0; } -static int trace__run(struct trace *trace, int argc, const char **argv) +static void trace__handle_event(struct trace *trace, union perf_event *event, struct perf_sample *sample) { - struct perf_evlist *evlist = perf_evlist__new(); + const u32 type = event->header.type; struct perf_evsel *evsel; + + if (!trace->full_time && trace->base_time == 0) + trace->base_time = sample->time; + + if (type != PERF_RECORD_SAMPLE) { + trace__process_event(trace, trace->host, event, sample); + return; + } + + evsel = perf_evlist__id2evsel(trace->evlist, sample->id); + if (evsel == NULL) { + fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample->id); + return; + } + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT && + sample->raw_data == NULL) { + fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", + perf_evsel__name(evsel), sample->tid, + sample->cpu, sample->raw_size); + } else { + tracepoint_handler handler = evsel->handler; + handler(trace, evsel, event, sample); + } +} + +static int trace__run(struct trace *trace, int argc, const char **argv) +{ + struct perf_evlist *evlist = trace->evlist; int err = -1, i; unsigned long before; const bool forks = argc > 0; @@ -2048,11 +2293,6 @@ static int trace__run(struct trace *trace, int argc, const char **argv) trace->live = true; - if (evlist == NULL) { - fprintf(trace->output, "Not enough memory to run!\n"); - goto out; - } - if (trace->trace_syscalls && perf_evlist__add_syscall_newtp(evlist, trace__sys_enter, trace__sys_exit)) @@ -2105,16 +2345,35 @@ static int trace__run(struct trace *trace, int argc, const char **argv) if (err < 0) goto out_error_open; + /* + * Better not use !target__has_task() here because we need to cover the + * case where no threads were specified in the command line, but a + * workload was, and in that case we will fill in the thread_map 
when + * we fork the workload in perf_evlist__prepare_workload. + */ + if (trace->filter_pids.nr > 0) + err = perf_evlist__set_filter_pids(evlist, trace->filter_pids.nr, trace->filter_pids.entries); + else if (thread_map__pid(evlist->threads, 0) == -1) + err = perf_evlist__set_filter_pid(evlist, getpid()); + + if (err < 0) { + printf("err=%d,%s\n", -err, strerror(-err)); + exit(1); + } + err = perf_evlist__mmap(evlist, trace->opts.mmap_pages, false); if (err < 0) goto out_error_mmap; - perf_evlist__enable(evlist); + if (!target__none(&trace->opts.target)) + perf_evlist__enable(evlist); if (forks) perf_evlist__start_workload(evlist); - trace->multiple_threads = evlist->threads->map[0] == -1 || evlist->threads->nr > 1; + trace->multiple_threads = thread_map__pid(evlist->threads, 0) == -1 || + evlist->threads->nr > 1 || + perf_evlist__first(evlist)->attr.inherit; again: before = trace->nr_events; @@ -2122,8 +2381,6 @@ again: union perf_event *event; while ((event = perf_evlist__mmap_read(evlist, i)) != NULL) { - const u32 type = event->header.type; - tracepoint_handler handler; struct perf_sample sample; ++trace->nr_events; @@ -2134,35 +2391,17 @@ again: goto next_event; } - if (!trace->full_time && trace->base_time == 0) - trace->base_time = sample.time; - - if (type != PERF_RECORD_SAMPLE) { - trace__process_event(trace, trace->host, event, &sample); - continue; - } - - evsel = perf_evlist__id2evsel(evlist, sample.id); - if (evsel == NULL) { - fprintf(trace->output, "Unknown tp ID %" PRIu64 ", skipping...\n", sample.id); - goto next_event; - } - - if (evsel->attr.type == PERF_TYPE_TRACEPOINT && - sample.raw_data == NULL) { - fprintf(trace->output, "%s sample with no payload for tid: %d, cpu %d, raw_size=%d, skipping...\n", - perf_evsel__name(evsel), sample.tid, - sample.cpu, sample.raw_size); - goto next_event; - } - - handler = evsel->handler; - handler(trace, evsel, event, &sample); + trace__handle_event(trace, event, &sample); next_event: perf_evlist__mmap_consume(evlist, i); if (interrupted) goto out_disable; + + if (done && !draining) { + perf_evlist__disable(evlist); + draining = true; + } } } @@ -2180,6 +2419,8 @@ next_event: } out_disable: + thread__zput(trace->current); + perf_evlist__disable(evlist); if (!err) { @@ -2197,7 +2438,7 @@ out_disable: out_delete_evlist: perf_evlist__delete(evlist); -out: + trace->evlist = NULL; trace->live = false; return err; { @@ -2235,6 +2476,7 @@ static int trace__replay(struct trace *trace) struct perf_data_file file = { .path = input_name, .mode = PERF_DATA_MODE_READ, + .force = trace->force, }; struct perf_session *session; struct perf_evsel *evsel; @@ -2309,7 +2551,7 @@ static int trace__replay(struct trace *trace) setup_pager(); - err = perf_session__process_events(session, &trace->tool); + err = perf_session__process_events(session); if (err) pr_err("Failed to process events, error %d", err); @@ -2434,6 +2676,38 @@ static int trace__set_duration(const struct option *opt, const char *str, return 0; } +static int trace__set_filter_pids(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + int ret = -1; + size_t i; + struct trace *trace = opt->value; + /* + * FIXME: introduce a intarray class, plain parse csv and create a + * { int nr, int entries[] } struct... 
+ */ + struct intlist *list = intlist__new(str); + + if (list == NULL) + return -1; + + i = trace->filter_pids.nr = intlist__nr_entries(list) + 1; + trace->filter_pids.entries = calloc(i, sizeof(pid_t)); + + if (trace->filter_pids.entries == NULL) + goto out; + + trace->filter_pids.entries[0] = getpid(); + + for (i = 1; i < trace->filter_pids.nr; ++i) + trace->filter_pids.entries[i] = intlist__entry(list, i - 1)->i; + + intlist__delete(list); + ret = 0; +out: + return ret; +} + static int trace__open_output(struct trace *trace, const char *filename) { struct stat st; @@ -2468,9 +2742,17 @@ static int parse_pagefaults(const struct option *opt, const char *str, return 0; } +static void evlist__set_evsel_handler(struct perf_evlist *evlist, void *handler) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) + evsel->handler = handler; +} + int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const trace_usage[] = { + const char *trace_usage[] = { "perf trace [<options>] [<command>]", "perf trace [<options>] -- <command> [<options>]", "perf trace record [<options>] [<command>]", @@ -2494,6 +2776,7 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) .user_interval = ULLONG_MAX, .no_buffering = true, .mmap_pages = UINT_MAX, + .proc_map_timeout = 500, }, .output = stdout, .show_comm = true, @@ -2502,17 +2785,21 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) const char *output_name = NULL; const char *ev_qualifier_str = NULL; const struct option trace_options[] = { + OPT_CALLBACK(0, "event", &trace.evlist, "event", + "event selector. use 'perf list' to list available events", + parse_events_option), OPT_BOOLEAN(0, "comm", &trace.show_comm, "show the thread COMM next to its id"), OPT_BOOLEAN(0, "tool_stats", &trace.show_tool_stats, "show tool stats"), - OPT_STRING('e', "expr", &ev_qualifier_str, "expr", - "list of events to trace"), + OPT_STRING('e', "expr", &ev_qualifier_str, "expr", "list of syscalls to trace"), OPT_STRING('o', "output", &output_name, "file", "output file name"), OPT_STRING('i', "input", &input_name, "file", "Analyze events in file"), OPT_STRING('p', "pid", &trace.opts.target.pid, "pid", "trace events on existing process id"), OPT_STRING('t', "tid", &trace.opts.target.tid, "tid", "trace events on existing thread id"), + OPT_CALLBACK(0, "filter-pids", &trace, "CSV list of pids", + "pids to filter (by the kernel)", trace__set_filter_pids), OPT_BOOLEAN('a', "all-cpus", &trace.opts.target.system_wide, "system-wide collection from all CPUs"), OPT_STRING('C', "cpu", &trace.opts.target.cpu_list, "cpu", @@ -2538,19 +2825,37 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) OPT_CALLBACK_DEFAULT('F', "pf", &trace.trace_pgfaults, "all|maj|min", "Trace pagefaults", parse_pagefaults, "maj"), OPT_BOOLEAN(0, "syscalls", &trace.trace_syscalls, "Trace syscalls"), + OPT_BOOLEAN('f', "force", &trace.force, "don't complain, do it"), + OPT_UINTEGER(0, "proc-map-timeout", &trace.opts.proc_map_timeout, + "per thread proc mmap processing timeout in ms"), OPT_END() }; + const char * const trace_subcommands[] = { "record", NULL }; int err; char bf[BUFSIZ]; - argc = parse_options(argc, argv, trace_options, trace_usage, - PARSE_OPT_STOP_AT_NON_OPTION); + signal(SIGSEGV, sighandler_dump_stack); + signal(SIGFPE, sighandler_dump_stack); + + trace.evlist = perf_evlist__new(); + + if (trace.evlist == NULL) { + pr_err("Not enough memory to run!\n"); + err = -ENOMEM; 
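The trace__set_filter_pids() helper added above parses the --filter-pids CSV into trace->filter_pids and reserves slot zero for the tracer's own pid, so perf trace never traces itself. A minimal standalone sketch of that CSV-to-pid-array step, using plain libc instead of perf's intlist (the helper name and its use of strtok_r/strtol are illustrative assumptions, not perf code):

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/types.h>
#include <unistd.h>

/* Parse "42,4711" into a pid_t array whose first slot is our own pid. */
static pid_t *parse_filter_pids(const char *str, size_t *nr)
{
	size_t n = 2;				/* slot 0 (getpid) + first entry */
	const char *p;
	char *copy, *tok, *save;
	pid_t *pids;

	for (p = str; *p; p++)
		if (*p == ',')
			n++;

	pids = calloc(n, sizeof(*pids));
	copy = strdup(str);
	if (!pids || !copy) {
		free(pids);
		free(copy);
		return NULL;
	}

	pids[0] = getpid();
	*nr = 1;
	for (tok = strtok_r(copy, ",", &save); tok && *nr < n;
	     tok = strtok_r(NULL, ",", &save))
		pids[(*nr)++] = (pid_t)strtol(tok, NULL, 10);

	free(copy);
	return pids;
}

int main(void)
{
	size_t nr = 0, i;
	pid_t *pids = parse_filter_pids("42,4711", &nr);

	for (i = 0; pids && i < nr; i++)
		printf("filter pid[%zu] = %d\n", i, (int)pids[i]);
	free(pids);
	return 0;
}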
+ goto out; + } + + argc = parse_options_subcommand(argc, argv, trace_options, trace_subcommands, + trace_usage, PARSE_OPT_STOP_AT_NON_OPTION); if (trace.trace_pgfaults) { trace.opts.sample_address = true; trace.opts.sample_time = true; } + if (trace.evlist->nr_entries > 0) + evlist__set_evsel_handler(trace.evlist, trace__event_handler); + if ((argc >= 1) && (strcmp(argv[0], "record") == 0)) return trace__record(&trace, argc-1, &argv[1]); @@ -2558,7 +2863,8 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) if (trace.summary_only) trace.summary = trace.summary_only; - if (!trace.trace_syscalls && !trace.trace_pgfaults) { + if (!trace.trace_syscalls && !trace.trace_pgfaults && + trace.evlist->nr_entries == 0 /* Was --events used? */) { pr_err("Please specify something to trace.\n"); return -1; } @@ -2584,6 +2890,10 @@ int cmd_trace(int argc, const char **argv, const char *prefix __maybe_unused) err = -ENOMEM; goto out_close; } + + err = trace__validate_ev_qualifier(&trace); + if (err) + goto out_close; } err = target__validate(&trace.opts.target); diff --git a/tools/perf/builtin.h b/tools/perf/builtin.h index b210d62907e4..3688ad29085f 100644 --- a/tools/perf/builtin.h +++ b/tools/perf/builtin.h @@ -37,6 +37,7 @@ extern int cmd_test(int argc, const char **argv, const char *prefix); extern int cmd_trace(int argc, const char **argv, const char *prefix); extern int cmd_inject(int argc, const char **argv, const char *prefix); extern int cmd_mem(int argc, const char **argv, const char *prefix); +extern int cmd_data(int argc, const char **argv, const char *prefix); extern int find_scripts(char **scripts_array, char **scripts_path_array); #endif diff --git a/tools/perf/command-list.txt b/tools/perf/command-list.txt index 0906fc401c52..00fcaf8a5b8d 100644 --- a/tools/perf/command-list.txt +++ b/tools/perf/command-list.txt @@ -7,6 +7,7 @@ perf-archive mainporcelain common perf-bench mainporcelain common perf-buildid-cache mainporcelain common perf-buildid-list mainporcelain common +perf-data mainporcelain common perf-diff mainporcelain common perf-evlist mainporcelain common perf-inject mainporcelain common diff --git a/tools/perf/config/Makefile b/tools/perf/config/Makefile index cc224080b525..094ddaee104c 100644 --- a/tools/perf/config/Makefile +++ b/tools/perf/config/Makefile @@ -11,21 +11,28 @@ ifneq ($(obj-perf),) obj-perf := $(abspath $(obj-perf))/ endif -LIB_INCLUDE := $(srctree)/tools/lib/ +$(shell echo -n > $(OUTPUT).config-detected) +detected = $(shell echo "$(1)=y" >> $(OUTPUT).config-detected) +detected_var = $(shell echo "$(1)=$($(1))" >> $(OUTPUT).config-detected) + CFLAGS := $(EXTRA_CFLAGS) $(EXTRA_WARNINGS) include $(src-perf)/config/Makefile.arch +$(call detected_var,ARCH) + NO_PERF_REGS := 1 # Additional ARCH settings for x86 ifeq ($(ARCH),x86) + $(call detected,CONFIG_X86) ifeq (${IS_64_BIT}, 1) CFLAGS += -DHAVE_ARCH_X86_64_SUPPORT ARCH_INCLUDE = ../../arch/x86/lib/memcpy_64.S ../../arch/x86/lib/memset_64.S LIBUNWIND_LIBS = -lunwind -lunwind-x86_64 + $(call detected,CONFIG_X86_64) else - LIBUNWIND_LIBS = -lunwind -lunwind-x86 + LIBUNWIND_LIBS = -lunwind-x86 -llzma -lunwind endif NO_PERF_REGS := 0 endif @@ -40,6 +47,10 @@ ifeq ($(ARCH),arm64) LIBUNWIND_LIBS = -lunwind -lunwind-aarch64 endif +ifeq ($(NO_PERF_REGS),0) + $(call detected,CONFIG_PERF_REGS) +endif + # So far there's only x86 and arm libdw unwind support merged in perf. # Disable it on all other architectures in case libdw unwind # support is detected in system. 
Add supported architectures @@ -84,6 +95,17 @@ ifndef NO_LIBELF FEATURE_CHECK_LDFLAGS-libdw-dwarf-unwind := $(LIBDW_LDFLAGS) -ldw endif +ifdef LIBBABELTRACE + # for linking with debug library, run like: + # make DEBUG=1 LIBBABELTRACE_DIR=/opt/libbabeltrace/ + ifdef LIBBABELTRACE_DIR + LIBBABELTRACE_CFLAGS := -I$(LIBBABELTRACE_DIR)/include + LIBBABELTRACE_LDFLAGS := -L$(LIBBABELTRACE_DIR)/lib + endif + FEATURE_CHECK_CFLAGS-libbabeltrace := $(LIBBABELTRACE_CFLAGS) + FEATURE_CHECK_LDFLAGS-libbabeltrace := $(LIBBABELTRACE_LDFLAGS) -lbabeltrace-ctf +endif + # include ARCH specific config -include $(src-perf)/arch/$(ARCH)/Makefile @@ -108,12 +130,16 @@ endif ifeq ($(DEBUG),0) CFLAGS += -O6 +else + CFLAGS += $(call cc-option,-Og,-O0) endif ifdef PARSER_DEBUG PARSER_DEBUG_BISON := -t PARSER_DEBUG_FLEX := -d CFLAGS += -DPARSER_DEBUG + $(call detected_var,PARSER_DEBUG_BISON) + $(call detected_var,PARSER_DEBUG_FLEX) endif ifndef NO_LIBPYTHON @@ -152,121 +178,7 @@ LDFLAGS += -Wl,-z,noexecstack EXTLIBS = -lpthread -lrt -lm -ldl -ifneq ($(OUTPUT),) - OUTPUT_FEATURES = $(OUTPUT)config/feature-checks/ - $(shell mkdir -p $(OUTPUT_FEATURES)) -endif - -feature_check = $(eval $(feature_check_code)) -define feature_check_code - feature-$(1) := $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS) $(FEATURE_CHECK_CFLAGS-$(1))" LDFLAGS="$(LDFLAGS) $(FEATURE_CHECK_LDFLAGS-$(1))" -C config/feature-checks test-$1.bin >/dev/null 2>/dev/null && echo 1 || echo 0) -endef - -feature_set = $(eval $(feature_set_code)) -define feature_set_code - feature-$(1) := 1 -endef - -# -# Build the feature check binaries in parallel, ignore errors, ignore return value and suppress output: -# - -# -# Note that this is not a complete list of all feature tests, just -# those that are typically built on a fully configured system. -# -# [ Feature tests not mentioned here have to be built explicitly in -# the rule that uses them - an example for that is the 'bionic' -# feature check. ] -# -CORE_FEATURE_TESTS = \ - backtrace \ - dwarf \ - fortify-source \ - sync-compare-and-swap \ - glibc \ - gtk2 \ - gtk2-infobar \ - libaudit \ - libbfd \ - libelf \ - libelf-getphdrnum \ - libelf-mmap \ - libnuma \ - libperl \ - libpython \ - libpython-version \ - libslang \ - libunwind \ - pthread-attr-setaffinity-np \ - stackprotector-all \ - timerfd \ - libdw-dwarf-unwind \ - zlib - -LIB_FEATURE_TESTS = \ - dwarf \ - glibc \ - gtk2 \ - libaudit \ - libbfd \ - libelf \ - libnuma \ - libperl \ - libpython \ - libslang \ - libunwind \ - libdw-dwarf-unwind \ - zlib - -VF_FEATURE_TESTS = \ - backtrace \ - fortify-source \ - sync-compare-and-swap \ - gtk2-infobar \ - libelf-getphdrnum \ - libelf-mmap \ - libpython-version \ - pthread-attr-setaffinity-np \ - stackprotector-all \ - timerfd \ - libunwind-debug-frame \ - bionic \ - liberty \ - liberty-z \ - cplus-demangle \ - compile-32 \ - compile-x32 - -# Set FEATURE_CHECK_(C|LD)FLAGS-all for all CORE_FEATURE_TESTS features. -# If in the future we need per-feature checks/flags for features not -# mentioned in this list we need to refactor this ;-). 
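The feature-test machinery being removed here (and replaced a few hunks below by a single include of tools/build/Makefile.feature) works by compiling one tiny C probe per feature and recording whether it builds and links. A representative probe, assuming libelf is the feature under test, looks roughly like the sketch below; the real test-*.c sources ship with the shared build framework and may differ in detail:

/* Minimal libelf probe: compiles and links only when libelf.h and -lelf
 * are usable on the build machine, which is all the feature check needs. */
#include <libelf.h>

int main(void)
{
	/* elf_version() must be called before any other libelf function. */
	return elf_version(EV_CURRENT) == EV_NONE;
}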
-set_test_all_flags = $(eval $(set_test_all_flags_code)) -define set_test_all_flags_code - FEATURE_CHECK_CFLAGS-all += $(FEATURE_CHECK_CFLAGS-$(1)) - FEATURE_CHECK_LDFLAGS-all += $(FEATURE_CHECK_LDFLAGS-$(1)) -endef - -$(foreach feat,$(CORE_FEATURE_TESTS),$(call set_test_all_flags,$(feat))) - -# -# Special fast-path for the 'all features are available' case: -# -$(call feature_check,all,$(MSG)) - -# -# Just in case the build freshly failed, make sure we print the -# feature matrix: -# -ifeq ($(feature-all), 1) - # - # test-all.c passed - just set all the core feature flags to 1: - # - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_set,$(feat))) -else - $(shell $(MAKE) OUTPUT=$(OUTPUT_FEATURES) CFLAGS="$(EXTRA_CFLAGS)" LDFLAGS=$(LDFLAGS) -i -j -C config/feature-checks $(addsuffix .bin,$(CORE_FEATURE_TESTS)) >/dev/null 2>&1) - $(foreach feat,$(CORE_FEATURE_TESTS),$(call feature_check,$(feat))) -endif +include $(srctree)/tools/build/Makefile.feature ifeq ($(feature-stackprotector-all), 1) CFLAGS += -fstack-protector-all @@ -295,7 +207,7 @@ endif CFLAGS += -I$(src-perf)/util CFLAGS += -I$(src-perf) -CFLAGS += -I$(LIB_INCLUDE) +CFLAGS += -I$(srctree)/tools/lib/ CFLAGS += -D_LARGEFILE64_SOURCE -D_FILE_OFFSET_BITS=64 -D_GNU_SOURCE @@ -358,9 +270,14 @@ else endif # libelf support endif # NO_LIBELF +ifdef NO_DWARF + NO_LIBDW_DWARF_UNWIND := 1 +endif + ifndef NO_LIBELF CFLAGS += -DHAVE_LIBELF_SUPPORT EXTLIBS += -lelf + $(call detected,CONFIG_LIBELF) ifeq ($(feature-libelf-mmap), 1) CFLAGS += -DHAVE_LIBELF_MMAP_SUPPORT @@ -381,6 +298,7 @@ ifndef NO_LIBELF CFLAGS += -DHAVE_DWARF_SUPPORT $(LIBDW_CFLAGS) LDFLAGS += $(LIBDW_LDFLAGS) EXTLIBS += -ldw + $(call detected,CONFIG_DWARF) endif # PERF_HAVE_DWARF_REGS endif # NO_DWARF endif # NO_LIBELF @@ -408,9 +326,11 @@ ifdef NO_LIBUNWIND dwarf-post-unwind := 0 else dwarf-post-unwind-text := libdw + $(call detected,CONFIG_LIBDW_DWARF_UNWIND) endif else dwarf-post-unwind-text := libunwind + $(call detected,CONFIG_LIBUNWIND) # Enable libunwind support by default. ifndef NO_LIBDW_DWARF_UNWIND NO_LIBDW_DWARF_UNWIND := 1 @@ -419,6 +339,7 @@ endif ifeq ($(dwarf-post-unwind),1) CFLAGS += -DHAVE_DWARF_UNWIND_SUPPORT + $(call detected,CONFIG_DWARF_UNWIND) else NO_DWARF_UNWIND := 1 endif @@ -447,6 +368,7 @@ ifndef NO_LIBAUDIT else CFLAGS += -DHAVE_LIBAUDIT_SUPPORT EXTLIBS += -laudit + $(call detected,CONFIG_AUDIT) endif endif @@ -463,6 +385,7 @@ ifndef NO_SLANG CFLAGS += -I/usr/include/slang CFLAGS += -DHAVE_SLANG_SUPPORT EXTLIBS += -lslang + $(call detected,CONFIG_SLANG) endif endif @@ -497,10 +420,11 @@ else ifneq ($(feature-libperl), 1) CFLAGS += -DNO_LIBPERL NO_LIBPERL := 1 - msg := $(warning Missing perl devel files. Disabling perl scripting support, consider installing perl-ExtUtils-Embed); + msg := $(warning Missing perl devel files. 
Disabling perl scripting support, please install perl-ExtUtils-Embed/libperl-dev); else LDFLAGS += $(PERL_EMBED_LDFLAGS) EXTLIBS += $(PERL_EMBED_LIBADD) + $(call detected,CONFIG_LIBPERL) endif endif @@ -513,22 +437,21 @@ endif disable-python = $(eval $(disable-python_code)) define disable-python_code CFLAGS += -DNO_LIBPYTHON - $(if $(1),$(warning No $(1) was found)) - $(warning Python support will not be built) + $(warning $1) NO_LIBPYTHON := 1 endef ifdef NO_LIBPYTHON - $(call disable-python) + $(call disable-python,Python support disabled by user) else ifndef PYTHON - $(call disable-python,python interpreter) + $(call disable-python,No python interpreter was found: disables Python support - please install python-devel/python-dev) else PYTHON_WORD := $(call shell-wordify,$(PYTHON)) ifndef PYTHON_CONFIG - $(call disable-python,python-config tool) + $(call disable-python,No 'python-config' tool was found: disables Python support - please install python-devel/python-dev) else PYTHON_CONFIG_SQ := $(call shell-sq,$(PYTHON_CONFIG)) @@ -540,7 +463,7 @@ else FLAGS_PYTHON_EMBED := $(PYTHON_EMBED_CCOPTS) $(PYTHON_EMBED_LDOPTS) ifneq ($(feature-libpython), 1) - $(call disable-python,Python.h (for Python 2.x)) + $(call disable-python,No 'Python.h' (for Python 2.x support) was found: disables Python support - please install python-devel/python-dev) else ifneq ($(feature-libpython-version), 1) @@ -560,6 +483,7 @@ else LDFLAGS += $(PYTHON_EMBED_LDFLAGS) EXTLIBS += $(PYTHON_EMBED_LIBADD) LANG_BINDINGS += $(obj-perf)python/perf.so + $(call detected,CONFIG_LIBPYTHON) endif endif endif @@ -600,7 +524,7 @@ else EXTLIBS += -liberty CFLAGS += -DHAVE_CPLUS_DEMANGLE_SUPPORT else - msg := $(warning No bfd.h/libbfd found, install binutils-dev[el]/zlib-static to gain symbol demangling) + msg := $(warning No bfd.h/libbfd found, please install binutils-dev[el]/zlib-static/libiberty-dev to gain symbol demangling) CFLAGS += -DNO_DEMANGLE endif endif @@ -617,11 +541,23 @@ ifndef NO_ZLIB ifeq ($(feature-zlib), 1) CFLAGS += -DHAVE_ZLIB_SUPPORT EXTLIBS += -lz + $(call detected,CONFIG_ZLIB) else NO_ZLIB := 1 endif endif +ifndef NO_LZMA + ifeq ($(feature-lzma), 1) + CFLAGS += -DHAVE_LZMA_SUPPORT + EXTLIBS += -llzma + $(call detected,CONFIG_LZMA) + else + msg := $(warning No liblzma found, disables xz kernel module decompression, please install xz-devel/liblzma-dev); + NO_LZMA := 1 + endif +endif + ifndef NO_BACKTRACE ifeq ($(feature-backtrace), 1) CFLAGS += -DHAVE_BACKTRACE_SUPPORT @@ -635,6 +571,7 @@ ifndef NO_LIBNUMA else CFLAGS += -DHAVE_LIBNUMA_SUPPORT EXTLIBS += -lnuma + $(call detected,CONFIG_NUMA) endif endif @@ -651,7 +588,7 @@ ifeq (${IS_64_BIT}, 1) NO_PERF_READ_VDSO32 := 1 endif endif - ifneq (${IS_X86_64}, 1) + ifneq ($(ARCH), x86) NO_PERF_READ_VDSOX32 := 1 endif ifndef NO_PERF_READ_VDSOX32 @@ -667,6 +604,23 @@ else NO_PERF_READ_VDSOX32 := 1 endif +ifdef LIBBABELTRACE + $(call feature_check,libbabeltrace) + ifeq ($(feature-libbabeltrace), 1) + CFLAGS += -DHAVE_LIBBABELTRACE_SUPPORT $(LIBBABELTRACE_CFLAGS) + LDFLAGS += $(LIBBABELTRACE_LDFLAGS) + EXTLIBS += -lbabeltrace-ctf + $(call detected,CONFIG_LIBBABELTRACE) + else + msg := $(warning No libbabeltrace found, disables 'perf data' CTF format support, please install libbabeltrace-dev[el]/libbabeltrace-ctf-dev); + endif +endif + +ifndef NO_AUXTRACE + $(call detected,CONFIG_AUXTRACE) + CFLAGS += -DHAVE_AUXTRACE_SUPPORT +endif + # Among the variables below, these: # perfexecdir # template_dir @@ -699,7 +653,7 @@ sysconfdir = $(prefix)/etc ETC_PERFCONFIG = 
etc/perfconfig endif ifndef lib -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib @@ -735,83 +689,33 @@ plugindir=$(libdir)/traceevent/plugins plugindir_SQ= $(subst ','\'',$(plugindir)) endif -# -# Print the result of the feature test: -# -feature_print_status = $(eval $(feature_print_status_code)) $(info $(MSG)) - -define feature_print_status_code - ifeq ($(feature-$(1)), 1) - MSG = $(shell printf '...%30s: [ \033[32mon\033[m ]' $(1)) - else - MSG = $(shell printf '...%30s: [ \033[31mOFF\033[m ]' $(1)) - endif -endef - -feature_print_var = $(eval $(feature_print_var_code)) $(info $(MSG)) -define feature_print_var_code +print_var = $(eval $(print_var_code)) $(info $(MSG)) +define print_var_code MSG = $(shell printf '...%30s: %s' $(1) $($(1))) endef -feature_print_text = $(eval $(feature_print_text_code)) $(info $(MSG)) -define feature_print_text_code - MSG = $(shell printf '...%30s: %s' $(1) $(2)) -endef - -PERF_FEATURES := $(foreach feat,$(LIB_FEATURE_TESTS),feature-$(feat)($(feature-$(feat)))) -PERF_FEATURES_FILE := $(shell touch $(OUTPUT)PERF-FEATURES; cat $(OUTPUT)PERF-FEATURES) - -ifeq ($(dwarf-post-unwind),1) - PERF_FEATURES += dwarf-post-unwind($(dwarf-post-unwind-text)) -endif - -# The $(display_lib) controls the default detection message -# output. It's set if: -# - detected features differes from stored features from -# last build (in PERF-FEATURES file) -# - one of the $(LIB_FEATURE_TESTS) is not detected -# - VF is enabled - -ifneq ("$(PERF_FEATURES)","$(PERF_FEATURES_FILE)") - $(shell echo "$(PERF_FEATURES)" > $(OUTPUT)PERF-FEATURES) - display_lib := 1 -endif - -feature_check = $(eval $(feature_check_code)) -define feature_check_code - ifneq ($(feature-$(1)), 1) - display_lib := 1 - endif -endef - -$(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_check,$(feat))) - ifeq ($(VF),1) - display_lib := 1 - display_vf := 1 -endif - -ifeq ($(display_lib),1) + $(call print_var,prefix) + $(call print_var,bindir) + $(call print_var,libdir) + $(call print_var,sysconfdir) + $(call print_var,LIBUNWIND_DIR) + $(call print_var,LIBDW_DIR) $(info ) - $(info Auto-detecting system features:) - $(foreach feat,$(LIB_FEATURE_TESTS),$(call feature_print_status,$(feat),)) - - ifeq ($(dwarf-post-unwind),1) - $(call feature_print_text,"DWARF post unwind library", $(dwarf-post-unwind-text)) - endif -endif - -ifeq ($(display_vf),1) - $(foreach feat,$(VF_FEATURE_TESTS),$(call feature_print_status,$(feat),)) - $(info ) - $(call feature_print_var,prefix) - $(call feature_print_var,bindir) - $(call feature_print_var,libdir) - $(call feature_print_var,sysconfdir) - $(call feature_print_var,LIBUNWIND_DIR) - $(call feature_print_var,LIBDW_DIR) endif -ifeq ($(display_lib),1) - $(info ) -endif +$(call detected_var,bindir_SQ) +$(call detected_var,PYTHON_WORD) +ifneq ($(OUTPUT),) +$(call detected_var,OUTPUT) +endif +$(call detected_var,htmldir_SQ) +$(call detected_var,infodir_SQ) +$(call detected_var,mandir_SQ) +$(call detected_var,ETC_PERFCONFIG_SQ) +$(call detected_var,prefix_SQ) +$(call detected_var,perfexecdir_SQ) +$(call detected_var,LIBDIR) +$(call detected_var,GTK_CFLAGS) +$(call detected_var,PERL_EMBED_CCOPTS) +$(call detected_var,PYTHON_EMBED_CCOPTS) diff --git a/tools/perf/config/Makefile.arch b/tools/perf/config/Makefile.arch index ac8721ffa6c8..e11fbd6fae78 100644 --- a/tools/perf/config/Makefile.arch +++ b/tools/perf/config/Makefile.arch @@ -1,32 +1,15 @@ +ifndef ARCH +ARCH := $(shell uname -m 2>/dev/null || echo not) +endif -uname_M := $(shell uname -m 2>/dev/null || 
echo not) - -RAW_ARCH := $(shell echo $(uname_M) | sed -e s/i.86/i386/ -e s/sun4u/sparc64/ \ - -e s/arm.*/arm/ -e s/sa110/arm/ \ +ARCH := $(shell echo $(ARCH) | sed -e s/i.86/x86/ -e s/x86_64/x86/ \ + -e s/sun4u/sparc/ -e s/sparc64/sparc/ \ + -e /arm64/!s/arm.*/arm/ -e s/sa110/arm/ \ -e s/s390x/s390/ -e s/parisc64/parisc/ \ -e s/ppc.*/powerpc/ -e s/mips.*/mips/ \ -e s/sh[234].*/sh/ -e s/aarch64.*/arm64/ \ -e s/tile.*/tile/ ) -# Additional ARCH settings for x86 -ifeq ($(RAW_ARCH),i386) - ARCH ?= x86 -endif - -ifeq ($(RAW_ARCH),x86_64) - ARCH ?= x86 - - ifneq (, $(findstring m32,$(CFLAGS))) - RAW_ARCH := x86_32 - endif -endif - -ifeq ($(RAW_ARCH),sparc64) - ARCH ?= sparc -endif - -ARCH ?= $(RAW_ARCH) - LP64 := $(shell echo __LP64__ | ${CC} ${CFLAGS} -E -x c - | tail -n 1) ifeq ($(LP64), 1) IS_64_BIT := 1 diff --git a/tools/perf/config/utilities.mak b/tools/perf/config/utilities.mak index 7076a62d0ff7..0ebef09c0842 100644 --- a/tools/perf/config/utilities.mak +++ b/tools/perf/config/utilities.mak @@ -175,6 +175,24 @@ _ge-abspath = $(if $(is-executable),$(1)) define get-executable-or-default $(if $($(1)),$(call _ge_attempt,$($(1)),$(1)),$(call _ge_attempt,$(2))) endef -_ge_attempt = $(if $(get-executable),$(get-executable),$(_gea_warn)$(call _gea_err,$(2))) -_gea_warn = $(warning The path '$(1)' is not executable.) +_ge_attempt = $(if $(get-executable),$(get-executable),$(call _gea_err,$(2))) _gea_err = $(if $(1),$(error Please set '$(1)' appropriately)) + +# try-run +# Usage: option = $(call try-run, $(CC)...-o "$$TMP",option-ok,otherwise) +# Exit code chooses option. "$$TMP" is can be used as temporary file and +# is automatically cleaned up. +try-run = $(shell set -e; \ + TMP="$(TMPOUT).$$$$.tmp"; \ + TMPO="$(TMPOUT).$$$$.o"; \ + if ($(1)) >/dev/null 2>&1; \ + then echo "$(2)"; \ + else echo "$(3)"; \ + fi; \ + rm -f "$$TMP" "$$TMPO") + +# cc-option +# Usage: cflags-y += $(call cc-option,-march=winchip-c6,-march=i586) + +cc-option = $(call try-run,\ + $(CC) $(KBUILD_CPPFLAGS) $(KBUILD_CFLAGS) $(1) -c -x c /dev/null -o "$$TMP",$(1),$(2)) diff --git a/tools/perf/perf-completion.sh b/tools/perf/perf-completion.sh index 33569847fdcc..3ba80b2359cc 100644 --- a/tools/perf/perf-completion.sh +++ b/tools/perf/perf-completion.sh @@ -47,8 +47,16 @@ __my_reassemble_comp_words_by_ref() done } -type _get_comp_words_by_ref &>/dev/null || -_get_comp_words_by_ref() +# Define preload_get_comp_words_by_ref="false", if the function +# __perf_get_comp_words_by_ref() is required instead. +preload_get_comp_words_by_ref="true" + +if [ $preload_get_comp_words_by_ref = "true" ]; then + type _get_comp_words_by_ref &>/dev/null || + preload_get_comp_words_by_ref="false" +fi +[ $preload_get_comp_words_by_ref = "true" ] || +__perf_get_comp_words_by_ref() { local exclude cur_ words_ cword_ if [ "$1" = "-n" ]; then @@ -76,8 +84,16 @@ _get_comp_words_by_ref() done } -type __ltrim_colon_completions &>/dev/null || -__ltrim_colon_completions() +# Define preload__ltrim_colon_completions="false", if the function +# __perf__ltrim_colon_completions() is required instead. 
+preload__ltrim_colon_completions="true" + +if [ $preload__ltrim_colon_completions = "true" ]; then + type __ltrim_colon_completions &>/dev/null || + preload__ltrim_colon_completions="false" +fi +[ $preload__ltrim_colon_completions = "true" ] || +__perf__ltrim_colon_completions() { if [[ "$1" == *:* && "$COMP_WORDBREAKS" == *:* ]]; then # Remove colon-word prefix from COMPREPLY items @@ -97,7 +113,32 @@ __perfcomp () __perfcomp_colon () { __perfcomp "$1" "$2" - __ltrim_colon_completions $cur + if [ $preload__ltrim_colon_completions = "true" ]; then + __ltrim_colon_completions $cur + else + __perf__ltrim_colon_completions $cur + fi +} + +__perf_prev_skip_opts () +{ + local i cmd_ cmds_ + + let i=cword-1 + cmds_=$($cmd $1 --list-cmds) + prev_skip_opts=() + while [ $i -ge 0 ]; do + if [[ ${words[i]} == $1 ]]; then + return + fi + for cmd_ in $cmds_; do + if [[ ${words[i]} == $cmd_ ]]; then + prev_skip_opts=${words[i]} + return + fi + done + ((i--)) + done } __perf_main () @@ -107,29 +148,36 @@ __perf_main () cmd=${words[0]} COMPREPLY=() + # Skip options backward and find the last perf command + __perf_prev_skip_opts # List perf subcommands or long options - if [ $cword -eq 1 ]; then + if [ -z $prev_skip_opts ]; then if [[ $cur == --* ]]; then - __perfcomp '--help --version \ - --exec-path --html-path --paginate --no-pager \ - --perf-dir --work-tree --debugfs-dir' -- "$cur" + cmds=$($cmd --list-opts) else cmds=$($cmd --list-cmds) - __perfcomp "$cmds" "$cur" fi + __perfcomp "$cmds" "$cur" # List possible events for -e option - elif [[ $prev == "-e" && "${words[1]}" == @(record|stat|top) ]]; then + elif [[ $prev == @("-e"|"--event") && + $prev_skip_opts == @(record|stat|top) ]]; then evts=$($cmd list --raw-dump) __perfcomp_colon "$evts" "$cur" - # List subcommands for perf commands - elif [[ $prev == @(kvm|kmem|mem|lock|sched) ]]; then - subcmds=$($cmd $prev --list-cmds) - __perfcomp_colon "$subcmds" "$cur" - # List long option names - elif [[ $cur == --* ]]; then - subcmd=${words[1]} - opts=$($cmd $subcmd --list-opts) - __perfcomp "$opts" "$cur" + else + # List subcommands for perf commands + if [[ $prev_skip_opts == @(kvm|kmem|mem|lock|sched| + |data|help|script|test|timechart|trace) ]]; then + subcmds=$($cmd $prev_skip_opts --list-cmds) + __perfcomp_colon "$subcmds" "$cur" + fi + # List long option names + if [[ $cur == --* ]]; then + subcmd=$prev_skip_opts + __perf_prev_skip_opts $subcmd + subcmd=$subcmd" "$prev_skip_opts + opts=$($cmd $subcmd --list-opts) + __perfcomp "$opts" "$cur" + fi fi } @@ -198,7 +246,11 @@ type perf &>/dev/null && _perf() { local cur words cword prev - _get_comp_words_by_ref -n =: cur words cword prev + if [ $preload_get_comp_words_by_ref = "true" ]; then + _get_comp_words_by_ref -n =: cur words cword prev + else + __perf_get_comp_words_by_ref -n =: cur words cword prev + fi __perf_main } && diff --git a/tools/perf/perf-sys.h b/tools/perf/perf-sys.h index 6ef68165c9db..83a25cef82fd 100644 --- a/tools/perf/perf-sys.h +++ b/tools/perf/perf-sys.h @@ -6,11 +6,9 @@ #include <sys/syscall.h> #include <linux/types.h> #include <linux/perf_event.h> +#include <asm/barrier.h> #if defined(__i386__) -#define mb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") -#define wmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") -#define rmb() asm volatile("lock; addl $0,0(%%esp)" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC {"model name"} #ifndef __NR_perf_event_open @@ -25,9 +23,6 @@ #endif #if defined(__x86_64__) -#define mb() 
asm volatile("mfence" ::: "memory") -#define wmb() asm volatile("sfence" ::: "memory") -#define rmb() asm volatile("lfence" ::: "memory") #define cpu_relax() asm volatile("rep; nop" ::: "memory"); #define CPUINFO_PROC {"model name"} #ifndef __NR_perf_event_open @@ -43,129 +38,63 @@ #ifdef __powerpc__ #include "../../arch/powerpc/include/uapi/asm/unistd.h" -#define mb() asm volatile ("sync" ::: "memory") -#define wmb() asm volatile ("sync" ::: "memory") -#define rmb() asm volatile ("sync" ::: "memory") #define CPUINFO_PROC {"cpu"} #endif #ifdef __s390__ -#define mb() asm volatile("bcr 15,0" ::: "memory") -#define wmb() asm volatile("bcr 15,0" ::: "memory") -#define rmb() asm volatile("bcr 15,0" ::: "memory") #define CPUINFO_PROC {"vendor_id"} #endif #ifdef __sh__ -#if defined(__SH4A__) || defined(__SH5__) -# define mb() asm volatile("synco" ::: "memory") -# define wmb() asm volatile("synco" ::: "memory") -# define rmb() asm volatile("synco" ::: "memory") -#else -# define mb() asm volatile("" ::: "memory") -# define wmb() asm volatile("" ::: "memory") -# define rmb() asm volatile("" ::: "memory") -#endif #define CPUINFO_PROC {"cpu type"} #endif #ifdef __hppa__ -#define mb() asm volatile("" ::: "memory") -#define wmb() asm volatile("" ::: "memory") -#define rmb() asm volatile("" ::: "memory") #define CPUINFO_PROC {"cpu"} #endif #ifdef __sparc__ -#ifdef __LP64__ -#define mb() asm volatile("ba,pt %%xcc, 1f\n" \ - "membar #StoreLoad\n" \ - "1:\n":::"memory") -#else -#define mb() asm volatile("":::"memory") -#endif -#define wmb() asm volatile("":::"memory") -#define rmb() asm volatile("":::"memory") #define CPUINFO_PROC {"cpu"} #endif #ifdef __alpha__ -#define mb() asm volatile("mb" ::: "memory") -#define wmb() asm volatile("wmb" ::: "memory") -#define rmb() asm volatile("mb" ::: "memory") #define CPUINFO_PROC {"cpu model"} #endif #ifdef __ia64__ -#define mb() asm volatile ("mf" ::: "memory") -#define wmb() asm volatile ("mf" ::: "memory") -#define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("hint @pause" ::: "memory") #define CPUINFO_PROC {"model name"} #endif #ifdef __arm__ -/* - * Use the __kuser_memory_barrier helper in the CPU helper page. See - * arch/arm/kernel/entry-armv.S in the kernel source for details. 
- */ -#define mb() ((void(*)(void))0xffff0fa0)() -#define wmb() ((void(*)(void))0xffff0fa0)() -#define rmb() ((void(*)(void))0xffff0fa0)() #define CPUINFO_PROC {"model name", "Processor"} #endif #ifdef __aarch64__ -#define mb() asm volatile("dmb ish" ::: "memory") -#define wmb() asm volatile("dmb ishst" ::: "memory") -#define rmb() asm volatile("dmb ishld" ::: "memory") #define cpu_relax() asm volatile("yield" ::: "memory") #endif #ifdef __mips__ -#define mb() asm volatile( \ - ".set mips2\n\t" \ - "sync\n\t" \ - ".set mips0" \ - : /* no output */ \ - : /* no input */ \ - : "memory") -#define wmb() mb() -#define rmb() mb() #define CPUINFO_PROC {"cpu model"} #endif #ifdef __arc__ -#define mb() asm volatile("" ::: "memory") -#define wmb() asm volatile("" ::: "memory") -#define rmb() asm volatile("" ::: "memory") #define CPUINFO_PROC {"Processor"} #endif #ifdef __metag__ -#define mb() asm volatile("" ::: "memory") -#define wmb() asm volatile("" ::: "memory") -#define rmb() asm volatile("" ::: "memory") #define CPUINFO_PROC {"CPU"} #endif #ifdef __xtensa__ -#define mb() asm volatile("memw" ::: "memory") -#define wmb() asm volatile("memw" ::: "memory") -#define rmb() asm volatile("" ::: "memory") #define CPUINFO_PROC {"core ID"} #endif #ifdef __tile__ -#define mb() asm volatile ("mf" ::: "memory") -#define wmb() asm volatile ("mf" ::: "memory") -#define rmb() asm volatile ("mf" ::: "memory") #define cpu_relax() asm volatile ("mfspr zero, PASS" ::: "memory") #define CPUINFO_PROC {"model name"} #endif -#define barrier() asm volatile ("" ::: "memory") - #ifndef cpu_relax #define cpu_relax() barrier() #endif diff --git a/tools/perf/perf.c b/tools/perf/perf.c index 3700a7faca6c..b857fcbd00cf 100644 --- a/tools/perf/perf.c +++ b/tools/perf/perf.c @@ -13,6 +13,7 @@ #include "util/quote.h" #include "util/run-command.h" #include "util/parse-events.h" +#include "util/parse-options.h" #include "util/debug.h" #include <api/fs/debugfs.h> #include <pthread.h> @@ -62,6 +63,7 @@ static struct cmd_struct commands[] = { #endif { "inject", cmd_inject, 0 }, { "mem", cmd_mem, 0 }, + { "data", cmd_data, 0 }, }; struct pager_config { @@ -124,6 +126,23 @@ static void commit_pager_choice(void) } } +struct option options[] = { + OPT_ARGUMENT("help", "help"), + OPT_ARGUMENT("version", "version"), + OPT_ARGUMENT("exec-path", "exec-path"), + OPT_ARGUMENT("html-path", "html-path"), + OPT_ARGUMENT("paginate", "paginate"), + OPT_ARGUMENT("no-pager", "no-pager"), + OPT_ARGUMENT("perf-dir", "perf-dir"), + OPT_ARGUMENT("work-tree", "work-tree"), + OPT_ARGUMENT("debugfs-dir", "debugfs-dir"), + OPT_ARGUMENT("buildid-dir", "buildid-dir"), + OPT_ARGUMENT("list-cmds", "list-cmds"), + OPT_ARGUMENT("list-opts", "list-opts"), + OPT_ARGUMENT("debug", "debug"), + OPT_END() +}; + static int handle_options(const char ***argv, int *argc, int *envchanged) { int handled = 0; @@ -222,6 +241,16 @@ static int handle_options(const char ***argv, int *argc, int *envchanged) struct cmd_struct *p = commands+i; printf("%s ", p->cmd); } + putchar('\n'); + exit(0); + } else if (!strcmp(cmd, "--list-opts")) { + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(options)-1; i++) { + struct option *p = options+i; + printf("--%s ", p->long_name); + } + putchar('\n'); exit(0); } else if (!strcmp(cmd, "--debug")) { if (*argc < 2) { diff --git a/tools/perf/perf.h b/tools/perf/perf.h index 1dabb8553499..4a5827fff799 100644 --- a/tools/perf/perf.h +++ b/tools/perf/perf.h @@ -29,7 +29,7 @@ static inline unsigned long long rdclock(void) return ts.tv_sec * 1000000000ULL 
+ ts.tv_nsec; } -#define MAX_NR_CPUS 256 +#define MAX_NR_CPUS 1024 extern const char *input_name; extern bool perf_host, perf_guest; @@ -53,14 +53,23 @@ struct record_opts { bool sample_time; bool period; bool sample_intr_regs; + bool running_time; + bool full_auxtrace; + bool auxtrace_snapshot_mode; unsigned int freq; unsigned int mmap_pages; + unsigned int auxtrace_mmap_pages; unsigned int user_freq; u64 branch_stack; u64 default_interval; u64 user_interval; + size_t auxtrace_snapshot_size; + const char *auxtrace_snapshot_opts; bool sample_transaction; unsigned initial_delay; + bool use_clockid; + clockid_t clockid; + unsigned int proc_map_timeout; }; struct option; diff --git a/tools/perf/scripts/Build b/tools/perf/scripts/Build new file mode 100644 index 000000000000..41efd7e368b3 --- /dev/null +++ b/tools/perf/scripts/Build @@ -0,0 +1,2 @@ +libperf-$(CONFIG_LIBPERL) += perl/Perf-Trace-Util/ +libperf-$(CONFIG_LIBPYTHON) += python/Perf-Trace-Util/ diff --git a/tools/perf/scripts/perl/Perf-Trace-Util/Build b/tools/perf/scripts/perl/Perf-Trace-Util/Build new file mode 100644 index 000000000000..928e110179cb --- /dev/null +++ b/tools/perf/scripts/perl/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs -Wno-undef -Wno-switch-default diff --git a/tools/perf/scripts/python/Perf-Trace-Util/Build b/tools/perf/scripts/python/Perf-Trace-Util/Build new file mode 100644 index 000000000000..aefc15c9444a --- /dev/null +++ b/tools/perf/scripts/python/Perf-Trace-Util/Build @@ -0,0 +1,3 @@ +libperf-y += Context.o + +CFLAGS_Context.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-nested-externs diff --git a/tools/perf/tests/Build b/tools/perf/tests/Build new file mode 100644 index 000000000000..d20d6e6ab65b --- /dev/null +++ b/tools/perf/tests/Build @@ -0,0 +1,44 @@ +perf-y += builtin-test.o +perf-y += parse-events.o +perf-y += dso-data.o +perf-y += attr.o +perf-y += vmlinux-kallsyms.o +perf-y += openat-syscall.o +perf-y += openat-syscall-all-cpus.o +perf-y += openat-syscall-tp-fields.o +perf-y += mmap-basic.o +perf-y += perf-record.o +perf-y += rdpmc.o +perf-y += evsel-roundtrip-name.o +perf-y += evsel-tp-sched.o +perf-y += fdarray.o +perf-y += pmu.o +perf-y += hists_common.o +perf-y += hists_link.o +perf-y += hists_filter.o +perf-y += hists_output.o +perf-y += hists_cumulate.o +perf-y += python-use.o +perf-y += bp_signal.o +perf-y += bp_signal_overflow.o +perf-y += task-exit.o +perf-y += sw-clock.o +perf-y += mmap-thread-lookup.o +perf-y += thread-mg-share.o +perf-y += switch-tracking.o +perf-y += keep-tracking.o +perf-y += code-reading.o +perf-y += sample-parsing.o +perf-y += parse-no-sample-id-all.o +perf-y += kmod-path.o +perf-y += thread-map.o + +perf-$(CONFIG_X86) += perf-time-to-tsc.o + +ifeq ($(ARCH),$(filter $(ARCH),x86 arm arm64)) +perf-$(CONFIG_DWARF_UNWIND) += dwarf-unwind.o +endif + +CFLAGS_attr.o += -DBINDIR="BUILD_STR($(bindir_SQ))" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_python-use.o += -DPYTHONPATH="BUILD_STR($(OUTPUT)python)" -DPYTHON="BUILD_STR($(PYTHON_WORD))" +CFLAGS_dwarf-unwind.o += -fno-optimize-sibling-calls diff --git a/tools/perf/tests/attr/base-record b/tools/perf/tests/attr/base-record index d3095dafed36..7e6d74946e04 100644 --- a/tools/perf/tests/attr/base-record +++ b/tools/perf/tests/attr/base-record @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0|1 -size=104 +size=112 
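Among the struct record_opts additions above, use_clockid and clockid map onto the identically named perf_event_attr fields, letting samples be timestamped with a caller-chosen POSIX clock. A minimal, self-contained sketch of opening a counter that way (needs reasonably recent kernel headers; the software event chosen here is arbitrary, and older kernels simply reject use_clockid with EINVAL):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <stdio.h>
#include <string.h>
#include <time.h>
#include <unistd.h>

int main(void)
{
	struct perf_event_attr attr;
	int fd;

	memset(&attr, 0, sizeof(attr));
	attr.size = sizeof(attr);
	attr.type = PERF_TYPE_SOFTWARE;
	attr.config = PERF_COUNT_SW_CPU_CLOCK;
	attr.use_clockid = 1;			/* honour attr.clockid */
	attr.clockid = CLOCK_MONOTONIC_RAW;	/* clock userspace can read too */

	fd = syscall(__NR_perf_event_open, &attr, 0 /* this thread */,
		     -1 /* any cpu */, -1 /* no group */, 0);
	if (fd < 0)
		perror("perf_event_open");
	else
		close(fd);
	return 0;
}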
config=0 sample_period=4000 sample_type=263 diff --git a/tools/perf/tests/attr/base-stat b/tools/perf/tests/attr/base-stat index 872ed7e24c7c..f4cf148f14cb 100644 --- a/tools/perf/tests/attr/base-stat +++ b/tools/perf/tests/attr/base-stat @@ -5,7 +5,7 @@ group_fd=-1 flags=0|8 cpu=* type=0 -size=104 +size=112 config=0 sample_period=0 sample_type=0 diff --git a/tools/perf/tests/builtin-test.c b/tools/perf/tests/builtin-test.c index 4b7d9ab0f049..c1dde733c3a6 100644 --- a/tools/perf/tests/builtin-test.c +++ b/tools/perf/tests/builtin-test.c @@ -23,12 +23,12 @@ static struct test { .func = test__vmlinux_matches_kallsyms, }, { - .desc = "detect open syscall event", - .func = test__open_syscall_event, + .desc = "detect openat syscall event", + .func = test__openat_syscall_event, }, { - .desc = "detect open syscall event on all cpus", - .func = test__open_syscall_event_on_all_cpus, + .desc = "detect openat syscall event on all cpus", + .func = test__openat_syscall_event_on_all_cpus, }, { .desc = "read samples using the mmap interface", @@ -73,8 +73,8 @@ static struct test { .func = test__perf_evsel__tp_sched_test, }, { - .desc = "Generate and check syscalls:sys_enter_open event fields", - .func = test__syscall_open_tp_fields, + .desc = "Generate and check syscalls:sys_enter_openat event fields", + .func = test__syscall_openat_tp_fields, }, { .desc = "struct perf_event_attr setup", @@ -126,7 +126,7 @@ static struct test { .desc = "Test parsing with no sample_id_all bit set", .func = test__parse_no_sample_id_all, }, -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) +#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT { .desc = "Test dwarf unwind", @@ -167,6 +167,14 @@ static struct test { .func = test__fdarray__add, }, { + .desc = "Test kmod_path__parse function", + .func = test__kmod_path__parse, + }, + { + .desc = "Test thread map", + .func = test__thread_map, + }, + { .func = NULL, }, }; @@ -215,7 +223,7 @@ static int run_test(struct test *test) wait(&status); if (WIFEXITED(status)) { - err = WEXITSTATUS(status); + err = (signed char)WEXITSTATUS(status); pr_debug("test child finished with %d\n", err); } else if (WIFSIGNALED(status)) { err = -1; @@ -291,7 +299,7 @@ static int perf_test__list(int argc, const char **argv) int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) { - const char * const test_usage[] = { + const char *test_usage[] = { "perf test [<options>] [{list <test-name-fragment>|[<test-name-fragments>|<test-numbers>]}]", NULL, }; @@ -302,13 +310,14 @@ int cmd_test(int argc, const char **argv, const char *prefix __maybe_unused) "be more verbose (show symbol address, etc)"), OPT_END() }; + const char * const test_subcommands[] = { "list", NULL }; struct intlist *skiplist = NULL; int ret = hists__init(); if (ret < 0) return ret; - argc = parse_options(argc, argv, test_options, test_usage, 0); + argc = parse_options_subcommand(argc, argv, test_options, test_subcommands, test_usage, 0); if (argc >= 1 && !strcmp(argv[0], "list")) return perf_test__list(argc, argv); diff --git a/tools/perf/tests/code-reading.c b/tools/perf/tests/code-reading.c index f671ec37a7c4..39c784a100a9 100644 --- a/tools/perf/tests/code-reading.c +++ b/tools/perf/tests/code-reading.c @@ -248,6 +248,7 @@ static int process_sample_event(struct machine *machine, struct perf_sample sample; struct thread *thread; u8 cpumode; + int ret; if (perf_evlist__parse_sample(evlist, event, &sample)) { 
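The (signed char) cast applied to WEXITSTATUS() in builtin-test.c above matters because a child's exit status is truncated to eight bits: a test that exits with a negative code such as -2 comes back as 254, and the cast restores the sign. A standalone demonstration of just that C behaviour (not perf code):

#include <stdio.h>
#include <stdlib.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>

int main(void)
{
	pid_t pid = fork();
	int status;

	if (pid == 0)
		exit(-2);	/* child: pretend a test returned -2 */

	waitpid(pid, &status, 0);
	if (WIFEXITED(status)) {
		int raw = WEXITSTATUS(status);			/* 254: sign lost */
		int fixed = (signed char)WEXITSTATUS(status);	/* -2 again */
		printf("raw=%d fixed=%d\n", raw, fixed);
	}
	return 0;
}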
pr_debug("perf_evlist__parse_sample failed\n"); @@ -262,7 +263,9 @@ static int process_sample_event(struct machine *machine, cpumode = event->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - return read_object_code(sample.ip, READLEN, cpumode, thread, state); + ret = read_object_code(sample.ip, READLEN, cpumode, thread, state); + thread__put(thread); + return ret; } static int process_event(struct machine *machine, struct perf_evlist *evlist, @@ -448,7 +451,7 @@ static int do_test_code_reading(bool try_kcore) } ret = perf_event__synthesize_thread_map(NULL, threads, - perf_event__process, machine, false); + perf_event__process, machine, false, 500); if (ret < 0) { pr_debug("perf_event__synthesize_thread_map failed\n"); goto out_err; @@ -457,13 +460,13 @@ static int do_test_code_reading(bool try_kcore) thread = machine__findnew_thread(machine, pid, pid); if (!thread) { pr_debug("machine__findnew_thread failed\n"); - goto out_err; + goto out_put; } cpus = cpu_map__new(NULL); if (!cpus) { pr_debug("cpu_map__new failed\n"); - goto out_err; + goto out_put; } while (1) { @@ -472,7 +475,7 @@ static int do_test_code_reading(bool try_kcore) evlist = perf_evlist__new(); if (!evlist) { pr_debug("perf_evlist__new failed\n"); - goto out_err; + goto out_put; } perf_evlist__set_maps(evlist, cpus, threads); @@ -482,10 +485,10 @@ static int do_test_code_reading(bool try_kcore) else str = "cycles"; pr_debug("Parsing event '%s'\n", str); - ret = parse_events(evlist, str); + ret = parse_events(evlist, str, NULL); if (ret < 0) { pr_debug("parse_events failed\n"); - goto out_err; + goto out_put; } perf_evlist__config(evlist, &opts); @@ -506,7 +509,7 @@ static int do_test_code_reading(bool try_kcore) continue; } pr_debug("perf_evlist__open failed\n"); - goto out_err; + goto out_put; } break; } @@ -514,7 +517,7 @@ static int do_test_code_reading(bool try_kcore) ret = perf_evlist__mmap(evlist, UINT_MAX, false); if (ret < 0) { pr_debug("perf_evlist__mmap failed\n"); - goto out_err; + goto out_put; } perf_evlist__enable(evlist); @@ -525,7 +528,7 @@ static int do_test_code_reading(bool try_kcore) ret = process_events(machine, evlist, &state); if (ret < 0) - goto out_err; + goto out_put; if (!have_vmlinux && !have_kcore && !try_kcore) err = TEST_CODE_READING_NO_KERNEL_OBJ; @@ -535,12 +538,15 @@ static int do_test_code_reading(bool try_kcore) err = TEST_CODE_READING_NO_ACCESS; else err = TEST_CODE_READING_OK; +out_put: + thread__put(thread); out_err: + if (evlist) { perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); - thread_map__delete(threads); + cpu_map__put(cpus); + thread_map__put(threads); } machines__destroy_kernel_maps(&machines); machine__delete_threads(machine); diff --git a/tools/perf/tests/dso-data.c b/tools/perf/tests/dso-data.c index caaf37f079b1..a218aeaf56a0 100644 --- a/tools/perf/tests/dso-data.c +++ b/tools/perf/tests/dso-data.c @@ -99,6 +99,17 @@ struct test_data_offset offsets[] = { }, }; +/* move it from util/dso.c for compatibility */ +static int dso__data_fd(struct dso *dso, struct machine *machine) +{ + int fd = dso__data_get_fd(dso, machine); + + if (fd >= 0) + dso__data_put_fd(dso); + + return fd; +} + int test__dso_data(void) { struct machine machine; @@ -112,6 +123,9 @@ int test__dso_data(void) dso = dso__new((const char *)file); + TEST_ASSERT_VAL("Failed to access to dso", + dso__data_fd(dso, &machine) >= 0); + /* Basic 10 bytes tests. 
*/ for (i = 0; i < ARRAY_SIZE(offsets); i++) { struct test_data_offset *data = &offsets[i]; @@ -152,7 +166,7 @@ int test__dso_data(void) free(buf); } - dso__delete(dso); + dso__put(dso); unlink(file); return 0; } @@ -212,7 +226,7 @@ static void dsos__delete(int cnt) struct dso *dso = dsos[i]; unlink(dso->name); - dso__delete(dso); + dso__put(dso); } free(dsos); @@ -243,8 +257,8 @@ int test__dso_data_cache(void) limit = nr * 4; TEST_ASSERT_VAL("failed to set file limit", !set_fd_limit(limit)); - /* and this is now our dso open FDs limit + 1 extra */ - dso_cnt = limit / 2 + 1; + /* and this is now our dso open FDs limit */ + dso_cnt = limit / 2; TEST_ASSERT_VAL("failed to create dsos\n", !dsos__create(dso_cnt, TEST_FILE_SIZE)); @@ -252,13 +266,13 @@ int test__dso_data_cache(void) struct dso *dso = dsos[i]; /* - * Open dsos via dso__data_fd or dso__data_read_offset. - * Both opens the data file and keep it open. + * Open dsos via dso__data_fd(), it opens the data + * file and keep it open (unless open file limit). */ + fd = dso__data_fd(dso, &machine); + TEST_ASSERT_VAL("failed to get fd", fd > 0); + if (i % 2) { - fd = dso__data_fd(dso, &machine); - TEST_ASSERT_VAL("failed to get fd", fd > 0); - } else { #define BUFSIZE 10 u8 buf[BUFSIZE]; ssize_t n; @@ -268,7 +282,10 @@ int test__dso_data_cache(void) } } - /* open +1 dso over the allowed limit */ + /* verify the first one is already open */ + TEST_ASSERT_VAL("dsos[0] is not open", dsos[0]->data.fd != -1); + + /* open +1 dso to reach the allowed limit */ fd = dso__data_fd(dsos[i], &machine); TEST_ASSERT_VAL("failed to get fd", fd > 0); diff --git a/tools/perf/tests/dwarf-unwind.c b/tools/perf/tests/dwarf-unwind.c index 0bf06bec68c7..40b36c462427 100644 --- a/tools/perf/tests/dwarf-unwind.c +++ b/tools/perf/tests/dwarf-unwind.c @@ -28,7 +28,7 @@ static int init_live_machine(struct machine *machine) pid_t pid = getpid(); return perf_event__synthesize_mmap_events(NULL, &event, pid, pid, - mmap_handler, machine, true); + mmap_handler, machine, true, 500); } #define MAX_STACK 8 @@ -170,6 +170,7 @@ int test__dwarf_unwind(void) } err = krava_1(thread); + thread__put(thread); out: machine__delete_threads(machine); diff --git a/tools/perf/tests/evsel-roundtrip-name.c b/tools/perf/tests/evsel-roundtrip-name.c index b8d8341b383e..3fa715987a5e 100644 --- a/tools/perf/tests/evsel-roundtrip-name.c +++ b/tools/perf/tests/evsel-roundtrip-name.c @@ -23,7 +23,7 @@ static int perf_evsel__roundtrip_cache_name_test(void) for (i = 0; i < PERF_COUNT_HW_CACHE_RESULT_MAX; i++) { __perf_evsel__hw_cache_type_op_res_name(type, op, i, name, sizeof(name)); - err = parse_events(evlist, name); + err = parse_events(evlist, name, NULL); if (err) ret = err; } @@ -71,7 +71,7 @@ static int __perf_evsel__name_array_test(const char *names[], int nr_names) return -ENOMEM; for (i = 0; i < nr_names; ++i) { - err = parse_events(evlist, names[i]); + err = parse_events(evlist, names[i], NULL); if (err) { pr_debug("failed to parse event '%s', err %d\n", names[i], err); diff --git a/tools/perf/tests/hists_common.c b/tools/perf/tests/hists_common.c index a62c09134516..ce80b274b097 100644 --- a/tools/perf/tests/hists_common.c +++ b/tools/perf/tests/hists_common.c @@ -96,6 +96,7 @@ struct machine *setup_fake_machine(struct machines *machines) goto out; thread__set_comm(thread, fake_threads[i].comm, 0); + thread__put(thread); } for (i = 0; i < ARRAY_SIZE(fake_mmap_info); i++) { @@ -120,8 +121,7 @@ struct machine *setup_fake_machine(struct machines *machines) size_t k; struct dso *dso; - dso 
= __dsos__findnew(&machine->user_dsos, - fake_symbols[i].dso_name); + dso = machine__findnew_dso(machine, fake_symbols[i].dso_name); if (dso == NULL) goto out; @@ -134,11 +134,15 @@ struct machine *setup_fake_machine(struct machines *machines) sym = symbol__new(fsym->start, fsym->length, STB_GLOBAL, fsym->name); - if (sym == NULL) + if (sym == NULL) { + dso__put(dso); goto out; + } symbols__insert(&dso->symbols[MAP__FUNCTION], sym); } + + dso__put(dso); } return machine; diff --git a/tools/perf/tests/hists_cumulate.c b/tools/perf/tests/hists_cumulate.c index 18619966454c..7d82c8be5e36 100644 --- a/tools/perf/tests/hists_cumulate.c +++ b/tools/perf/tests/hists_cumulate.c @@ -87,6 +87,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) }, }; struct hist_entry_iter iter = { + .evsel = evsel, + .sample = &sample, .hide_unresolved = false, }; @@ -104,9 +106,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, evsel, &sample, - PERF_MAX_STACK_DEPTH, NULL) < 0) + if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + NULL) < 0) { + addr_location__put(&al); goto out; + } fake_samples[i].thread = al.thread; fake_samples[i].map = al.map; @@ -695,7 +699,7 @@ int test__hists_cumulate(void) TEST_ASSERT_VAL("No memory", evlist); - err = parse_events(evlist, "cpu-clock"); + err = parse_events(evlist, "cpu-clock", NULL); if (err) goto out; diff --git a/tools/perf/tests/hists_filter.c b/tools/perf/tests/hists_filter.c index 59e53db7914c..ce48775e6ada 100644 --- a/tools/perf/tests/hists_filter.c +++ b/tools/perf/tests/hists_filter.c @@ -63,6 +63,8 @@ static int add_hist_entries(struct perf_evlist *evlist, }, }; struct hist_entry_iter iter = { + .evsel = evsel, + .sample = &sample, .ops = &hist_iter_normal, .hide_unresolved = false, }; @@ -81,9 +83,11 @@ static int add_hist_entries(struct perf_evlist *evlist, &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, evsel, &sample, - PERF_MAX_STACK_DEPTH, NULL) < 0) + if (hist_entry_iter__add(&iter, &al, + PERF_MAX_STACK_DEPTH, NULL) < 0) { + addr_location__put(&al); goto out; + } fake_samples[i].thread = al.thread; fake_samples[i].map = al.map; @@ -108,10 +112,10 @@ int test__hists_filter(void) TEST_ASSERT_VAL("No memory", evlist); - err = parse_events(evlist, "cpu-clock"); + err = parse_events(evlist, "cpu-clock", NULL); if (err) goto out; - err = parse_events(evlist, "task-clock"); + err = parse_events(evlist, "task-clock", NULL); if (err) goto out; diff --git a/tools/perf/tests/hists_link.c b/tools/perf/tests/hists_link.c index 278ba8344c23..8c102b011424 100644 --- a/tools/perf/tests/hists_link.c +++ b/tools/perf/tests/hists_link.c @@ -91,8 +91,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) he = __hists__add_entry(hists, &al, NULL, NULL, NULL, 1, 1, 0, true); - if (he == NULL) + if (he == NULL) { + addr_location__put(&al); goto out; + } fake_common_samples[k].thread = al.thread; fake_common_samples[k].map = al.map; @@ -115,8 +117,10 @@ static int add_hist_entries(struct perf_evlist *evlist, struct machine *machine) he = __hists__add_entry(hists, &al, NULL, NULL, NULL, 1, 1, 0, true); - if (he == NULL) + if (he == NULL) { + addr_location__put(&al); goto out; + } fake_samples[i][k].thread = al.thread; fake_samples[i][k].map = al.map; @@ -282,10 +286,10 @@ int test__hists_link(void) if (evlist == NULL) return -ENOMEM; - err = parse_events(evlist, "cpu-clock"); + err = parse_events(evlist, 
"cpu-clock", NULL); if (err) goto out; - err = parse_events(evlist, "task-clock"); + err = parse_events(evlist, "task-clock", NULL); if (err) goto out; diff --git a/tools/perf/tests/hists_output.c b/tools/perf/tests/hists_output.c index b52c9faea224..adbebc852cc8 100644 --- a/tools/perf/tests/hists_output.c +++ b/tools/perf/tests/hists_output.c @@ -57,6 +57,8 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) }, }; struct hist_entry_iter iter = { + .evsel = evsel, + .sample = &sample, .ops = &hist_iter_normal, .hide_unresolved = false, }; @@ -70,9 +72,11 @@ static int add_hist_entries(struct hists *hists, struct machine *machine) &sample) < 0) goto out; - if (hist_entry_iter__add(&iter, &al, evsel, &sample, - PERF_MAX_STACK_DEPTH, NULL) < 0) + if (hist_entry_iter__add(&iter, &al, PERF_MAX_STACK_DEPTH, + NULL) < 0) { + addr_location__put(&al); goto out; + } fake_samples[i].thread = al.thread; fake_samples[i].map = al.map; @@ -590,7 +594,7 @@ int test__hists_output(void) TEST_ASSERT_VAL("No memory", evlist); - err = parse_events(evlist, "cpu-clock"); + err = parse_events(evlist, "cpu-clock", NULL); if (err) goto out; diff --git a/tools/perf/tests/keep-tracking.c b/tools/perf/tests/keep-tracking.c index 7a5ab7b0b8f6..4d4b9837b630 100644 --- a/tools/perf/tests/keep-tracking.c +++ b/tools/perf/tests/keep-tracking.c @@ -78,8 +78,8 @@ int test__keep_tracking(void) perf_evlist__set_maps(evlist, cpus, threads); - CHECK__(parse_events(evlist, "dummy:u")); - CHECK__(parse_events(evlist, "cycles:u")); + CHECK__(parse_events(evlist, "dummy:u", NULL)); + CHECK__(parse_events(evlist, "cycles:u", NULL)); perf_evlist__config(evlist, &opts); @@ -144,8 +144,8 @@ out_err: perf_evlist__disable(evlist); perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); - thread_map__delete(threads); + cpu_map__put(cpus); + thread_map__put(threads); } return err; diff --git a/tools/perf/tests/kmod-path.c b/tools/perf/tests/kmod-path.c new file mode 100644 index 000000000000..08c433b4bf4f --- /dev/null +++ b/tools/perf/tests/kmod-path.c @@ -0,0 +1,145 @@ +#include <stdbool.h> +#include "tests.h" +#include "dso.h" +#include "debug.h" + +static int test(const char *path, bool alloc_name, bool alloc_ext, + bool kmod, bool comp, const char *name, const char *ext) +{ + struct kmod_path m; + + memset(&m, 0x0, sizeof(m)); + + TEST_ASSERT_VAL("kmod_path__parse", + !__kmod_path__parse(&m, path, alloc_name, alloc_ext)); + + pr_debug("%s - alloc name %d, alloc ext %d, kmod %d, comp %d, name '%s', ext '%s'\n", + path, alloc_name, alloc_ext, m.kmod, m.comp, m.name, m.ext); + + TEST_ASSERT_VAL("wrong kmod", m.kmod == kmod); + TEST_ASSERT_VAL("wrong comp", m.comp == comp); + + if (ext) + TEST_ASSERT_VAL("wrong ext", m.ext && !strcmp(ext, m.ext)); + else + TEST_ASSERT_VAL("wrong ext", !m.ext); + + if (name) + TEST_ASSERT_VAL("wrong name", m.name && !strcmp(name, m.name)); + else + TEST_ASSERT_VAL("wrong name", !m.name); + + free(m.name); + free(m.ext); + return 0; +} + +static int test_is_kernel_module(const char *path, int cpumode, bool expect) +{ + TEST_ASSERT_VAL("is_kernel_module", + (!!is_kernel_module(path, cpumode)) == (!!expect)); + pr_debug("%s (cpumode: %d) - is_kernel_module: %s\n", + path, cpumode, expect ? 
"true" : "false"); + return 0; +} + +#define T(path, an, ae, k, c, n, e) \ + TEST_ASSERT_VAL("failed", !test(path, an, ae, k, c, n, e)) + +#define M(path, c, e) \ + TEST_ASSERT_VAL("failed", !test_is_kernel_module(path, c, e)) + +int test__kmod_path__parse(void) +{ + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x-x.ko", true , true , true, false, "[x_x]", NULL); + T("/xxxx/xxxx/x-x.ko", false , true , true, false, NULL , NULL); + T("/xxxx/xxxx/x-x.ko", true , false , true, false, "[x_x]", NULL); + T("/xxxx/xxxx/x-x.ko", false , false , true, false, NULL , NULL); + M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); + M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_KERNEL, true); + M("/xxxx/xxxx/x-x.ko", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x.ko.gz", true , true , true, true, "[x]", "gz"); + T("/xxxx/xxxx/x.ko.gz", false , true , true, true, NULL , "gz"); + T("/xxxx/xxxx/x.ko.gz", true , false , true, true, "[x]", NULL); + T("/xxxx/xxxx/x.ko.gz", false , false , true, true, NULL , NULL); + M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); + M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_KERNEL, true); + M("/xxxx/xxxx/x.ko.gz", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("/xxxx/xxxx/x.gz", true , true , false, true, "x.gz" ,"gz"); + T("/xxxx/xxxx/x.gz", false , true , false, true, NULL ,"gz"); + T("/xxxx/xxxx/x.gz", true , false , false, true, "x.gz" , NULL); + T("/xxxx/xxxx/x.gz", false , false , false, true, NULL , NULL); + M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_KERNEL, false); + M("/xxxx/xxxx/x.gz", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("x.gz", true , true , false, true, "x.gz", "gz"); + T("x.gz", false , true , false, true, NULL , "gz"); + T("x.gz", true , false , false, true, "x.gz", NULL); + T("x.gz", false , false , false, true, NULL , NULL); + M("x.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("x.gz", PERF_RECORD_MISC_KERNEL, false); + M("x.gz", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("x.ko.gz", true , true , true, true, "[x]", "gz"); + T("x.ko.gz", false , true , true, true, NULL , "gz"); + T("x.ko.gz", true , false , true, true, "[x]", NULL); + T("x.ko.gz", false , false , true, true, NULL , NULL); + M("x.ko.gz", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); + M("x.ko.gz", PERF_RECORD_MISC_KERNEL, true); + M("x.ko.gz", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("[test_module]", true , true , true, false, "[test_module]", NULL); + T("[test_module]", false , true , true, false, NULL , NULL); + T("[test_module]", true , false , true, false, "[test_module]", NULL); + T("[test_module]", false , false , true, false, NULL , NULL); + M("[test_module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); + M("[test_module]", PERF_RECORD_MISC_KERNEL, true); + M("[test_module]", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("[test.module]", true , true , true, false, "[test.module]", NULL); + T("[test.module]", false , true , true, false, NULL , NULL); + T("[test.module]", true , false , true, false, "[test.module]", NULL); + T("[test.module]", false , false , true, false, NULL , NULL); + M("[test.module]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, true); + M("[test.module]", PERF_RECORD_MISC_KERNEL, true); + 
M("[test.module]", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("[vdso]", true , true , false, false, "[vdso]", NULL); + T("[vdso]", false , true , false, false, NULL , NULL); + T("[vdso]", true , false , false, false, "[vdso]", NULL); + T("[vdso]", false , false , false, false, NULL , NULL); + M("[vdso]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[vdso]", PERF_RECORD_MISC_KERNEL, false); + M("[vdso]", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("[vsyscall]", true , true , false, false, "[vsyscall]", NULL); + T("[vsyscall]", false , true , false, false, NULL , NULL); + T("[vsyscall]", true , false , false, false, "[vsyscall]", NULL); + T("[vsyscall]", false , false , false, false, NULL , NULL); + M("[vsyscall]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[vsyscall]", PERF_RECORD_MISC_KERNEL, false); + M("[vsyscall]", PERF_RECORD_MISC_USER, false); + + /* path alloc_name alloc_ext kmod comp name ext */ + T("[kernel.kallsyms]", true , true , false, false, "[kernel.kallsyms]", NULL); + T("[kernel.kallsyms]", false , true , false, false, NULL , NULL); + T("[kernel.kallsyms]", true , false , false, false, "[kernel.kallsyms]", NULL); + T("[kernel.kallsyms]", false , false , false, false, NULL , NULL); + M("[kernel.kallsyms]", PERF_RECORD_MISC_CPUMODE_UNKNOWN, false); + M("[kernel.kallsyms]", PERF_RECORD_MISC_KERNEL, false); + M("[kernel.kallsyms]", PERF_RECORD_MISC_USER, false); + + return 0; +} diff --git a/tools/perf/tests/make b/tools/perf/tests/make index 75709d2b17b4..729112f4cfaa 100644 --- a/tools/perf/tests/make +++ b/tools/perf/tests/make @@ -1,11 +1,22 @@ +ifndef MK +ifeq ($(MAKECMDGOALS),) +# no target specified, trigger the whole suite +all: + @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile + @echo "Testing Makefile.perf"; $(MAKE) -sf tests/make MK=Makefile.perf +else +# run only specific test over 'Makefile' +%: + @echo "Testing Makefile"; $(MAKE) -sf tests/make MK=Makefile $@ +endif +else PERF := . 
-MK := Makefile include config/Makefile.arch # FIXME looks like x86 is the only arch running tests ;-) # we need some IS_(32/64) flag to make this generic -ifeq ($(IS_X86_64),1) +ifeq ($(ARCH)$(IS_64_BIT), x861) lib = lib64 else lib = lib @@ -32,6 +43,7 @@ make_no_backtrace := NO_BACKTRACE=1 make_no_libnuma := NO_LIBNUMA=1 make_no_libaudit := NO_LIBAUDIT=1 make_no_libbionic := NO_LIBBIONIC=1 +make_no_auxtrace := NO_AUXTRACE=1 make_tags := tags make_cscope := cscope make_help := help @@ -46,17 +58,23 @@ make_install_man := install-man make_install_html := install-html make_install_info := install-info make_install_pdf := install-pdf +make_install_prefix := install prefix=/tmp/krava make_static := LDFLAGS=-static # all the NO_* variable combined make_minimal := NO_LIBPERL=1 NO_LIBPYTHON=1 NO_NEWT=1 NO_GTK2=1 make_minimal += NO_DEMANGLE=1 NO_LIBELF=1 NO_LIBUNWIND=1 NO_BACKTRACE=1 make_minimal += NO_LIBNUMA=1 NO_LIBAUDIT=1 NO_LIBBIONIC=1 -make_minimal += NO_LIBDW_DWARF_UNWIND=1 +make_minimal += NO_LIBDW_DWARF_UNWIND=1 NO_AUXTRACE=1 # $(run) contains all available tests run := make_pure +# Targets 'clean all' can be run together only through top level +# Makefile because we detect clean target in Makefile.perf and +# disable features detection +ifeq ($(MK),Makefile) run += make_clean_all +endif run += make_python_perf_so run += make_debug run += make_no_libperl @@ -74,6 +92,7 @@ run += make_no_backtrace run += make_no_libnuma run += make_no_libaudit run += make_no_libbionic +run += make_no_auxtrace run += make_help run += make_doc run += make_perf_o @@ -81,6 +100,7 @@ run += make_util_map_o run += make_util_pmu_bison_o run += make_install run += make_install_bin +run += make_install_prefix # FIXME 'install-*' commented out till they're fixed # run += make_install_doc # run += make_install_man @@ -155,6 +175,12 @@ test_make_install_O := $(call test_dest_files,$(installed_files_all)) test_make_install_bin := $(call test_dest_files,$(installed_files_bin)) test_make_install_bin_O := $(call test_dest_files,$(installed_files_bin)) +# We prefix all installed files for make_install_prefix +# with '/tmp/krava' to match installed/prefix-ed files. +installed_files_all_prefix := $(addprefix /tmp/krava/,$(installed_files_all)) +test_make_install_prefix := $(call test_dest_files,$(installed_files_all_prefix)) +test_make_install_prefix_O := $(call test_dest_files,$(installed_files_all_prefix)) + # FIXME nothing gets installed test_make_install_man := test -f $$TMP_DEST/share/man/man1/perf.1 test_make_install_man_O := $(test_make_install_man) @@ -223,10 +249,23 @@ tarpkg: echo "- $@: $$cmd" && echo $$cmd > $@ && \ ( eval $$cmd ) >> $@ 2>&1 -all: $(run) $(run_O) tarpkg +make_kernelsrc: + @echo "- make -C <kernelsrc> tools/perf" + $(call clean); \ + (make -C ../.. 
tools/perf) > $@ 2>&1 && \ + test -x perf && rm -f $@ || (cat $@ ; false) + +make_kernelsrc_tools: + @echo "- make -C <kernelsrc>/tools perf" + $(call clean); \ + (make -C ../../tools perf) > $@ 2>&1 && \ + test -x perf && rm -f $@ || (cat $@ ; false) + +all: $(run) $(run_O) tarpkg make_kernelsrc make_kernelsrc_tools @echo OK out: $(run_O) @echo OK .PHONY: all $(run) $(run_O) tarpkg clean +endif # ifndef MK diff --git a/tools/perf/tests/mmap-basic.c b/tools/perf/tests/mmap-basic.c index 9b9622a33932..666b67a4df9d 100644 --- a/tools/perf/tests/mmap-basic.c +++ b/tools/perf/tests/mmap-basic.c @@ -23,10 +23,8 @@ int test__basic_mmap(void) struct cpu_map *cpus; struct perf_evlist *evlist; cpu_set_t cpu_set; - const char *syscall_names[] = { "getsid", "getppid", "getpgrp", - "getpgid", }; - pid_t (*syscalls[])(void) = { (void *)getsid, getppid, getpgrp, - (void*)getpgid }; + const char *syscall_names[] = { "getsid", "getppid", "getpgid", }; + pid_t (*syscalls[])(void) = { (void *)getsid, getppid, (void*)getpgid }; #define nsyscalls ARRAY_SIZE(syscall_names) unsigned int nr_events[nsyscalls], expected_nr_events[nsyscalls], i, j; @@ -142,8 +140,8 @@ out_delete_evlist: cpus = NULL; threads = NULL; out_free_cpus: - cpu_map__delete(cpus); + cpu_map__put(cpus); out_free_threads: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/mmap-thread-lookup.c b/tools/perf/tests/mmap-thread-lookup.c index 2113f1c8611f..145050e2e544 100644 --- a/tools/perf/tests/mmap-thread-lookup.c +++ b/tools/perf/tests/mmap-thread-lookup.c @@ -129,7 +129,7 @@ static int synth_all(struct machine *machine) { return perf_event__synthesize_threads(NULL, perf_event__process, - machine, 0); + machine, 0, 500); } static int synth_process(struct machine *machine) @@ -141,9 +141,9 @@ static int synth_process(struct machine *machine) err = perf_event__synthesize_thread_map(NULL, map, perf_event__process, - machine, 0); + machine, 0, 500); - thread_map__delete(map); + thread_map__put(map); return err; } @@ -191,6 +191,8 @@ static int mmap_events(synth_cb synth) PERF_RECORD_MISC_USER, MAP__FUNCTION, (unsigned long) (td->map + 1), &al); + thread__put(thread); + if (!al.map) { pr_debug("failed, couldn't find map\n"); err = -1; diff --git a/tools/perf/tests/open-syscall-all-cpus.c b/tools/perf/tests/openat-syscall-all-cpus.c index 8fa82d1700c7..a572f87e9c8d 100644 --- a/tools/perf/tests/open-syscall-all-cpus.c +++ b/tools/perf/tests/openat-syscall-all-cpus.c @@ -3,13 +3,14 @@ #include "thread_map.h" #include "cpumap.h" #include "debug.h" +#include "stat.h" -int test__open_syscall_event_on_all_cpus(void) +int test__openat_syscall_event_on_all_cpus(void) { int err = -1, fd, cpu; struct cpu_map *cpus; struct perf_evsel *evsel; - unsigned int nr_open_calls = 111, i; + unsigned int nr_openat_calls = 111, i; cpu_set_t cpu_set; struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); char sbuf[STRERR_BUFSIZE]; @@ -27,9 +28,14 @@ int test__open_syscall_event_on_all_cpus(void) CPU_ZERO(&cpu_set); - evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); + evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on /sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } @@ -41,7 
+47,7 @@ int test__open_syscall_event_on_all_cpus(void) } for (cpu = 0; cpu < cpus->nr; ++cpu) { - unsigned int ncalls = nr_open_calls + cpu; + unsigned int ncalls = nr_openat_calls + cpu; /* * XXX eventually lift this restriction in a way that * keeps perf building on older glibc installations @@ -61,7 +67,7 @@ int test__open_syscall_event_on_all_cpus(void) goto out_close_fd; } for (i = 0; i < ncalls; ++i) { - fd = open("/etc/passwd", O_RDONLY); + fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } CPU_CLR(cpus->map[cpu], &cpu_set); @@ -72,7 +78,7 @@ int test__open_syscall_event_on_all_cpus(void) * we use the auto allocation it will allocate just for 1 cpu, * as we start by cpu 0. */ - if (perf_evsel__alloc_counts(evsel, cpus->nr) < 0) { + if (perf_evsel__alloc_counts(evsel, cpus->nr, 1) < 0) { pr_debug("perf_evsel__alloc_counts(ncpus=%d)\n", cpus->nr); goto out_close_fd; } @@ -91,10 +97,10 @@ int test__open_syscall_event_on_all_cpus(void) break; } - expected = nr_open_calls + cpu; - if (evsel->counts->cpu[cpu].val != expected) { + expected = nr_openat_calls + cpu; + if (perf_counts(evsel->counts, cpu, 0)->val != expected) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls on cpu %d, got %" PRIu64 "\n", - expected, cpus->map[cpu], evsel->counts->cpu[cpu].val); + expected, cpus->map[cpu], perf_counts(evsel->counts, cpu, 0)->val); err = -1; } } @@ -105,6 +111,6 @@ out_close_fd: out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/open-syscall-tp-fields.c b/tools/perf/tests/openat-syscall-tp-fields.c index 127dcae0b760..01a19626c846 100644 --- a/tools/perf/tests/open-syscall-tp-fields.c +++ b/tools/perf/tests/openat-syscall-tp-fields.c @@ -5,7 +5,7 @@ #include "tests.h" #include "debug.h" -int test__syscall_open_tp_fields(void) +int test__syscall_openat_tp_fields(void) { struct record_opts opts = { .target = { @@ -29,7 +29,7 @@ int test__syscall_open_tp_fields(void) goto out; } - evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); + evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); if (evsel == NULL) { pr_debug("%s: perf_evsel__newtp\n", __func__); goto out_delete_evlist; @@ -45,7 +45,7 @@ int test__syscall_open_tp_fields(void) perf_evsel__config(evsel, &opts); - evlist->threads->map[0] = getpid(); + thread_map__set_pid(evlist->threads, 0, getpid()); err = perf_evlist__open(evlist); if (err < 0) { @@ -66,7 +66,7 @@ int test__syscall_open_tp_fields(void) /* * Generate the event: */ - open(filename, flags); + openat(AT_FDCWD, filename, flags); while (1) { int before = nr_events; diff --git a/tools/perf/tests/open-syscall.c b/tools/perf/tests/openat-syscall.c index a33b2daae40f..c9a37bc6b33a 100644 --- a/tools/perf/tests/open-syscall.c +++ b/tools/perf/tests/openat-syscall.c @@ -3,11 +3,11 @@ #include "debug.h" #include "tests.h" -int test__open_syscall_event(void) +int test__openat_syscall_event(void) { int err = -1, fd; struct perf_evsel *evsel; - unsigned int nr_open_calls = 111, i; + unsigned int nr_openat_calls = 111, i; struct thread_map *threads = thread_map__new(-1, getpid(), UINT_MAX); char sbuf[STRERR_BUFSIZE]; @@ -16,9 +16,14 @@ int test__open_syscall_event(void) return -1; } - evsel = perf_evsel__newtp("syscalls", "sys_enter_open"); + evsel = perf_evsel__newtp("syscalls", "sys_enter_openat"); if (evsel == NULL) { - pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + if (tracefs_configured()) + pr_debug("is tracefs mounted on 
/sys/kernel/tracing?\n"); + else if (debugfs_configured()) + pr_debug("is debugfs mounted on /sys/kernel/debug?\n"); + else + pr_debug("Neither tracefs or debugfs is enabled in this kernel\n"); goto out_thread_map_delete; } @@ -29,8 +34,8 @@ int test__open_syscall_event(void) goto out_evsel_delete; } - for (i = 0; i < nr_open_calls; ++i) { - fd = open("/etc/passwd", O_RDONLY); + for (i = 0; i < nr_openat_calls; ++i) { + fd = openat(0, "/etc/passwd", O_RDONLY); close(fd); } @@ -39,9 +44,9 @@ int test__open_syscall_event(void) goto out_close_fd; } - if (evsel->counts->cpu[0].val != nr_open_calls) { + if (perf_counts(evsel->counts, 0, 0)->val != nr_openat_calls) { pr_debug("perf_evsel__read_on_cpu: expected to intercept %d calls, got %" PRIu64 "\n", - nr_open_calls, evsel->counts->cpu[0].val); + nr_openat_calls, perf_counts(evsel->counts, 0, 0)->val); goto out_close_fd; } @@ -51,6 +56,6 @@ out_close_fd: out_evsel_delete: perf_evsel__delete(evsel); out_thread_map_delete: - thread_map__delete(threads); + thread_map__put(threads); return err; } diff --git a/tools/perf/tests/parse-events.c b/tools/perf/tests/parse-events.c index 1cdab0ce00e2..d76963f7ad3d 100644 --- a/tools/perf/tests/parse-events.c +++ b/tools/perf/tests/parse-events.c @@ -3,6 +3,7 @@ #include "evsel.h" #include "evlist.h" #include <api/fs/fs.h> +#include <api/fs/tracefs.h> #include <api/fs/debugfs.h> #include "tests.h" #include "debug.h" @@ -294,6 +295,36 @@ static int test__checkevent_genhw_modifier(struct perf_evlist *evlist) return test__checkevent_genhw(evlist); } +static int test__checkevent_exclude_idle_modifier(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", !evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong exclude_user", !evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + +static int test__checkevent_exclude_idle_modifier_1(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel = perf_evlist__first(evlist); + + TEST_ASSERT_VAL("wrong exclude idle", evsel->attr.exclude_idle); + TEST_ASSERT_VAL("wrong exclude guest", !evsel->attr.exclude_guest); + TEST_ASSERT_VAL("wrong exclude host", evsel->attr.exclude_host); + TEST_ASSERT_VAL("wrong exclude_user", evsel->attr.exclude_user); + TEST_ASSERT_VAL("wrong exclude_kernel", !evsel->attr.exclude_kernel); + TEST_ASSERT_VAL("wrong exclude_hv", evsel->attr.exclude_hv); + TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); + + return test__checkevent_symbolic_name(evlist); +} + static int test__checkevent_breakpoint_modifier(struct perf_evlist *evlist) { struct perf_evsel *evsel = perf_evlist__first(evlist); @@ -396,7 +427,7 @@ static int test__checkevent_list(struct perf_evlist *evlist) TEST_ASSERT_VAL("wrong exclude_hv", !evsel->attr.exclude_hv); TEST_ASSERT_VAL("wrong precise_ip", !evsel->attr.precise_ip); - /* syscalls:sys_enter_open:k */ + /* syscalls:sys_enter_openat:k */ evsel = perf_evsel__next(evsel); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", @@ -634,7 +665,7 @@ static int test__group3(struct perf_evlist *evlist __maybe_unused) 
TEST_ASSERT_VAL("wrong number of entries", 5 == evlist->nr_entries); TEST_ASSERT_VAL("wrong number of groups", 2 == evlist->nr_groups); - /* group1 syscalls:sys_enter_open:H */ + /* group1 syscalls:sys_enter_openat:H */ evsel = leader = perf_evlist__first(evlist); TEST_ASSERT_VAL("wrong type", PERF_TYPE_TRACEPOINT == evsel->attr.type); TEST_ASSERT_VAL("wrong sample_type", @@ -1192,11 +1223,19 @@ static int count_tracepoints(void) { char events_path[PATH_MAX]; struct dirent *events_ent; + const char *mountpoint; DIR *events_dir; int cnt = 0; - scnprintf(events_path, PATH_MAX, "%s/tracing/events", - debugfs_find_mountpoint()); + mountpoint = tracefs_find_mountpoint(); + if (mountpoint) { + scnprintf(events_path, PATH_MAX, "%s/events", + mountpoint); + } else { + mountpoint = debugfs_find_mountpoint(); + scnprintf(events_path, PATH_MAX, "%s/tracing/events", + mountpoint); + } events_dir = opendir(events_path); @@ -1254,7 +1293,7 @@ struct evlist_test { static struct evlist_test test__events[] = { { - .name = "syscalls:sys_enter_open", + .name = "syscalls:sys_enter_openat", .check = test__checkevent_tracepoint, .id = 0, }, @@ -1314,7 +1353,7 @@ static struct evlist_test test__events[] = { .id = 11, }, { - .name = "syscalls:sys_enter_open:k", + .name = "syscalls:sys_enter_openat:k", .check = test__checkevent_tracepoint_modifier, .id = 12, }, @@ -1369,7 +1408,7 @@ static struct evlist_test test__events[] = { .id = 22, }, { - .name = "r1,syscalls:sys_enter_open:k,1:1:hp", + .name = "r1,syscalls:sys_enter_openat:k,1:1:hp", .check = test__checkevent_list, .id = 23, }, @@ -1404,7 +1443,7 @@ static struct evlist_test test__events[] = { .id = 29, }, { - .name = "group1{syscalls:sys_enter_open:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", + .name = "group1{syscalls:sys_enter_openat:H,cycles:kppp},group2{cycles,1:3}:G,instructions:u", .check = test__group3, .id = 30, }, @@ -1485,6 +1524,16 @@ static struct evlist_test test__events[] = { .id = 100, }, #endif + { + .name = "instructions:I", + .check = test__checkevent_exclude_idle_modifier, + .id = 45, + }, + { + .name = "instructions:kIG", + .check = test__checkevent_exclude_idle_modifier_1, + .id = 46, + }, }; static struct evlist_test test__events_pmu[] = { @@ -1522,7 +1571,7 @@ static int test_event(struct evlist_test *e) if (evlist == NULL) return -ENOMEM; - ret = parse_events(evlist, e->name); + ret = parse_events(evlist, e->name, NULL); if (ret) { pr_debug("failed to parse event '%s', err %d\n", e->name, ret); diff --git a/tools/perf/tests/perf-time-to-tsc.c b/tools/perf/tests/perf-time-to-tsc.c index f238442b238a..5f49484f1abc 100644 --- a/tools/perf/tests/perf-time-to-tsc.c +++ b/tools/perf/tests/perf-time-to-tsc.c @@ -68,7 +68,7 @@ int test__perf_time_to_tsc(void) perf_evlist__set_maps(evlist, cpus, threads); - CHECK__(parse_events(evlist, "cycles:u")); + CHECK__(parse_events(evlist, "cycles:u", NULL)); perf_evlist__config(evlist, &opts); diff --git a/tools/perf/tests/pmu.c b/tools/perf/tests/pmu.c index eeb68bb1972d..faa04e9d5d5f 100644 --- a/tools/perf/tests/pmu.c +++ b/tools/perf/tests/pmu.c @@ -152,7 +152,8 @@ int test__pmu(void) if (ret) break; - ret = perf_pmu__config_terms(&formats, &attr, terms, false); + ret = perf_pmu__config_terms(&formats, &attr, terms, + false, NULL); if (ret) break; diff --git a/tools/perf/tests/switch-tracking.c b/tools/perf/tests/switch-tracking.c index cc68648c7c55..e698742d4fec 100644 --- a/tools/perf/tests/switch-tracking.c +++ b/tools/perf/tests/switch-tracking.c @@ -347,7 +347,7 @@ int 
test__switch_tracking(void) perf_evlist__set_maps(evlist, cpus, threads); /* First event */ - err = parse_events(evlist, "cpu-clock:u"); + err = parse_events(evlist, "cpu-clock:u", NULL); if (err) { pr_debug("Failed to parse event dummy:u\n"); goto out_err; @@ -356,7 +356,7 @@ int test__switch_tracking(void) cpu_clocks_evsel = perf_evlist__last(evlist); /* Second event */ - err = parse_events(evlist, "cycles:u"); + err = parse_events(evlist, "cycles:u", NULL); if (err) { pr_debug("Failed to parse event cycles:u\n"); goto out_err; @@ -371,7 +371,7 @@ int test__switch_tracking(void) goto out; } - err = parse_events(evlist, sched_switch); + err = parse_events(evlist, sched_switch, NULL); if (err) { pr_debug("Failed to parse event %s\n", sched_switch); goto out_err; @@ -401,7 +401,7 @@ int test__switch_tracking(void) perf_evsel__set_sample_bit(cycles_evsel, TIME); /* Fourth event */ - err = parse_events(evlist, "dummy:u"); + err = parse_events(evlist, "dummy:u", NULL); if (err) { pr_debug("Failed to parse event dummy:u\n"); goto out_err; @@ -560,8 +560,8 @@ out: perf_evlist__disable(evlist); perf_evlist__delete(evlist); } else { - cpu_map__delete(cpus); - thread_map__delete(threads); + cpu_map__put(cpus); + thread_map__put(threads); } return err; diff --git a/tools/perf/tests/tests.h b/tools/perf/tests/tests.h index 00e776a87a9c..ebb47d96bc0b 100644 --- a/tools/perf/tests/tests.h +++ b/tools/perf/tests/tests.h @@ -9,6 +9,15 @@ do { \ } \ } while (0) +#define TEST_ASSERT_EQUAL(text, val, expected) \ +do { \ + if (val != expected) { \ + pr_debug("FAILED %s:%d %s (%d != %d)\n", \ + __FILE__, __LINE__, text, val, expected); \ + return -1; \ + } \ +} while (0) + enum { TEST_OK = 0, TEST_FAIL = -1, @@ -17,14 +26,14 @@ enum { /* Tests */ int test__vmlinux_matches_kallsyms(void); -int test__open_syscall_event(void); -int test__open_syscall_event_on_all_cpus(void); +int test__openat_syscall_event(void); +int test__openat_syscall_event_on_all_cpus(void); int test__basic_mmap(void); int test__PERF_RECORD(void); int test__rdpmc(void); int test__perf_evsel__roundtrip_name_test(void); int test__perf_evsel__tp_sched_test(void); -int test__syscall_open_tp_fields(void); +int test__syscall_openat_tp_fields(void); int test__pmu(void); int test__attr(void); int test__dso_data(void); @@ -51,8 +60,10 @@ int test__hists_cumulate(void); int test__switch_tracking(void); int test__fdarray__filter(void); int test__fdarray__add(void); +int test__kmod_path__parse(void); +int test__thread_map(void); -#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) +#if defined(__x86_64__) || defined(__i386__) || defined(__arm__) || defined(__aarch64__) #ifdef HAVE_DWARF_UNWIND_SUPPORT struct thread; struct perf_sample; diff --git a/tools/perf/tests/thread-map.c b/tools/perf/tests/thread-map.c new file mode 100644 index 000000000000..5acf000939ea --- /dev/null +++ b/tools/perf/tests/thread-map.c @@ -0,0 +1,38 @@ +#include <sys/types.h> +#include <unistd.h> +#include "tests.h" +#include "thread_map.h" +#include "debug.h" + +int test__thread_map(void) +{ + struct thread_map *map; + + /* test map on current pid */ + map = thread_map__new_by_pid(getpid()); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", + thread_map__pid(map, 0) == getpid()); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "perf")); + thread_map__put(map); + + /* test dummy pid */ + map = 
thread_map__new_dummy(); + TEST_ASSERT_VAL("failed to alloc map", map); + + thread_map__read_comms(map); + + TEST_ASSERT_VAL("wrong nr", map->nr == 1); + TEST_ASSERT_VAL("wrong pid", thread_map__pid(map, 0) == -1); + TEST_ASSERT_VAL("wrong comm", + thread_map__comm(map, 0) && + !strcmp(thread_map__comm(map, 0), "dummy")); + thread_map__put(map); + return 0; +} diff --git a/tools/perf/tests/thread-mg-share.c b/tools/perf/tests/thread-mg-share.c index b028499dd3cf..01fabb19d746 100644 --- a/tools/perf/tests/thread-mg-share.c +++ b/tools/perf/tests/thread-mg-share.c @@ -43,7 +43,7 @@ int test__thread_mg_share(void) leader && t1 && t2 && t3 && other); mg = leader->mg; - TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 4); + TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 4); /* test the map groups pointer is shared */ TEST_ASSERT_VAL("map groups don't match", mg == t1->mg); @@ -58,33 +58,40 @@ int test__thread_mg_share(void) other_leader = machine__find_thread(machine, 4, 4); TEST_ASSERT_VAL("failed to find other leader", other_leader); + /* + * Ok, now that all the rbtree related operations were done, + * lets remove all of them from there so that we can do the + * refcounting tests. + */ + machine__remove_thread(machine, leader); + machine__remove_thread(machine, t1); + machine__remove_thread(machine, t2); + machine__remove_thread(machine, t3); + machine__remove_thread(machine, other); + machine__remove_thread(machine, other_leader); + other_mg = other->mg; - TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 2); + TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 2); TEST_ASSERT_VAL("map groups don't match", other_mg == other_leader->mg); /* release thread group */ - thread__delete(leader); - TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 3); + thread__put(leader); + TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 3); - thread__delete(t1); - TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 2); + thread__put(t1); + TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 2); - thread__delete(t2); - TEST_ASSERT_VAL("wrong refcnt", mg->refcnt == 1); + thread__put(t2); + TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&mg->refcnt), 1); - thread__delete(t3); + thread__put(t3); /* release other group */ - thread__delete(other_leader); - TEST_ASSERT_VAL("wrong refcnt", other_mg->refcnt == 1); + thread__put(other_leader); + TEST_ASSERT_EQUAL("wrong refcnt", atomic_read(&other_mg->refcnt), 1); - thread__delete(other); - - /* - * Cannot call machine__delete_threads(machine) now, - * because we've already released all the threads. 
- */ + thread__put(other); machines__exit(&machines); return 0; } diff --git a/tools/perf/tests/vmlinux-kallsyms.c b/tools/perf/tests/vmlinux-kallsyms.c index 3d9088003a5b..b34c5fc829ae 100644 --- a/tools/perf/tests/vmlinux-kallsyms.c +++ b/tools/perf/tests/vmlinux-kallsyms.c @@ -23,9 +23,10 @@ int test__vmlinux_matches_kallsyms(void) { int err = -1; struct rb_node *nd; struct symbol *sym; - struct map *kallsyms_map, *vmlinux_map; + struct map *kallsyms_map, *vmlinux_map, *map; struct machine kallsyms, vmlinux; enum map_type type = MAP__FUNCTION; + struct maps *maps = &vmlinux.kmaps.maps[type]; u64 mem_start, mem_end; /* @@ -184,8 +185,8 @@ detour: pr_info("Maps only in vmlinux:\n"); - for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node), *pair; + for (map = maps__first(maps); map; map = map__next(map)) { + struct map *pair; /* * If it is the kernel, kallsyms is always "[kernel.kallsyms]", while * the kernel will have the path for the vmlinux file being used, @@ -193,22 +194,22 @@ detour: * both cases. */ pair = map_groups__find_by_name(&kallsyms.kmaps, type, - (pos->dso->kernel ? - pos->dso->short_name : - pos->dso->name)); + (map->dso->kernel ? + map->dso->short_name : + map->dso->name)); if (pair) pair->priv = 1; else - map__fprintf(pos, stderr); + map__fprintf(map, stderr); } pr_info("Maps in vmlinux with a different name in kallsyms:\n"); - for (nd = rb_first(&vmlinux.kmaps.maps[type]); nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node), *pair; + for (map = maps__first(maps); map; map = map__next(map)) { + struct map *pair; - mem_start = vmlinux_map->unmap_ip(vmlinux_map, pos->start); - mem_end = vmlinux_map->unmap_ip(vmlinux_map, pos->end); + mem_start = vmlinux_map->unmap_ip(vmlinux_map, map->start); + mem_end = vmlinux_map->unmap_ip(vmlinux_map, map->end); pair = map_groups__find(&kallsyms.kmaps, type, mem_start); if (pair == NULL || pair->priv) @@ -217,7 +218,7 @@ detour: if (pair->start == mem_start) { pair->priv = 1; pr_info(" %" PRIx64 "-%" PRIx64 " %" PRIx64 " %s in kallsyms as", - pos->start, pos->end, pos->pgoff, pos->dso->name); + map->start, map->end, map->pgoff, map->dso->name); if (mem_end != pair->end) pr_info(":\n*%" PRIx64 "-%" PRIx64 " %" PRIx64, pair->start, pair->end, pair->pgoff); @@ -228,12 +229,11 @@ detour: pr_info("Maps only in kallsyms:\n"); - for (nd = rb_first(&kallsyms.kmaps.maps[type]); - nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node); + maps = &kallsyms.kmaps.maps[type]; - if (!pos->priv) - map__fprintf(pos, stderr); + for (map = maps__first(maps); map; map = map__next(map)) { + if (!map->priv) + map__fprintf(map, stderr); } out: machine__exit(&kallsyms); diff --git a/tools/perf/ui/Build b/tools/perf/ui/Build new file mode 100644 index 000000000000..0a73538c0441 --- /dev/null +++ b/tools/perf/ui/Build @@ -0,0 +1,14 @@ +libperf-y += setup.o +libperf-y += helpline.o +libperf-y += progress.o +libperf-y += util.o +libperf-y += hist.o +libperf-y += stdio/hist.o + +CFLAGS_setup.o += -DLIBDIR="BUILD_STR($(LIBDIR))" + +libperf-$(CONFIG_SLANG) += browser.o +libperf-$(CONFIG_SLANG) += browsers/ +libperf-$(CONFIG_SLANG) += tui/ + +CFLAGS_browser.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/Build b/tools/perf/ui/browsers/Build new file mode 100644 index 000000000000..de223f5bed58 --- /dev/null +++ b/tools/perf/ui/browsers/Build @@ -0,0 +1,10 @@ +libperf-y += annotate.o +libperf-y += hists.o +libperf-y += map.o +libperf-y += 
scripts.o +libperf-y += header.o + +CFLAGS_annotate.o += -DENABLE_SLFUTURE_CONST +CFLAGS_hists.o += -DENABLE_SLFUTURE_CONST +CFLAGS_map.o += -DENABLE_SLFUTURE_CONST +CFLAGS_scripts.o += -DENABLE_SLFUTURE_CONST diff --git a/tools/perf/ui/browsers/annotate.c b/tools/perf/ui/browsers/annotate.c index 9d32e3c0cfee..5995a8bd7c69 100644 --- a/tools/perf/ui/browsers/annotate.c +++ b/tools/perf/ui/browsers/annotate.c @@ -11,16 +11,21 @@ #include "../../util/evsel.h" #include <pthread.h> +struct disasm_line_samples { + double percent; + u64 nr; +}; + struct browser_disasm_line { - struct rb_node rb_node; - u32 idx; - int idx_asm; - int jump_sources; + struct rb_node rb_node; + u32 idx; + int idx_asm; + int jump_sources; /* * actual length of this array is saved on the nr_events field * of the struct annotate_browser */ - double percent[1]; + struct disasm_line_samples samples[1]; }; static struct annotate_browser_opt { @@ -28,7 +33,8 @@ static struct annotate_browser_opt { use_offset, jump_arrows, show_linenr, - show_nr_jumps; + show_nr_jumps, + show_total_period; } annotate_browser__opts = { .use_offset = true, .jump_arrows = true, @@ -105,15 +111,20 @@ static void annotate_browser__write(struct ui_browser *browser, void *entry, int char bf[256]; for (i = 0; i < ab->nr_events; i++) { - if (bdl->percent[i] > percent_max) - percent_max = bdl->percent[i]; + if (bdl->samples[i].percent > percent_max) + percent_max = bdl->samples[i].percent; } if (dl->offset != -1 && percent_max != 0.0) { for (i = 0; i < ab->nr_events; i++) { - ui_browser__set_percent_color(browser, bdl->percent[i], + ui_browser__set_percent_color(browser, + bdl->samples[i].percent, current_entry); - slsmg_printf("%6.2f ", bdl->percent[i]); + if (annotate_browser__opts.show_total_period) + slsmg_printf("%6" PRIu64 " ", + bdl->samples[i].nr); + else + slsmg_printf("%6.2f ", bdl->samples[i].percent); } } else { ui_browser__set_percent_color(browser, 0, current_entry); @@ -273,9 +284,9 @@ static int disasm__cmp(struct browser_disasm_line *a, int i; for (i = 0; i < nr_pcnt; i++) { - if (a->percent[i] == b->percent[i]) + if (a->samples[i].percent == b->samples[i].percent) continue; - return a->percent[i] < b->percent[i]; + return a->samples[i].percent < b->samples[i].percent; } return 0; } @@ -366,14 +377,17 @@ static void annotate_browser__calc_percent(struct annotate_browser *browser, next = disasm__get_next_ip_line(¬es->src->source, pos); for (i = 0; i < browser->nr_events; i++) { - bpos->percent[i] = disasm__calc_percent(notes, + u64 nr_samples; + + bpos->samples[i].percent = disasm__calc_percent(notes, evsel->idx + i, pos->offset, next ? 
next->offset : len, - &path); + &path, &nr_samples); + bpos->samples[i].nr = nr_samples; - if (max_percent < bpos->percent[i]) - max_percent = bpos->percent[i]; + if (max_percent < bpos->samples[i].percent) + max_percent = bpos->samples[i].percent; } if (max_percent < 0.01) { @@ -737,6 +751,7 @@ static int annotate_browser__run(struct annotate_browser *browser, "n Search next string\n" "o Toggle disassembler output/simplified view\n" "s Toggle source code view\n" + "t Toggle total period view\n" "/ Search string\n" "k Toggle line numbers\n" "r Run available scripts\n" @@ -812,6 +827,11 @@ show_sup_ins: ui_helpline__puts("Actions are only available for 'callq', 'retq' & jump instructions."); } continue; + case 't': + annotate_browser__opts.show_total_period = + !annotate_browser__opts.show_total_period; + annotate_browser__update_addr_width(browser); + continue; case K_LEFT: case K_ESC: case 'q': @@ -829,10 +849,24 @@ out: return key; } +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt) +{ + /* Set default value for show_total_period. */ + annotate_browser__opts.show_total_period = + symbol_conf.show_total_period; + + return symbol__tui_annotate(ms->sym, ms->map, evsel, hbt); +} + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt) { - return symbol__tui_annotate(he->ms.sym, he->ms.map, evsel, hbt); + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + + return map_symbol__tui_annotate(&he->ms, evsel, hbt); } static void annotate_browser__mark_jump_targets(struct annotate_browser *browser, @@ -919,7 +953,8 @@ int symbol__tui_annotate(struct symbol *sym, struct map *map, if (perf_evsel__is_group_event(evsel)) { nr_pcnt = evsel->nr_members; - sizeof_bdl += sizeof(double) * (nr_pcnt - 1); + sizeof_bdl += sizeof(struct disasm_line_samples) * + (nr_pcnt - 1); } if (symbol__annotate(sym, map, sizeof_bdl) < 0) { @@ -996,6 +1031,7 @@ static struct annotate_config { ANNOTATE_CFG(show_linenr), ANNOTATE_CFG(show_nr_jumps), ANNOTATE_CFG(use_offset), + ANNOTATE_CFG(show_total_period), }; #undef ANNOTATE_CFG diff --git a/tools/perf/ui/browsers/hists.c b/tools/perf/ui/browsers/hists.c index 788506eef567..fa67613976a8 100644 --- a/tools/perf/ui/browsers/hists.c +++ b/tools/perf/ui/browsers/hists.c @@ -25,6 +25,9 @@ struct hist_browser { struct hists *hists; struct hist_entry *he_selection; struct map_symbol *selection; + struct hist_browser_timer *hbt; + struct pstack *pstack; + struct perf_session_env *env; int print_seq; bool show_dso; bool show_headers; @@ -45,7 +48,25 @@ static struct rb_node *hists__filter_entries(struct rb_node *nd, static bool hist_browser__has_filter(struct hist_browser *hb) { - return hists__has_filter(hb->hists) || hb->min_pcnt; + return hists__has_filter(hb->hists) || hb->min_pcnt || symbol_conf.has_filter; +} + +static int hist_browser__get_folding(struct hist_browser *browser) +{ + struct rb_node *nd; + struct hists *hists = browser->hists; + int unfolded_rows = 0; + + for (nd = rb_first(&hists->entries); + (nd = hists__filter_entries(nd, browser->min_pcnt)) != NULL; + nd = rb_next(nd)) { + struct hist_entry *he = + rb_entry(nd, struct hist_entry, rb_node); + + if (he->unfolded) + unfolded_rows += he->nr_rows; + } + return unfolded_rows; } static u32 hist_browser__nr_entries(struct hist_browser *hb) @@ -57,6 +78,7 @@ static u32 hist_browser__nr_entries(struct hist_browser *hb) else nr_entries = 
hb->hists->nr_entries; + hb->nr_callchain_rows = hist_browser__get_folding(hb); return nr_entries + hb->nr_callchain_rows; } @@ -117,24 +139,19 @@ static char tree__folded_sign(bool unfolded) return unfolded ? '-' : '+'; } -static char map_symbol__folded(const struct map_symbol *ms) -{ - return ms->has_children ? tree__folded_sign(ms->unfolded) : ' '; -} - static char hist_entry__folded(const struct hist_entry *he) { - return map_symbol__folded(&he->ms); + return he->has_children ? tree__folded_sign(he->unfolded) : ' '; } static char callchain_list__folded(const struct callchain_list *cl) { - return map_symbol__folded(&cl->ms); + return cl->has_children ? tree__folded_sign(cl->unfolded) : ' '; } -static void map_symbol__set_folding(struct map_symbol *ms, bool unfold) +static void callchain_list__set_folding(struct callchain_list *cl, bool unfold) { - ms->unfolded = unfold ? ms->has_children : false; + cl->unfolded = unfold ? cl->has_children : false; } static int callchain_node__count_rows_rb_tree(struct callchain_node *node) @@ -170,7 +187,7 @@ static int callchain_node__count_rows(struct callchain_node *node) list_for_each_entry(chain, &node->val, list) { ++n; - unfolded = chain->ms.unfolded; + unfolded = chain->unfolded; } if (unfolded) @@ -192,15 +209,27 @@ static int callchain__count_rows(struct rb_root *chain) return n; } -static bool map_symbol__toggle_fold(struct map_symbol *ms) +static bool hist_entry__toggle_fold(struct hist_entry *he) +{ + if (!he) + return false; + + if (!he->has_children) + return false; + + he->unfolded = !he->unfolded; + return true; +} + +static bool callchain_list__toggle_fold(struct callchain_list *cl) { - if (!ms) + if (!cl) return false; - if (!ms->has_children) + if (!cl->has_children) return false; - ms->unfolded = !ms->unfolded; + cl->unfolded = !cl->unfolded; return true; } @@ -216,10 +245,10 @@ static void callchain_node__init_have_children_rb_tree(struct callchain_node *no list_for_each_entry(chain, &child->val, list) { if (first) { first = false; - chain->ms.has_children = chain->list.next != &child->val || + chain->has_children = chain->list.next != &child->val || !RB_EMPTY_ROOT(&child->rb_root); } else - chain->ms.has_children = chain->list.next == &child->val && + chain->has_children = chain->list.next == &child->val && !RB_EMPTY_ROOT(&child->rb_root); } @@ -233,11 +262,11 @@ static void callchain_node__init_have_children(struct callchain_node *node, struct callchain_list *chain; chain = list_entry(node->val.next, struct callchain_list, list); - chain->ms.has_children = has_sibling; + chain->has_children = has_sibling; if (!list_empty(&node->val)) { chain = list_entry(node->val.prev, struct callchain_list, list); - chain->ms.has_children = !RB_EMPTY_ROOT(&node->rb_root); + chain->has_children = !RB_EMPTY_ROOT(&node->rb_root); } callchain_node__init_have_children_rb_tree(node); @@ -257,7 +286,7 @@ static void callchain__init_have_children(struct rb_root *root) static void hist_entry__init_have_children(struct hist_entry *he) { if (!he->init_have_children) { - he->ms.has_children = !RB_EMPTY_ROOT(&he->sorted_chain); + he->has_children = !RB_EMPTY_ROOT(&he->sorted_chain); callchain__init_have_children(&he->sorted_chain); he->init_have_children = true; } @@ -265,14 +294,22 @@ static void hist_entry__init_have_children(struct hist_entry *he) static bool hist_browser__toggle_fold(struct hist_browser *browser) { - if (map_symbol__toggle_fold(browser->selection)) { - struct hist_entry *he = browser->he_selection; + struct hist_entry *he = 
browser->he_selection; + struct map_symbol *ms = browser->selection; + struct callchain_list *cl = container_of(ms, struct callchain_list, ms); + bool has_children; + + if (ms == &he->ms) + has_children = hist_entry__toggle_fold(he); + else + has_children = callchain_list__toggle_fold(cl); + if (has_children) { hist_entry__init_have_children(he); browser->b.nr_entries -= he->nr_rows; browser->nr_callchain_rows -= he->nr_rows; - if (he->ms.unfolded) + if (he->unfolded) he->nr_rows = callchain__count_rows(&he->sorted_chain); else he->nr_rows = 0; @@ -299,8 +336,8 @@ static int callchain_node__set_folding_rb_tree(struct callchain_node *node, bool list_for_each_entry(chain, &child->val, list) { ++n; - map_symbol__set_folding(&chain->ms, unfold); - has_children = chain->ms.has_children; + callchain_list__set_folding(chain, unfold); + has_children = chain->has_children; } if (has_children) @@ -318,8 +355,8 @@ static int callchain_node__set_folding(struct callchain_node *node, bool unfold) list_for_each_entry(chain, &node->val, list) { ++n; - map_symbol__set_folding(&chain->ms, unfold); - has_children = chain->ms.has_children; + callchain_list__set_folding(chain, unfold); + has_children = chain->has_children; } if (has_children) @@ -344,9 +381,9 @@ static int callchain__set_folding(struct rb_root *chain, bool unfold) static void hist_entry__set_folding(struct hist_entry *he, bool unfold) { hist_entry__init_have_children(he); - map_symbol__set_folding(&he->ms, unfold); + he->unfolded = unfold ? he->has_children : false; - if (he->ms.has_children) { + if (he->has_children) { int n = callchain__set_folding(&he->sorted_chain, unfold); he->nr_rows = unfold ? n : 0; } else @@ -387,11 +424,11 @@ static void ui_browser__warn_lost_events(struct ui_browser *browser) "Or reduce the sampling frequency."); } -static int hist_browser__run(struct hist_browser *browser, - struct hist_browser_timer *hbt) +static int hist_browser__run(struct hist_browser *browser, const char *help) { int key; char title[160]; + struct hist_browser_timer *hbt = browser->hbt; int delay_secs = hbt ? hbt->refresh : 0; browser->b.entries = &browser->hists->entries; @@ -399,8 +436,7 @@ static int hist_browser__run(struct hist_browser *browser, hists__browser_title(browser->hists, hbt, title, sizeof(title)); - if (ui_browser__show(&browser->b, title, - "Press '?' for help on key bindings") < 0) + if (ui_browser__show(&browser->b, title, help) < 0) return -1; while (1) { @@ -492,6 +528,7 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, { int color, width; char folded_sign = callchain_list__folded(chain); + bool show_annotated = browser->show_dso && chain->ms.sym && symbol__annotation(chain->ms.sym)->src; color = HE_COLORSET_NORMAL; width = browser->b.width - (offset + 2); @@ -504,7 +541,8 @@ static void hist_browser__show_callchain_entry(struct hist_browser *browser, ui_browser__set_color(&browser->b, color); hist_browser__gotorc(browser, row, 0); slsmg_write_nstring(" ", offset); - slsmg_printf("%c ", folded_sign); + slsmg_printf("%c", folded_sign); + ui_browser__write_graph(&browser->b, show_annotated ? 
SLSMG_RARROW_CHAR : ' '); slsmg_write_nstring(str, width); } @@ -995,7 +1033,7 @@ do_offset: if (offset > 0) { do { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->ms.unfolded) { + if (h->unfolded) { u16 remaining = h->nr_rows - h->row_offset; if (offset > remaining) { offset -= remaining; @@ -1016,7 +1054,7 @@ do_offset: } else if (offset < 0) { while (1) { h = rb_entry(nd, struct hist_entry, rb_node); - if (h->ms.unfolded) { + if (h->unfolded) { if (first) { if (-offset > h->row_offset) { offset += h->row_offset; @@ -1053,7 +1091,7 @@ do_offset: * row_offset at its last entry. */ h = rb_entry(nd, struct hist_entry, rb_node); - if (h->ms.unfolded) + if (h->unfolded) h->row_offset = h->nr_rows; break; } @@ -1174,7 +1212,9 @@ static int hist_browser__dump(struct hist_browser *browser) return 0; } -static struct hist_browser *hist_browser__new(struct hists *hists) +static struct hist_browser *hist_browser__new(struct hists *hists, + struct hist_browser_timer *hbt, + struct perf_session_env *env) { struct hist_browser *browser = zalloc(sizeof(*browser)); @@ -1185,6 +1225,8 @@ static struct hist_browser *hist_browser__new(struct hists *hists) browser->b.seek = ui_browser__hists_seek; browser->b.use_navkeypressed = true; browser->show_headers = symbol_conf.show_hist_headers; + browser->hbt = hbt; + browser->env = env; } return browser; @@ -1374,6 +1416,257 @@ close_file_and_continue: return ret; } +struct popup_action { + struct thread *thread; + struct dso *dso; + struct map_symbol ms; + + int (*fn)(struct hist_browser *browser, struct popup_action *act); +}; + +static int +do_annotate(struct hist_browser *browser, struct popup_action *act) +{ + struct perf_evsel *evsel; + struct annotation *notes; + struct hist_entry *he; + int err; + + if (!objdump_path && perf_session_env__lookup_objdump(browser->env)) + return 0; + + notes = symbol__annotation(act->ms.sym); + if (!notes->src) + return 0; + + evsel = hists_to_evsel(browser->hists); + err = map_symbol__tui_annotate(&act->ms, evsel, browser->hbt); + he = hist_browser__selected_entry(browser); + /* + * offer option to annotate the other branch source or target + * (if they exists) when returning from annotate + */ + if ((err == 'q' || err == CTRL('c')) && he->branch_info) + return 1; + + ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); + if (err) + ui_browser__handle_resize(&browser->b); + return 0; +} + +static int +add_annotate_opt(struct hist_browser *browser __maybe_unused, + struct popup_action *act, char **optstr, + struct map *map, struct symbol *sym) +{ + if (sym == NULL || map->dso->annotate_warned) + return 0; + + if (asprintf(optstr, "Annotate %s", sym->name) < 0) + return 0; + + act->ms.map = map; + act->ms.sym = sym; + act->fn = do_annotate; + return 1; +} + +static int +do_zoom_thread(struct hist_browser *browser, struct popup_action *act) +{ + struct thread *thread = act->thread; + + if (browser->hists->thread_filter) { + pstack__remove(browser->pstack, &browser->hists->thread_filter); + perf_hpp__set_elide(HISTC_THREAD, false); + thread__zput(browser->hists->thread_filter); + ui_helpline__pop(); + } else { + ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", + thread->comm_set ? 
thread__comm_str(thread) : "", + thread->tid); + browser->hists->thread_filter = thread__get(thread); + perf_hpp__set_elide(HISTC_THREAD, false); + pstack__push(browser->pstack, &browser->hists->thread_filter); + } + + hists__filter_by_thread(browser->hists); + hist_browser__reset(browser); + return 0; +} + +static int +add_thread_opt(struct hist_browser *browser, struct popup_action *act, + char **optstr, struct thread *thread) +{ + if (thread == NULL) + return 0; + + if (asprintf(optstr, "Zoom %s %s(%d) thread", + browser->hists->thread_filter ? "out of" : "into", + thread->comm_set ? thread__comm_str(thread) : "", + thread->tid) < 0) + return 0; + + act->thread = thread; + act->fn = do_zoom_thread; + return 1; +} + +static int +do_zoom_dso(struct hist_browser *browser, struct popup_action *act) +{ + struct dso *dso = act->dso; + + if (browser->hists->dso_filter) { + pstack__remove(browser->pstack, &browser->hists->dso_filter); + perf_hpp__set_elide(HISTC_DSO, false); + browser->hists->dso_filter = NULL; + ui_helpline__pop(); + } else { + if (dso == NULL) + return 0; + ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", + dso->kernel ? "the Kernel" : dso->short_name); + browser->hists->dso_filter = dso; + perf_hpp__set_elide(HISTC_DSO, true); + pstack__push(browser->pstack, &browser->hists->dso_filter); + } + + hists__filter_by_dso(browser->hists); + hist_browser__reset(browser); + return 0; +} + +static int +add_dso_opt(struct hist_browser *browser, struct popup_action *act, + char **optstr, struct dso *dso) +{ + if (dso == NULL) + return 0; + + if (asprintf(optstr, "Zoom %s %s DSO", + browser->hists->dso_filter ? "out of" : "into", + dso->kernel ? "the Kernel" : dso->short_name) < 0) + return 0; + + act->dso = dso; + act->fn = do_zoom_dso; + return 1; +} + +static int +do_browse_map(struct hist_browser *browser __maybe_unused, + struct popup_action *act) +{ + map__browse(act->ms.map); + return 0; +} + +static int +add_map_opt(struct hist_browser *browser __maybe_unused, + struct popup_action *act, char **optstr, struct map *map) +{ + if (map == NULL) + return 0; + + if (asprintf(optstr, "Browse map details") < 0) + return 0; + + act->ms.map = map; + act->fn = do_browse_map; + return 1; +} + +static int +do_run_script(struct hist_browser *browser __maybe_unused, + struct popup_action *act) +{ + char script_opt[64]; + memset(script_opt, 0, sizeof(script_opt)); + + if (act->thread) { + scnprintf(script_opt, sizeof(script_opt), " -c %s ", + thread__comm_str(act->thread)); + } else if (act->ms.sym) { + scnprintf(script_opt, sizeof(script_opt), " -S %s ", + act->ms.sym->name); + } + + script_browse(script_opt); + return 0; +} + +static int +add_script_opt(struct hist_browser *browser __maybe_unused, + struct popup_action *act, char **optstr, + struct thread *thread, struct symbol *sym) +{ + if (thread) { + if (asprintf(optstr, "Run scripts for samples of thread [%s]", + thread__comm_str(thread)) < 0) + return 0; + } else if (sym) { + if (asprintf(optstr, "Run scripts for samples of symbol [%s]", + sym->name) < 0) + return 0; + } else { + if (asprintf(optstr, "Run scripts for all samples") < 0) + return 0; + } + + act->thread = thread; + act->ms.sym = sym; + act->fn = do_run_script; + return 1; +} + +static int +do_switch_data(struct hist_browser *browser __maybe_unused, + struct popup_action *act __maybe_unused) +{ + if (switch_data_file()) { + ui__warning("Won't switch the data files due to\n" + "no valid data file get selected!\n"); + return 0; + } + + return 
K_SWITCH_INPUT_DATA; +} + +static int +add_switch_opt(struct hist_browser *browser, + struct popup_action *act, char **optstr) +{ + if (!is_report_browser(browser->hbt)) + return 0; + + if (asprintf(optstr, "Switch to another data file in PWD") < 0) + return 0; + + act->fn = do_switch_data; + return 1; +} + +static int +do_exit_browser(struct hist_browser *browser __maybe_unused, + struct popup_action *act __maybe_unused) +{ + return 0; +} + +static int +add_exit_opt(struct hist_browser *browser __maybe_unused, + struct popup_action *act, char **optstr) +{ + if (asprintf(optstr, "Exit") < 0) + return 0; + + act->fn = do_exit_browser; + return 1; +} + static void hist_browser__update_nr_entries(struct hist_browser *hb) { u64 nr_entries = 0; @@ -1400,14 +1693,14 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, struct perf_session_env *env) { struct hists *hists = evsel__hists(evsel); - struct hist_browser *browser = hist_browser__new(hists); + struct hist_browser *browser = hist_browser__new(hists, hbt, env); struct branch_info *bi; - struct pstack *fstack; - char *options[16]; +#define MAX_OPTIONS 16 + char *options[MAX_OPTIONS]; + struct popup_action actions[MAX_OPTIONS]; int nr_options = 0; int key = -1; char buf[64]; - char script_opt[64]; int delay_secs = hbt ? hbt->refresh : 0; struct perf_hpp_fmt *fmt; @@ -1442,23 +1735,29 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, "t Zoom into current Thread\n" "V Verbose (DSO names in callchains, etc)\n" "z Toggle zeroing of samples\n" + "f Enable/Disable events\n" "/ Filter symbol by name"; if (browser == NULL) return -1; + /* reset abort key so that it can get Ctrl-C as a key */ + SLang_reset_tty(); + SLang_init_tty(0, 0, 0); + if (min_pcnt) { browser->min_pcnt = min_pcnt; hist_browser__update_nr_entries(browser); } - fstack = pstack__new(2); - if (fstack == NULL) + browser->pstack = pstack__new(2); + if (browser->pstack == NULL) goto out; ui_helpline__push(helpline); memset(options, 0, sizeof(options)); + memset(actions, 0, sizeof(actions)); perf_hpp__for_each_format(fmt) perf_hpp__reset_width(fmt, hists); @@ -1467,17 +1766,13 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, perf_hpp__set_user_width(symbol_conf.col_width_list_str); while (1) { - const struct thread *thread = NULL; - const struct dso *dso = NULL; - int choice = 0, - annotate = -2, zoom_dso = -2, zoom_thread = -2, - annotate_f = -2, annotate_t = -2, browse_map = -2; - int scripts_comm = -2, scripts_symbol = -2, - scripts_all = -2, switch_data = -2; + struct thread *thread = NULL; + struct dso *dso = NULL; + int choice = 0; nr_options = 0; - key = hist_browser__run(browser, hbt); + key = hist_browser__run(browser, helpline); if (browser->he_selection != NULL) { thread = hist_browser__selected_thread(browser); @@ -1505,17 +1800,25 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, browser->selection->sym == NULL || browser->selection->map->dso->annotate_warned) continue; - goto do_annotate; + + actions->ms.map = browser->selection->map; + actions->ms.sym = browser->selection->sym; + do_annotate(browser, actions); + continue; case 'P': hist_browser__dump(browser); continue; case 'd': - goto zoom_dso; + actions->dso = dso; + do_zoom_dso(browser, actions); + continue; case 'V': browser->show_dso = !browser->show_dso; continue; case 't': - goto zoom_thread; + actions->thread = thread; + do_zoom_thread(browser, actions); + continue; case '/': if 
(ui_browser__input_window("Symbol to show", "Please enter the name of symbol you want to see", @@ -1527,12 +1830,18 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, } continue; case 'r': - if (is_report_browser(hbt)) - goto do_scripts; + if (is_report_browser(hbt)) { + actions->thread = NULL; + actions->ms.sym = NULL; + do_run_script(browser, actions); + } continue; case 's': - if (is_report_browser(hbt)) - goto do_data_switch; + if (is_report_browser(hbt)) { + key = do_switch_data(browser, actions); + if (key == K_SWITCH_INPUT_DATA) + goto out_free_stack; + } continue; case 'i': /* env->arch is NULL for live-mode (i.e. perf top) */ @@ -1562,7 +1871,7 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, case K_LEFT: { const void *top; - if (pstack__empty(fstack)) { + if (pstack__empty(browser->pstack)) { /* * Go back to the perf_evsel_menu__run or other user */ @@ -1570,11 +1879,17 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, goto out_free_stack; continue; } - top = pstack__pop(fstack); - if (top == &browser->hists->dso_filter) - goto zoom_out_dso; + top = pstack__peek(browser->pstack); + if (top == &browser->hists->dso_filter) { + /* + * No need to set actions->dso here since + * it's just to remove the current filter. + * Ditto for thread below. + */ + do_zoom_dso(browser, actions); + } if (top == &browser->hists->thread_filter) - goto zoom_out_thread; + do_zoom_thread(browser, actions); continue; } case K_ESC: @@ -1586,217 +1901,107 @@ static int perf_evsel__hists_browse(struct perf_evsel *evsel, int nr_events, case 'q': case CTRL('c'): goto out_free_stack; + case 'f': + if (!is_report_browser(hbt)) { + struct perf_top *top = hbt->arg; + + perf_evlist__toggle_enable(top->evlist); + /* + * No need to refresh, resort/decay histogram + * entries if we are not collecting samples: + */ + if (top->evlist->enabled) { + helpline = "Press 'f' to disable the events or 'h' to see other hotkeys"; + hbt->refresh = delay_secs; + } else { + helpline = "Press 'f' again to re-enable the events"; + hbt->refresh = 0; + } + continue; + } + /* Fall thru */ default: + helpline = "Press '?' 
for help on key bindings"; continue; } if (!sort__has_sym) goto add_exit_option; + if (browser->selection == NULL) + goto skip_annotation; + if (sort__mode == SORT_MODE__BRANCH) { bi = browser->he_selection->branch_info; - if (browser->selection != NULL && - bi && - bi->from.sym != NULL && - !bi->from.map->dso->annotate_warned && - asprintf(&options[nr_options], "Annotate %s", - bi->from.sym->name) > 0) - annotate_f = nr_options++; - - if (browser->selection != NULL && - bi && - bi->to.sym != NULL && - !bi->to.map->dso->annotate_warned && - (bi->to.sym != bi->from.sym || - bi->to.map->dso != bi->from.map->dso) && - asprintf(&options[nr_options], "Annotate %s", - bi->to.sym->name) > 0) - annotate_t = nr_options++; - } else { - if (browser->selection != NULL && - browser->selection->sym != NULL && - !browser->selection->map->dso->annotate_warned) { - struct annotation *notes; - - notes = symbol__annotation(browser->selection->sym); - if (notes->src && - asprintf(&options[nr_options], "Annotate %s", - browser->selection->sym->name) > 0) - annotate = nr_options++; - } + if (bi == NULL) + goto skip_annotation; + + nr_options += add_annotate_opt(browser, + &actions[nr_options], + &options[nr_options], + bi->from.map, + bi->from.sym); + if (bi->to.sym != bi->from.sym) + nr_options += add_annotate_opt(browser, + &actions[nr_options], + &options[nr_options], + bi->to.map, + bi->to.sym); + } else { + nr_options += add_annotate_opt(browser, + &actions[nr_options], + &options[nr_options], + browser->selection->map, + browser->selection->sym); } - - if (thread != NULL && - asprintf(&options[nr_options], "Zoom %s %s(%d) thread", - (browser->hists->thread_filter ? "out of" : "into"), - (thread->comm_set ? thread__comm_str(thread) : ""), - thread->tid) > 0) - zoom_thread = nr_options++; - - if (dso != NULL && - asprintf(&options[nr_options], "Zoom %s %s DSO", - (browser->hists->dso_filter ? "out of" : "into"), - (dso->kernel ? 
"the Kernel" : dso->short_name)) > 0) - zoom_dso = nr_options++; - - if (browser->selection != NULL && - browser->selection->map != NULL && - asprintf(&options[nr_options], "Browse map details") > 0) - browse_map = nr_options++; +skip_annotation: + nr_options += add_thread_opt(browser, &actions[nr_options], + &options[nr_options], thread); + nr_options += add_dso_opt(browser, &actions[nr_options], + &options[nr_options], dso); + nr_options += add_map_opt(browser, &actions[nr_options], + &options[nr_options], + browser->selection->map); /* perf script support */ if (browser->he_selection) { - struct symbol *sym; - - if (asprintf(&options[nr_options], "Run scripts for samples of thread [%s]", - thread__comm_str(browser->he_selection->thread)) > 0) - scripts_comm = nr_options++; - - sym = browser->he_selection->ms.sym; - if (sym && sym->namelen && - asprintf(&options[nr_options], "Run scripts for samples of symbol [%s]", - sym->name) > 0) - scripts_symbol = nr_options++; + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + thread, NULL); + nr_options += add_script_opt(browser, + &actions[nr_options], + &options[nr_options], + NULL, browser->selection->sym); } - - if (asprintf(&options[nr_options], "Run scripts for all samples") > 0) - scripts_all = nr_options++; - - if (is_report_browser(hbt) && asprintf(&options[nr_options], - "Switch to another data file in PWD") > 0) - switch_data = nr_options++; + nr_options += add_script_opt(browser, &actions[nr_options], + &options[nr_options], NULL, NULL); + nr_options += add_switch_opt(browser, &actions[nr_options], + &options[nr_options]); add_exit_option: - options[nr_options++] = (char *)"Exit"; -retry_popup_menu: - choice = ui__popup_menu(nr_options, options); - - if (choice == nr_options - 1) - break; - - if (choice == -1) { - free_popup_options(options, nr_options - 1); - continue; - } - - if (choice == annotate || choice == annotate_t || choice == annotate_f) { - struct hist_entry *he; - struct annotation *notes; - int err; -do_annotate: - if (!objdump_path && perf_session_env__lookup_objdump(env)) - continue; - - he = hist_browser__selected_entry(browser); - if (he == NULL) - continue; - - /* - * we stash the branch_info symbol + map into the - * the ms so we don't have to rewrite all the annotation - * code to use branch_info. - * in branch mode, the ms struct is not used - */ - if (choice == annotate_f) { - he->ms.sym = he->branch_info->from.sym; - he->ms.map = he->branch_info->from.map; - } else if (choice == annotate_t) { - he->ms.sym = he->branch_info->to.sym; - he->ms.map = he->branch_info->to.map; - } + nr_options += add_exit_opt(browser, &actions[nr_options], + &options[nr_options]); - notes = symbol__annotation(he->ms.sym); - if (!notes->src) - continue; - - /* - * Don't let this be freed, say, by hists__decay_entry. 
- */ - he->used = true; - err = hist_entry__tui_annotate(he, evsel, hbt); - he->used = false; - /* - * offer option to annotate the other branch source or target - * (if they exists) when returning from annotate - */ - if ((err == 'q' || err == CTRL('c')) - && annotate_t != -2 && annotate_f != -2) - goto retry_popup_menu; - - ui_browser__update_nr_entries(&browser->b, browser->hists->nr_entries); - if (err) - ui_browser__handle_resize(&browser->b); - - } else if (choice == browse_map) - map__browse(browser->selection->map); - else if (choice == zoom_dso) { -zoom_dso: - if (browser->hists->dso_filter) { - pstack__remove(fstack, &browser->hists->dso_filter); -zoom_out_dso: - ui_helpline__pop(); - browser->hists->dso_filter = NULL; - perf_hpp__set_elide(HISTC_DSO, false); - } else { - if (dso == NULL) - continue; - ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s DSO\"", - dso->kernel ? "the Kernel" : dso->short_name); - browser->hists->dso_filter = dso; - perf_hpp__set_elide(HISTC_DSO, true); - pstack__push(fstack, &browser->hists->dso_filter); - } - hists__filter_by_dso(hists); - hist_browser__reset(browser); - } else if (choice == zoom_thread) { -zoom_thread: - if (browser->hists->thread_filter) { - pstack__remove(fstack, &browser->hists->thread_filter); -zoom_out_thread: - ui_helpline__pop(); - browser->hists->thread_filter = NULL; - perf_hpp__set_elide(HISTC_THREAD, false); - } else { - ui_helpline__fpush("To zoom out press <- or -> + \"Zoom out of %s(%d) thread\"", - thread->comm_set ? thread__comm_str(thread) : "", - thread->tid); - browser->hists->thread_filter = thread; - perf_hpp__set_elide(HISTC_THREAD, false); - pstack__push(fstack, &browser->hists->thread_filter); - } - hists__filter_by_thread(hists); - hist_browser__reset(browser); - } - /* perf scripts support */ - else if (choice == scripts_all || choice == scripts_comm || - choice == scripts_symbol) { -do_scripts: - memset(script_opt, 0, 64); + do { + struct popup_action *act; - if (choice == scripts_comm) - sprintf(script_opt, " -c %s ", thread__comm_str(browser->he_selection->thread)); + choice = ui__popup_menu(nr_options, options); + if (choice == -1 || choice >= nr_options) + break; - if (choice == scripts_symbol) - sprintf(script_opt, " -S %s ", browser->he_selection->ms.sym->name); + act = &actions[choice]; + key = act->fn(browser, act); + } while (key == 1); - script_browse(script_opt); - } - /* Switch to another data file */ - else if (choice == switch_data) { -do_data_switch: - if (!switch_data_file()) { - key = K_SWITCH_INPUT_DATA; - break; - } else - ui__warning("Won't switch the data files due to\n" - "no valid data file get selected!\n"); - } + if (key == K_SWITCH_INPUT_DATA) + break; } out_free_stack: - pstack__delete(fstack); + pstack__delete(browser->pstack); out: hist_browser__delete(browser); - free_popup_options(options, nr_options - 1); + free_popup_options(options, MAX_OPTIONS); return key; } diff --git a/tools/perf/ui/gtk/Build b/tools/perf/ui/gtk/Build new file mode 100644 index 000000000000..ec22e899a224 --- /dev/null +++ b/tools/perf/ui/gtk/Build @@ -0,0 +1,9 @@ +CFLAGS_gtk += -fPIC $(GTK_CFLAGS) + +gtk-y += browser.o +gtk-y += hists.o +gtk-y += setup.o +gtk-y += util.o +gtk-y += helpline.o +gtk-y += progress.o +gtk-y += annotate.o diff --git a/tools/perf/ui/tui/Build b/tools/perf/ui/tui/Build new file mode 100644 index 000000000000..9e4c6ca41a9f --- /dev/null +++ b/tools/perf/ui/tui/Build @@ -0,0 +1,4 @@ +libperf-y += setup.o +libperf-y += util.o +libperf-y += helpline.o +libperf-y 
+= progress.o diff --git a/tools/perf/ui/tui/setup.c b/tools/perf/ui/tui/setup.c index b77e1d771363..60d1f29b4b50 100644 --- a/tools/perf/ui/tui/setup.c +++ b/tools/perf/ui/tui/setup.c @@ -129,7 +129,7 @@ int ui__init(void) err = SLsmg_init_smg(); if (err < 0) goto out; - err = SLang_init_tty(0, 0, 0); + err = SLang_init_tty(-1, 0, 0); if (err < 0) goto out; diff --git a/tools/perf/util/Build b/tools/perf/util/Build new file mode 100644 index 000000000000..d2d318c59b37 --- /dev/null +++ b/tools/perf/util/Build @@ -0,0 +1,148 @@ +libperf-y += abspath.o +libperf-y += alias.o +libperf-y += annotate.o +libperf-y += build-id.o +libperf-y += config.o +libperf-y += ctype.o +libperf-y += db-export.o +libperf-y += environment.o +libperf-y += event.o +libperf-y += evlist.o +libperf-y += evsel.o +libperf-y += exec_cmd.o +libperf-y += find_next_bit.o +libperf-y += help.o +libperf-y += kallsyms.o +libperf-y += levenshtein.o +libperf-y += parse-options.o +libperf-y += parse-events.o +libperf-y += path.o +libperf-y += rbtree.o +libperf-y += bitmap.o +libperf-y += hweight.o +libperf-y += run-command.o +libperf-y += quote.o +libperf-y += strbuf.o +libperf-y += string.o +libperf-y += strlist.o +libperf-y += strfilter.o +libperf-y += top.o +libperf-y += usage.o +libperf-y += wrapper.o +libperf-y += sigchain.o +libperf-y += dso.o +libperf-y += symbol.o +libperf-y += color.o +libperf-y += pager.o +libperf-y += header.o +libperf-y += callchain.o +libperf-y += values.o +libperf-y += debug.o +libperf-y += machine.o +libperf-y += map.o +libperf-y += pstack.o +libperf-y += session.o +libperf-y += ordered-events.o +libperf-y += comm.o +libperf-y += thread.o +libperf-y += thread_map.o +libperf-y += trace-event-parse.o +libperf-y += parse-events-flex.o +libperf-y += parse-events-bison.o +libperf-y += pmu.o +libperf-y += pmu-flex.o +libperf-y += pmu-bison.o +libperf-y += trace-event-read.o +libperf-y += trace-event-info.o +libperf-y += trace-event-scripting.o +libperf-y += trace-event.o +libperf-y += svghelper.o +libperf-y += sort.o +libperf-y += hist.o +libperf-y += util.o +libperf-y += xyarray.o +libperf-y += cpumap.o +libperf-y += cgroup.o +libperf-y += target.o +libperf-y += rblist.o +libperf-y += intlist.o +libperf-y += vdso.o +libperf-y += stat.o +libperf-y += stat-shadow.o +libperf-y += record.o +libperf-y += srcline.o +libperf-y += data.o +libperf-$(CONFIG_X86) += tsc.o +libperf-y += cloexec.o +libperf-y += thread-stack.o +libperf-$(CONFIG_AUXTRACE) += auxtrace.o +libperf-y += parse-branch-options.o + +libperf-$(CONFIG_LIBELF) += symbol-elf.o +libperf-$(CONFIG_LIBELF) += probe-event.o + +ifndef CONFIG_LIBELF +libperf-y += symbol-minimal.o +endif + +libperf-$(CONFIG_DWARF) += probe-finder.o +libperf-$(CONFIG_DWARF) += dwarf-aux.o + +libperf-$(CONFIG_LIBDW_DWARF_UNWIND) += unwind-libdw.o +libperf-$(CONFIG_LIBUNWIND) += unwind-libunwind.o + +libperf-$(CONFIG_LIBBABELTRACE) += data-convert-bt.o + +libperf-y += scripting-engines/ + +libperf-$(CONFIG_PERF_REGS) += perf_regs.o +libperf-$(CONFIG_ZLIB) += zlib.o +libperf-$(CONFIG_LZMA) += lzma.o + +CFLAGS_config.o += -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_exec_cmd.o += -DPERF_EXEC_PATH="BUILD_STR($(perfexecdir_SQ))" -DPREFIX="BUILD_STR($(prefix_SQ))" + +$(OUTPUT)util/parse-events-flex.c: util/parse-events.l $(OUTPUT)util/parse-events-bison.c + $(call rule_mkdir) + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/parse-events-flex.h $(PARSER_DEBUG_FLEX) util/parse-events.l + +$(OUTPUT)util/parse-events-bison.c: util/parse-events.y + 
$(call rule_mkdir) + $(Q)$(call echo-cmd,bison)$(BISON) -v util/parse-events.y -d $(PARSER_DEBUG_BISON) -o $@ -p parse_events_ + +$(OUTPUT)util/pmu-flex.c: util/pmu.l $(OUTPUT)util/pmu-bison.c + $(call rule_mkdir) + $(Q)$(call echo-cmd,flex)$(FLEX) -o $@ --header-file=$(OUTPUT)util/pmu-flex.h util/pmu.l + +$(OUTPUT)util/pmu-bison.c: util/pmu.y + $(call rule_mkdir) + $(Q)$(call echo-cmd,bison)$(BISON) -v util/pmu.y -d -o $@ -p perf_pmu_ + +CFLAGS_parse-events-flex.o += -w +CFLAGS_pmu-flex.o += -w +CFLAGS_parse-events-bison.o += -DYYENABLE_NLS=0 -w +CFLAGS_pmu-bison.o += -DYYENABLE_NLS=0 -DYYLTYPE_IS_TRIVIAL=0 -w + +$(OUTPUT)util/parse-events.o: $(OUTPUT)util/parse-events-flex.c $(OUTPUT)util/parse-events-bison.c +$(OUTPUT)util/pmu.o: $(OUTPUT)util/pmu-flex.c $(OUTPUT)util/pmu-bison.c + +CFLAGS_find_next_bit.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_rbtree.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_hweight.o += -Wno-unused-parameter -DETC_PERFCONFIG="BUILD_STR($(ETC_PERFCONFIG_SQ))" +CFLAGS_parse-events.o += -Wno-redundant-decls + +$(OUTPUT)util/kallsyms.o: ../lib/symbol/kallsyms.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/find_next_bit.o: ../lib/util/find_next_bit.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/rbtree.o: ../lib/rbtree.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) + +$(OUTPUT)util/hweight.o: ../lib/hweight.c FORCE + $(call rule_mkdir) + $(call if_changed_dep,cc_o_c) diff --git a/tools/perf/util/annotate.c b/tools/perf/util/annotate.c index 9d9db3b296dd..03b7bc70eb66 100644 --- a/tools/perf/util/annotate.c +++ b/tools/perf/util/annotate.c @@ -506,6 +506,17 @@ static int __symbol__inc_addr_samples(struct symbol *sym, struct map *map, return 0; } +static struct annotation *symbol__get_annotation(struct symbol *sym) +{ + struct annotation *notes = symbol__annotation(sym); + + if (notes->src == NULL) { + if (symbol__alloc_hist(sym) < 0) + return NULL; + } + return notes; +} + static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, int evidx, u64 addr) { @@ -513,13 +524,9 @@ static int symbol__inc_addr_samples(struct symbol *sym, struct map *map, if (sym == NULL) return 0; - - notes = symbol__annotation(sym); - if (notes->src == NULL) { - if (symbol__alloc_hist(sym) < 0) - return -ENOMEM; - } - + notes = symbol__get_annotation(sym); + if (notes == NULL) + return -ENOMEM; return __symbol__inc_addr_samples(sym, map, notes, evidx, addr); } @@ -647,14 +654,15 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa } double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path) + s64 end, const char **path, u64 *nr_samples) { struct source_line *src_line = notes->src->lines; double percent = 0.0; + *nr_samples = 0; if (src_line) { size_t sizeof_src_line = sizeof(*src_line) + - sizeof(src_line->p) * (src_line->nr_pcnt - 1); + sizeof(src_line->samples) * (src_line->nr_pcnt - 1); while (offset < end) { src_line = (void *)notes->src->lines + @@ -663,7 +671,8 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, if (*path == NULL) *path = src_line->path; - percent += src_line->p[evidx].percent; + percent += src_line->samples[evidx].percent; + *nr_samples += src_line->samples[evidx].nr; offset++; } } else { @@ -673,8 +682,10 @@ double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, while 
(offset < end) hits += h->addr[offset++]; - if (h->sum) + if (h->sum) { + *nr_samples = hits; percent = 100.0 * hits / h->sum; + } } return percent; @@ -689,8 +700,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (dl->offset != -1) { const char *path = NULL; + u64 nr_samples; double percent, max_percent = 0.0; double *ppercents = &percent; + u64 *psamples = &nr_samples; int i, nr_percent = 1; const char *color; struct annotation *notes = symbol__annotation(sym); @@ -703,8 +716,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (perf_evsel__is_group_event(evsel)) { nr_percent = evsel->nr_members; ppercents = calloc(nr_percent, sizeof(double)); - if (ppercents == NULL) + psamples = calloc(nr_percent, sizeof(u64)); + if (ppercents == NULL || psamples == NULL) { return -1; + } } for (i = 0; i < nr_percent; i++) { @@ -712,9 +727,10 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st notes->src->lines ? i : evsel->idx + i, offset, next ? next->offset : (s64) len, - &path); + &path, &nr_samples); ppercents[i] = percent; + psamples[i] = nr_samples; if (percent > max_percent) max_percent = percent; } @@ -752,8 +768,14 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st for (i = 0; i < nr_percent; i++) { percent = ppercents[i]; + nr_samples = psamples[i]; color = get_percent_color(percent); - color_fprintf(stdout, color, " %7.2f", percent); + + if (symbol_conf.show_total_period) + color_fprintf(stdout, color, " %7" PRIu64, + nr_samples); + else + color_fprintf(stdout, color, " %7.2f", percent); } printf(" : "); @@ -763,6 +785,9 @@ static int disasm_line__print(struct disasm_line *dl, struct symbol *sym, u64 st if (ppercents != &percent) free(ppercents); + if (psamples != &nr_samples) + free(psamples); + } else if (max_lines && printed >= max_lines) return 1; else { @@ -1010,6 +1035,32 @@ fallback: } filename = symfs_filename; } + } else if (dso__needs_decompress(dso)) { + char tmp[PATH_MAX]; + struct kmod_path m; + int fd; + bool ret; + + if (kmod_path__parse_ext(&m, symfs_filename)) + goto out_free_filename; + + snprintf(tmp, PATH_MAX, "/tmp/perf-kmod-XXXXXX"); + + fd = mkstemp(tmp); + if (fd < 0) { + free(m.ext); + goto out_free_filename; + } + + ret = decompress_to_file(m.ext, symfs_filename, fd); + + free(m.ext); + close(fd); + + if (!ret) + goto out_free_filename; + + strcpy(symfs_filename, tmp); } snprintf(command, sizeof(command), @@ -1029,7 +1080,7 @@ fallback: file = popen(command, "r"); if (!file) - goto out_free_filename; + goto out_remove_tmp; while (!feof(file)) if (symbol__parse_objdump_line(sym, map, file, privsize, @@ -1044,6 +1095,10 @@ fallback: delete_last_nop(sym); pclose(file); + +out_remove_tmp: + if (dso__needs_decompress(dso)) + unlink(symfs_filename); out_free_filename: if (delete_extract) kcore_extract__delete(&kce); @@ -1066,7 +1121,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin ret = strcmp(iter->path, src_line->path); if (ret == 0) { for (i = 0; i < src_line->nr_pcnt; i++) - iter->p[i].percent_sum += src_line->p[i].percent; + iter->samples[i].percent_sum += src_line->samples[i].percent; return; } @@ -1077,7 +1132,7 @@ static void insert_source_line(struct rb_root *root, struct source_line *src_lin } for (i = 0; i < src_line->nr_pcnt; i++) - src_line->p[i].percent_sum = src_line->p[i].percent; + src_line->samples[i].percent_sum = src_line->samples[i].percent; rb_link_node(&src_line->node, 
parent, p); rb_insert_color(&src_line->node, root); @@ -1088,9 +1143,9 @@ static int cmp_source_line(struct source_line *a, struct source_line *b) int i; for (i = 0; i < a->nr_pcnt; i++) { - if (a->p[i].percent_sum == b->p[i].percent_sum) + if (a->samples[i].percent_sum == b->samples[i].percent_sum) continue; - return a->p[i].percent_sum > b->p[i].percent_sum; + return a->samples[i].percent_sum > b->samples[i].percent_sum; } return 0; @@ -1142,7 +1197,7 @@ static void symbol__free_source_line(struct symbol *sym, int len) int i; sizeof_src_line = sizeof(*src_line) + - (sizeof(src_line->p) * (src_line->nr_pcnt - 1)); + (sizeof(src_line->samples) * (src_line->nr_pcnt - 1)); for (i = 0; i < len; i++) { free_srcline(src_line->path); @@ -1174,7 +1229,7 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, h_sum += h->sum; } nr_pcnt = evsel->nr_members; - sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->p); + sizeof_src_line += (nr_pcnt - 1) * sizeof(src_line->samples); } if (!h_sum) @@ -1194,10 +1249,10 @@ static int symbol__get_source_line(struct symbol *sym, struct map *map, for (k = 0; k < nr_pcnt; k++) { h = annotation__histogram(notes, evidx + k); - src_line->p[k].percent = 100.0 * h->addr[i] / h->sum; + src_line->samples[k].percent = 100.0 * h->addr[i] / h->sum; - if (src_line->p[k].percent > percent_max) - percent_max = src_line->p[k].percent; + if (src_line->samples[k].percent > percent_max) + percent_max = src_line->samples[k].percent; } if (percent_max <= 0.5) @@ -1237,7 +1292,7 @@ static void print_summary(struct rb_root *root, const char *filename) src_line = rb_entry(node, struct source_line, node); for (i = 0; i < src_line->nr_pcnt; i++) { - percent = src_line->p[i].percent_sum; + percent = src_line->samples[i].percent_sum; color = get_percent_color(percent); color_fprintf(stdout, color, " %7.2f", percent); diff --git a/tools/perf/util/annotate.h b/tools/perf/util/annotate.h index cadbdc90a5cb..7e78e6c27078 100644 --- a/tools/perf/util/annotate.h +++ b/tools/perf/util/annotate.h @@ -72,23 +72,24 @@ struct disasm_line *disasm__get_next_ip_line(struct list_head *head, struct disa int disasm_line__scnprintf(struct disasm_line *dl, char *bf, size_t size, bool raw); size_t disasm__fprintf(struct list_head *head, FILE *fp); double disasm__calc_percent(struct annotation *notes, int evidx, s64 offset, - s64 end, const char **path); + s64 end, const char **path, u64 *nr_samples); struct sym_hist { u64 sum; u64 addr[0]; }; -struct source_line_percent { +struct source_line_samples { double percent; double percent_sum; + double nr; }; struct source_line { struct rb_node node; char *path; int nr_pcnt; - struct source_line_percent p[1]; + struct source_line_samples samples[1]; }; /** struct annotated_source - symbols with hits have this attached as in sannotation diff --git a/tools/perf/util/auxtrace.c b/tools/perf/util/auxtrace.c new file mode 100644 index 000000000000..83d9dd96fe08 --- /dev/null +++ b/tools/perf/util/auxtrace.c @@ -0,0 +1,1359 @@ +/* + * auxtrace.c: AUX area trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. 
See the GNU General Public License for + * more details. + * + */ + +#include <sys/types.h> +#include <sys/mman.h> +#include <stdbool.h> + +#include <linux/kernel.h> +#include <linux/perf_event.h> +#include <linux/types.h> +#include <linux/bitops.h> +#include <linux/log2.h> +#include <linux/string.h> + +#include <sys/param.h> +#include <stdlib.h> +#include <stdio.h> +#include <string.h> +#include <limits.h> +#include <errno.h> +#include <linux/list.h> + +#include "../perf.h" +#include "util.h" +#include "evlist.h" +#include "cpumap.h" +#include "thread_map.h" +#include "asm/bug.h" +#include "auxtrace.h" + +#include <linux/hash.h> + +#include "event.h" +#include "session.h" +#include "debug.h" +#include "parse-options.h" + +int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, + struct auxtrace_mmap_params *mp, + void *userpg, int fd) +{ + struct perf_event_mmap_page *pc = userpg; + + WARN_ONCE(mm->base, "Uninitialized auxtrace_mmap\n"); + + mm->userpg = userpg; + mm->mask = mp->mask; + mm->len = mp->len; + mm->prev = 0; + mm->idx = mp->idx; + mm->tid = mp->tid; + mm->cpu = mp->cpu; + + if (!mp->len) { + mm->base = NULL; + return 0; + } + +#if BITS_PER_LONG != 64 && !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) + pr_err("Cannot use AUX area tracing mmaps\n"); + return -1; +#endif + + pc->aux_offset = mp->offset; + pc->aux_size = mp->len; + + mm->base = mmap(NULL, mp->len, mp->prot, MAP_SHARED, fd, mp->offset); + if (mm->base == MAP_FAILED) { + pr_debug2("failed to mmap AUX area\n"); + mm->base = NULL; + return -1; + } + + return 0; +} + +void auxtrace_mmap__munmap(struct auxtrace_mmap *mm) +{ + if (mm->base) { + munmap(mm->base, mm->len); + mm->base = NULL; + } +} + +void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, + off_t auxtrace_offset, + unsigned int auxtrace_pages, + bool auxtrace_overwrite) +{ + if (auxtrace_pages) { + mp->offset = auxtrace_offset; + mp->len = auxtrace_pages * (size_t)page_size; + mp->mask = is_power_of_2(mp->len) ? mp->len - 1 : 0; + mp->prot = PROT_READ | (auxtrace_overwrite ? 
0 : PROT_WRITE); + pr_debug2("AUX area mmap length %zu\n", mp->len); + } else { + mp->len = 0; + } +} + +void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, + struct perf_evlist *evlist, int idx, + bool per_cpu) +{ + mp->idx = idx; + + if (per_cpu) { + mp->cpu = evlist->cpus->map[idx]; + if (evlist->threads) + mp->tid = thread_map__pid(evlist->threads, 0); + else + mp->tid = -1; + } else { + mp->cpu = -1; + mp->tid = thread_map__pid(evlist->threads, idx); + } +} + +#define AUXTRACE_INIT_NR_QUEUES 32 + +static struct auxtrace_queue *auxtrace_alloc_queue_array(unsigned int nr_queues) +{ + struct auxtrace_queue *queue_array; + unsigned int max_nr_queues, i; + + max_nr_queues = UINT_MAX / sizeof(struct auxtrace_queue); + if (nr_queues > max_nr_queues) + return NULL; + + queue_array = calloc(nr_queues, sizeof(struct auxtrace_queue)); + if (!queue_array) + return NULL; + + for (i = 0; i < nr_queues; i++) { + INIT_LIST_HEAD(&queue_array[i].head); + queue_array[i].priv = NULL; + } + + return queue_array; +} + +int auxtrace_queues__init(struct auxtrace_queues *queues) +{ + queues->nr_queues = AUXTRACE_INIT_NR_QUEUES; + queues->queue_array = auxtrace_alloc_queue_array(queues->nr_queues); + if (!queues->queue_array) + return -ENOMEM; + return 0; +} + +static int auxtrace_queues__grow(struct auxtrace_queues *queues, + unsigned int new_nr_queues) +{ + unsigned int nr_queues = queues->nr_queues; + struct auxtrace_queue *queue_array; + unsigned int i; + + if (!nr_queues) + nr_queues = AUXTRACE_INIT_NR_QUEUES; + + while (nr_queues && nr_queues < new_nr_queues) + nr_queues <<= 1; + + if (nr_queues < queues->nr_queues || nr_queues < new_nr_queues) + return -EINVAL; + + queue_array = auxtrace_alloc_queue_array(nr_queues); + if (!queue_array) + return -ENOMEM; + + for (i = 0; i < queues->nr_queues; i++) { + list_splice_tail(&queues->queue_array[i].head, + &queue_array[i].head); + queue_array[i].priv = queues->queue_array[i].priv; + } + + queues->nr_queues = nr_queues; + queues->queue_array = queue_array; + + return 0; +} + +static void *auxtrace_copy_data(u64 size, struct perf_session *session) +{ + int fd = perf_data_file__fd(session->file); + void *p; + ssize_t ret; + + if (size > SSIZE_MAX) + return NULL; + + p = malloc(size); + if (!p) + return NULL; + + ret = readn(fd, p, size); + if (ret != (ssize_t)size) { + free(p); + return NULL; + } + + return p; +} + +static int auxtrace_queues__add_buffer(struct auxtrace_queues *queues, + unsigned int idx, + struct auxtrace_buffer *buffer) +{ + struct auxtrace_queue *queue; + int err; + + if (idx >= queues->nr_queues) { + err = auxtrace_queues__grow(queues, idx + 1); + if (err) + return err; + } + + queue = &queues->queue_array[idx]; + + if (!queue->set) { + queue->set = true; + queue->tid = buffer->tid; + queue->cpu = buffer->cpu; + } else if (buffer->cpu != queue->cpu || buffer->tid != queue->tid) { + pr_err("auxtrace queue conflict: cpu %d, tid %d vs cpu %d, tid %d\n", + queue->cpu, queue->tid, buffer->cpu, buffer->tid); + return -EINVAL; + } + + buffer->buffer_nr = queues->next_buffer_nr++; + + list_add_tail(&buffer->list, &queue->head); + + queues->new_data = true; + queues->populated = true; + + return 0; +} + +/* Limit buffers to 32MiB on 32-bit */ +#define BUFFER_LIMIT_FOR_32_BIT (32 * 1024 * 1024) + +static int auxtrace_queues__split_buffer(struct auxtrace_queues *queues, + unsigned int idx, + struct auxtrace_buffer *buffer) +{ + u64 sz = buffer->size; + bool consecutive = false; + struct auxtrace_buffer *b; + int err; + + while (sz > 
BUFFER_LIMIT_FOR_32_BIT) { + b = memdup(buffer, sizeof(struct auxtrace_buffer)); + if (!b) + return -ENOMEM; + b->size = BUFFER_LIMIT_FOR_32_BIT; + b->consecutive = consecutive; + err = auxtrace_queues__add_buffer(queues, idx, b); + if (err) { + auxtrace_buffer__free(b); + return err; + } + buffer->data_offset += BUFFER_LIMIT_FOR_32_BIT; + sz -= BUFFER_LIMIT_FOR_32_BIT; + consecutive = true; + } + + buffer->size = sz; + buffer->consecutive = consecutive; + + return 0; +} + +static int auxtrace_queues__add_event_buffer(struct auxtrace_queues *queues, + struct perf_session *session, + unsigned int idx, + struct auxtrace_buffer *buffer) +{ + if (session->one_mmap) { + buffer->data = buffer->data_offset - session->one_mmap_offset + + session->one_mmap_addr; + } else if (perf_data_file__is_pipe(session->file)) { + buffer->data = auxtrace_copy_data(buffer->size, session); + if (!buffer->data) + return -ENOMEM; + buffer->data_needs_freeing = true; + } else if (BITS_PER_LONG == 32 && + buffer->size > BUFFER_LIMIT_FOR_32_BIT) { + int err; + + err = auxtrace_queues__split_buffer(queues, idx, buffer); + if (err) + return err; + } + + return auxtrace_queues__add_buffer(queues, idx, buffer); +} + +int auxtrace_queues__add_event(struct auxtrace_queues *queues, + struct perf_session *session, + union perf_event *event, off_t data_offset, + struct auxtrace_buffer **buffer_ptr) +{ + struct auxtrace_buffer *buffer; + unsigned int idx; + int err; + + buffer = zalloc(sizeof(struct auxtrace_buffer)); + if (!buffer) + return -ENOMEM; + + buffer->pid = -1; + buffer->tid = event->auxtrace.tid; + buffer->cpu = event->auxtrace.cpu; + buffer->data_offset = data_offset; + buffer->offset = event->auxtrace.offset; + buffer->reference = event->auxtrace.reference; + buffer->size = event->auxtrace.size; + idx = event->auxtrace.idx; + + err = auxtrace_queues__add_event_buffer(queues, session, idx, buffer); + if (err) + goto out_err; + + if (buffer_ptr) + *buffer_ptr = buffer; + + return 0; + +out_err: + auxtrace_buffer__free(buffer); + return err; +} + +static int auxtrace_queues__add_indexed_event(struct auxtrace_queues *queues, + struct perf_session *session, + off_t file_offset, size_t sz) +{ + union perf_event *event; + int err; + char buf[PERF_SAMPLE_MAX_SIZE]; + + err = perf_session__peek_event(session, file_offset, buf, + PERF_SAMPLE_MAX_SIZE, &event, NULL); + if (err) + return err; + + if (event->header.type == PERF_RECORD_AUXTRACE) { + if (event->header.size < sizeof(struct auxtrace_event) || + event->header.size != sz) { + err = -EINVAL; + goto out; + } + file_offset += event->header.size; + err = auxtrace_queues__add_event(queues, session, event, + file_offset, NULL); + } +out: + return err; +} + +void auxtrace_queues__free(struct auxtrace_queues *queues) +{ + unsigned int i; + + for (i = 0; i < queues->nr_queues; i++) { + while (!list_empty(&queues->queue_array[i].head)) { + struct auxtrace_buffer *buffer; + + buffer = list_entry(queues->queue_array[i].head.next, + struct auxtrace_buffer, list); + list_del(&buffer->list); + auxtrace_buffer__free(buffer); + } + } + + zfree(&queues->queue_array); + queues->nr_queues = 0; +} + +static void auxtrace_heapify(struct auxtrace_heap_item *heap_array, + unsigned int pos, unsigned int queue_nr, + u64 ordinal) +{ + unsigned int parent; + + while (pos) { + parent = (pos - 1) >> 1; + if (heap_array[parent].ordinal <= ordinal) + break; + heap_array[pos] = heap_array[parent]; + pos = parent; + } + heap_array[pos].queue_nr = queue_nr; + heap_array[pos].ordinal = ordinal; +} + 
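[Editor's note] The heap code in the hunk above keeps AUX area tracing queues ordered by a 64-bit ordinal (typically a timestamp): auxtrace_heapify() is a classic sift-up into a binary min-heap, so the queue with the lowest ordinal ends up at the root, and the helpers that follow (auxtrace_heap__add(), auxtrace_heap__pop()) grow the array on demand and re-heapify after removing the root. As a rough reading aid only, here is a minimal standalone sketch of the same sift-up step on a simplified item type; the demo_* names are hypothetical and are not part of perf's API.

/*
 * Illustrative, standalone sketch (not perf code): the same sift-up
 * insertion used by auxtrace_heapify(), on a simplified item type.
 */
#include <stdio.h>

struct demo_item {
	unsigned int queue_nr;
	unsigned long long ordinal;	/* e.g. a timestamp */
};

/* Insert at position 'pos', moving larger parents down (min-heap). */
static void demo_heapify(struct demo_item *heap, unsigned int pos,
			 unsigned int queue_nr, unsigned long long ordinal)
{
	unsigned int parent;

	while (pos) {
		parent = (pos - 1) >> 1;
		if (heap[parent].ordinal <= ordinal)
			break;
		heap[pos] = heap[parent];
		pos = parent;
	}
	heap[pos].queue_nr = queue_nr;
	heap[pos].ordinal = ordinal;
}

int main(void)
{
	struct demo_item heap[8];
	unsigned int cnt = 0;

	/* Queue numbers paired with out-of-order timestamps. */
	demo_heapify(heap, cnt++, 0, 300);
	demo_heapify(heap, cnt++, 1, 100);
	demo_heapify(heap, cnt++, 2, 200);

	/* The smallest ordinal ends up at the root: queue 1, ordinal 100. */
	printf("top: queue %u ordinal %llu\n",
	       heap[0].queue_nr, heap[0].ordinal);
	return 0;
}

The same sift-up is reused by auxtrace_heap__pop() below, which moves the last element toward the root after sifting the smaller child up each level.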
+int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr, + u64 ordinal) +{ + struct auxtrace_heap_item *heap_array; + + if (queue_nr >= heap->heap_sz) { + unsigned int heap_sz = AUXTRACE_INIT_NR_QUEUES; + + while (heap_sz <= queue_nr) + heap_sz <<= 1; + heap_array = realloc(heap->heap_array, + heap_sz * sizeof(struct auxtrace_heap_item)); + if (!heap_array) + return -ENOMEM; + heap->heap_array = heap_array; + heap->heap_sz = heap_sz; + } + + auxtrace_heapify(heap->heap_array, heap->heap_cnt++, queue_nr, ordinal); + + return 0; +} + +void auxtrace_heap__free(struct auxtrace_heap *heap) +{ + zfree(&heap->heap_array); + heap->heap_cnt = 0; + heap->heap_sz = 0; +} + +void auxtrace_heap__pop(struct auxtrace_heap *heap) +{ + unsigned int pos, last, heap_cnt = heap->heap_cnt; + struct auxtrace_heap_item *heap_array; + + if (!heap_cnt) + return; + + heap->heap_cnt -= 1; + + heap_array = heap->heap_array; + + pos = 0; + while (1) { + unsigned int left, right; + + left = (pos << 1) + 1; + if (left >= heap_cnt) + break; + right = left + 1; + if (right >= heap_cnt) { + heap_array[pos] = heap_array[left]; + return; + } + if (heap_array[left].ordinal < heap_array[right].ordinal) { + heap_array[pos] = heap_array[left]; + pos = left; + } else { + heap_array[pos] = heap_array[right]; + pos = right; + } + } + + last = heap_cnt - 1; + auxtrace_heapify(heap_array, pos, heap_array[last].queue_nr, + heap_array[last].ordinal); +} + +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr) +{ + if (itr) + return itr->info_priv_size(itr); + return 0; +} + +static int auxtrace_not_supported(void) +{ + pr_err("AUX area tracing is not supported on this architecture\n"); + return -EINVAL; +} + +int auxtrace_record__info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size) +{ + if (itr) + return itr->info_fill(itr, session, auxtrace_info, priv_size); + return auxtrace_not_supported(); +} + +void auxtrace_record__free(struct auxtrace_record *itr) +{ + if (itr) + itr->free(itr); +} + +int auxtrace_record__snapshot_start(struct auxtrace_record *itr) +{ + if (itr && itr->snapshot_start) + return itr->snapshot_start(itr); + return 0; +} + +int auxtrace_record__snapshot_finish(struct auxtrace_record *itr) +{ + if (itr && itr->snapshot_finish) + return itr->snapshot_finish(itr); + return 0; +} + +int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, + unsigned char *data, u64 *head, u64 *old) +{ + if (itr && itr->find_snapshot) + return itr->find_snapshot(itr, idx, mm, data, head, old); + return 0; +} + +int auxtrace_record__options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts) +{ + if (itr) + return itr->recording_options(itr, evlist, opts); + return 0; +} + +u64 auxtrace_record__reference(struct auxtrace_record *itr) +{ + if (itr) + return itr->reference(itr); + return 0; +} + +int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, const char *str) +{ + if (!str) + return 0; + + if (itr) + return itr->parse_snapshot_options(itr, opts, str); + + pr_err("No AUX area tracing to snapshot\n"); + return -EINVAL; +} + +struct auxtrace_record *__weak +auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, int *err) +{ + *err = 0; + return NULL; +} + +static int auxtrace_index__alloc(struct list_head *head) +{ + struct auxtrace_index *auxtrace_index; + + auxtrace_index = malloc(sizeof(struct 
auxtrace_index)); + if (!auxtrace_index) + return -ENOMEM; + + auxtrace_index->nr = 0; + INIT_LIST_HEAD(&auxtrace_index->list); + + list_add_tail(&auxtrace_index->list, head); + + return 0; +} + +void auxtrace_index__free(struct list_head *head) +{ + struct auxtrace_index *auxtrace_index, *n; + + list_for_each_entry_safe(auxtrace_index, n, head, list) { + list_del(&auxtrace_index->list); + free(auxtrace_index); + } +} + +static struct auxtrace_index *auxtrace_index__last(struct list_head *head) +{ + struct auxtrace_index *auxtrace_index; + int err; + + if (list_empty(head)) { + err = auxtrace_index__alloc(head); + if (err) + return NULL; + } + + auxtrace_index = list_entry(head->prev, struct auxtrace_index, list); + + if (auxtrace_index->nr >= PERF_AUXTRACE_INDEX_ENTRY_COUNT) { + err = auxtrace_index__alloc(head); + if (err) + return NULL; + auxtrace_index = list_entry(head->prev, struct auxtrace_index, + list); + } + + return auxtrace_index; +} + +int auxtrace_index__auxtrace_event(struct list_head *head, + union perf_event *event, off_t file_offset) +{ + struct auxtrace_index *auxtrace_index; + size_t nr; + + auxtrace_index = auxtrace_index__last(head); + if (!auxtrace_index) + return -ENOMEM; + + nr = auxtrace_index->nr; + auxtrace_index->entries[nr].file_offset = file_offset; + auxtrace_index->entries[nr].sz = event->header.size; + auxtrace_index->nr += 1; + + return 0; +} + +static int auxtrace_index__do_write(int fd, + struct auxtrace_index *auxtrace_index) +{ + struct auxtrace_index_entry ent; + size_t i; + + for (i = 0; i < auxtrace_index->nr; i++) { + ent.file_offset = auxtrace_index->entries[i].file_offset; + ent.sz = auxtrace_index->entries[i].sz; + if (writen(fd, &ent, sizeof(ent)) != sizeof(ent)) + return -errno; + } + return 0; +} + +int auxtrace_index__write(int fd, struct list_head *head) +{ + struct auxtrace_index *auxtrace_index; + u64 total = 0; + int err; + + list_for_each_entry(auxtrace_index, head, list) + total += auxtrace_index->nr; + + if (writen(fd, &total, sizeof(total)) != sizeof(total)) + return -errno; + + list_for_each_entry(auxtrace_index, head, list) { + err = auxtrace_index__do_write(fd, auxtrace_index); + if (err) + return err; + } + + return 0; +} + +static int auxtrace_index__process_entry(int fd, struct list_head *head, + bool needs_swap) +{ + struct auxtrace_index *auxtrace_index; + struct auxtrace_index_entry ent; + size_t nr; + + if (readn(fd, &ent, sizeof(ent)) != sizeof(ent)) + return -1; + + auxtrace_index = auxtrace_index__last(head); + if (!auxtrace_index) + return -1; + + nr = auxtrace_index->nr; + if (needs_swap) { + auxtrace_index->entries[nr].file_offset = + bswap_64(ent.file_offset); + auxtrace_index->entries[nr].sz = bswap_64(ent.sz); + } else { + auxtrace_index->entries[nr].file_offset = ent.file_offset; + auxtrace_index->entries[nr].sz = ent.sz; + } + + auxtrace_index->nr = nr + 1; + + return 0; +} + +int auxtrace_index__process(int fd, u64 size, struct perf_session *session, + bool needs_swap) +{ + struct list_head *head = &session->auxtrace_index; + u64 nr; + + if (readn(fd, &nr, sizeof(u64)) != sizeof(u64)) + return -1; + + if (needs_swap) + nr = bswap_64(nr); + + if (sizeof(u64) + nr * sizeof(struct auxtrace_index_entry) > size) + return -1; + + while (nr--) { + int err; + + err = auxtrace_index__process_entry(fd, head, needs_swap); + if (err) + return -1; + } + + return 0; +} + +static int auxtrace_queues__process_index_entry(struct auxtrace_queues *queues, + struct perf_session *session, + struct auxtrace_index_entry *ent) +{ + 
return auxtrace_queues__add_indexed_event(queues, session, + ent->file_offset, ent->sz); +} + +int auxtrace_queues__process_index(struct auxtrace_queues *queues, + struct perf_session *session) +{ + struct auxtrace_index *auxtrace_index; + struct auxtrace_index_entry *ent; + size_t i; + int err; + + list_for_each_entry(auxtrace_index, &session->auxtrace_index, list) { + for (i = 0; i < auxtrace_index->nr; i++) { + ent = &auxtrace_index->entries[i]; + err = auxtrace_queues__process_index_entry(queues, + session, + ent); + if (err) + return err; + } + } + return 0; +} + +struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer) +{ + if (buffer) { + if (list_is_last(&buffer->list, &queue->head)) + return NULL; + return list_entry(buffer->list.next, struct auxtrace_buffer, + list); + } else { + if (list_empty(&queue->head)) + return NULL; + return list_entry(queue->head.next, struct auxtrace_buffer, + list); + } +} + +void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd) +{ + size_t adj = buffer->data_offset & (page_size - 1); + size_t size = buffer->size + adj; + off_t file_offset = buffer->data_offset - adj; + void *addr; + + if (buffer->data) + return buffer->data; + + addr = mmap(NULL, size, PROT_READ, MAP_SHARED, fd, file_offset); + if (addr == MAP_FAILED) + return NULL; + + buffer->mmap_addr = addr; + buffer->mmap_size = size; + + buffer->data = addr + adj; + + return buffer->data; +} + +void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer) +{ + if (!buffer->data || !buffer->mmap_addr) + return; + munmap(buffer->mmap_addr, buffer->mmap_size); + buffer->mmap_addr = NULL; + buffer->mmap_size = 0; + buffer->data = NULL; + buffer->use_data = NULL; +} + +void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer) +{ + auxtrace_buffer__put_data(buffer); + if (buffer->data_needs_freeing) { + buffer->data_needs_freeing = false; + zfree(&buffer->data); + buffer->use_data = NULL; + buffer->size = 0; + } +} + +void auxtrace_buffer__free(struct auxtrace_buffer *buffer) +{ + auxtrace_buffer__drop_data(buffer); + free(buffer); +} + +void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, + int code, int cpu, pid_t pid, pid_t tid, u64 ip, + const char *msg) +{ + size_t size; + + memset(auxtrace_error, 0, sizeof(struct auxtrace_error_event)); + + auxtrace_error->header.type = PERF_RECORD_AUXTRACE_ERROR; + auxtrace_error->type = type; + auxtrace_error->code = code; + auxtrace_error->cpu = cpu; + auxtrace_error->pid = pid; + auxtrace_error->tid = tid; + auxtrace_error->ip = ip; + strlcpy(auxtrace_error->msg, msg, MAX_AUXTRACE_ERROR_MSG); + + size = (void *)auxtrace_error->msg - (void *)auxtrace_error + + strlen(auxtrace_error->msg) + 1; + auxtrace_error->header.size = PERF_ALIGN(size, sizeof(u64)); +} + +int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, + struct perf_tool *tool, + struct perf_session *session, + perf_event__handler_t process) +{ + union perf_event *ev; + size_t priv_size; + int err; + + pr_debug2("Synthesizing auxtrace information\n"); + priv_size = auxtrace_record__info_priv_size(itr); + ev = zalloc(sizeof(struct auxtrace_info_event) + priv_size); + if (!ev) + return -ENOMEM; + + ev->auxtrace_info.header.type = PERF_RECORD_AUXTRACE_INFO; + ev->auxtrace_info.header.size = sizeof(struct auxtrace_info_event) + + priv_size; + err = auxtrace_record__info_fill(itr, session, &ev->auxtrace_info, + priv_size); + if (err) + goto out_free; + + err = process(tool, ev, NULL, 
NULL); +out_free: + free(ev); + return err; +} + +static bool auxtrace__dont_decode(struct perf_session *session) +{ + return !session->itrace_synth_opts || + session->itrace_synth_opts->dont_decode; +} + +int perf_event__process_auxtrace_info(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session __maybe_unused) +{ + enum auxtrace_type type = event->auxtrace_info.type; + + if (dump_trace) + fprintf(stdout, " type: %u\n", type); + + switch (type) { + case PERF_AUXTRACE_UNKNOWN: + default: + return -EINVAL; + } +} + +s64 perf_event__process_auxtrace(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session) +{ + s64 err; + + if (dump_trace) + fprintf(stdout, " size: %#"PRIx64" offset: %#"PRIx64" ref: %#"PRIx64" idx: %u tid: %d cpu: %d\n", + event->auxtrace.size, event->auxtrace.offset, + event->auxtrace.reference, event->auxtrace.idx, + event->auxtrace.tid, event->auxtrace.cpu); + + if (auxtrace__dont_decode(session)) + return event->auxtrace.size; + + if (!session->auxtrace || event->header.type != PERF_RECORD_AUXTRACE) + return -EINVAL; + + err = session->auxtrace->process_auxtrace_event(session, event, tool); + if (err < 0) + return err; + + return event->auxtrace.size; +} + +#define PERF_ITRACE_DEFAULT_PERIOD_TYPE PERF_ITRACE_PERIOD_NANOSECS +#define PERF_ITRACE_DEFAULT_PERIOD 100000 +#define PERF_ITRACE_DEFAULT_CALLCHAIN_SZ 16 +#define PERF_ITRACE_MAX_CALLCHAIN_SZ 1024 + +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts) +{ + synth_opts->instructions = true; + synth_opts->branches = true; + synth_opts->transactions = true; + synth_opts->errors = true; + synth_opts->period_type = PERF_ITRACE_DEFAULT_PERIOD_TYPE; + synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + synth_opts->callchain_sz = PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; +} + +/* + * Please check tools/perf/Documentation/perf-script.txt for information + * about the options parsed here, which is introduced after this cset, + * when support in 'perf script' for these options is introduced. 
+ */ +int itrace_parse_synth_opts(const struct option *opt, const char *str, + int unset) +{ + struct itrace_synth_opts *synth_opts = opt->value; + const char *p; + char *endptr; + + synth_opts->set = true; + + if (unset) { + synth_opts->dont_decode = true; + return 0; + } + + if (!str) { + itrace_synth_opts__set_default(synth_opts); + return 0; + } + + for (p = str; *p;) { + switch (*p++) { + case 'i': + synth_opts->instructions = true; + while (*p == ' ' || *p == ',') + p += 1; + if (isdigit(*p)) { + synth_opts->period = strtoull(p, &endptr, 10); + p = endptr; + while (*p == ' ' || *p == ',') + p += 1; + switch (*p++) { + case 'i': + synth_opts->period_type = + PERF_ITRACE_PERIOD_INSTRUCTIONS; + break; + case 't': + synth_opts->period_type = + PERF_ITRACE_PERIOD_TICKS; + break; + case 'm': + synth_opts->period *= 1000; + /* Fall through */ + case 'u': + synth_opts->period *= 1000; + /* Fall through */ + case 'n': + if (*p++ != 's') + goto out_err; + synth_opts->period_type = + PERF_ITRACE_PERIOD_NANOSECS; + break; + case '\0': + goto out; + default: + goto out_err; + } + } + break; + case 'b': + synth_opts->branches = true; + break; + case 'x': + synth_opts->transactions = true; + break; + case 'e': + synth_opts->errors = true; + break; + case 'd': + synth_opts->log = true; + break; + case 'c': + synth_opts->branches = true; + synth_opts->calls = true; + break; + case 'r': + synth_opts->branches = true; + synth_opts->returns = true; + break; + case 'g': + synth_opts->callchain = true; + synth_opts->callchain_sz = + PERF_ITRACE_DEFAULT_CALLCHAIN_SZ; + while (*p == ' ' || *p == ',') + p += 1; + if (isdigit(*p)) { + unsigned int val; + + val = strtoul(p, &endptr, 10); + p = endptr; + if (!val || val > PERF_ITRACE_MAX_CALLCHAIN_SZ) + goto out_err; + synth_opts->callchain_sz = val; + } + break; + case ' ': + case ',': + break; + default: + goto out_err; + } + } +out: + if (synth_opts->instructions) { + if (!synth_opts->period_type) + synth_opts->period_type = + PERF_ITRACE_DEFAULT_PERIOD_TYPE; + if (!synth_opts->period) + synth_opts->period = PERF_ITRACE_DEFAULT_PERIOD; + } + + return 0; + +out_err: + pr_err("Bad Instruction Tracing options '%s'\n", str); + return -EINVAL; +} + +static const char * const auxtrace_error_type_name[] = { + [PERF_AUXTRACE_ERROR_ITRACE] = "instruction trace", +}; + +static const char *auxtrace_error_name(int type) +{ + const char *error_type_name = NULL; + + if (type < PERF_AUXTRACE_ERROR_MAX) + error_type_name = auxtrace_error_type_name[type]; + if (!error_type_name) + error_type_name = "unknown AUX"; + return error_type_name; +} + +size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp) +{ + struct auxtrace_error_event *e = &event->auxtrace_error; + int ret; + + ret = fprintf(fp, " %s error type %u", + auxtrace_error_name(e->type), e->type); + ret += fprintf(fp, " cpu %d pid %d tid %d ip %#"PRIx64" code %u: %s\n", + e->cpu, e->pid, e->tid, e->ip, e->code, e->msg); + return ret; +} + +void perf_session__auxtrace_error_inc(struct perf_session *session, + union perf_event *event) +{ + struct auxtrace_error_event *e = &event->auxtrace_error; + + if (e->type < PERF_AUXTRACE_ERROR_MAX) + session->evlist->stats.nr_auxtrace_errors[e->type] += 1; +} + +void events_stats__auxtrace_error_warn(const struct events_stats *stats) +{ + int i; + + for (i = 0; i < PERF_AUXTRACE_ERROR_MAX; i++) { + if (!stats->nr_auxtrace_errors[i]) + continue; + ui__warning("%u %s errors\n", + stats->nr_auxtrace_errors[i], + auxtrace_error_name(i)); + } +} + +int 
perf_event__process_auxtrace_error(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session) +{ + if (auxtrace__dont_decode(session)) + return 0; + + perf_event__fprintf_auxtrace_error(event, stdout); + return 0; +} + +static int __auxtrace_mmap__read(struct auxtrace_mmap *mm, + struct auxtrace_record *itr, + struct perf_tool *tool, process_auxtrace_t fn, + bool snapshot, size_t snapshot_size) +{ + u64 head, old = mm->prev, offset, ref; + unsigned char *data = mm->base; + size_t size, head_off, old_off, len1, len2, padding; + union perf_event ev; + void *data1, *data2; + + if (snapshot) { + head = auxtrace_mmap__read_snapshot_head(mm); + if (auxtrace_record__find_snapshot(itr, mm->idx, mm, data, + &head, &old)) + return -1; + } else { + head = auxtrace_mmap__read_head(mm); + } + + if (old == head) + return 0; + + pr_debug3("auxtrace idx %d old %#"PRIx64" head %#"PRIx64" diff %#"PRIx64"\n", + mm->idx, old, head, head - old); + + if (mm->mask) { + head_off = head & mm->mask; + old_off = old & mm->mask; + } else { + head_off = head % mm->len; + old_off = old % mm->len; + } + + if (head_off > old_off) + size = head_off - old_off; + else + size = mm->len - (old_off - head_off); + + if (snapshot && size > snapshot_size) + size = snapshot_size; + + ref = auxtrace_record__reference(itr); + + if (head > old || size <= head || mm->mask) { + offset = head - size; + } else { + /* + * When the buffer size is not a power of 2, 'head' wraps at the + * highest multiple of the buffer size, so we have to subtract + * the remainder here. + */ + u64 rem = (0ULL - mm->len) % mm->len; + + offset = head - size - rem; + } + + if (size > head_off) { + len1 = size - head_off; + data1 = &data[mm->len - len1]; + len2 = head_off; + data2 = &data[0]; + } else { + len1 = size; + data1 = &data[head_off - len1]; + len2 = 0; + data2 = NULL; + } + + if (itr->alignment) { + unsigned int unwanted = len1 % itr->alignment; + + len1 -= unwanted; + size -= unwanted; + } + + /* padding must be written by fn() e.g. 
record__process_auxtrace() */ + padding = size & 7; + if (padding) + padding = 8 - padding; + + memset(&ev, 0, sizeof(ev)); + ev.auxtrace.header.type = PERF_RECORD_AUXTRACE; + ev.auxtrace.header.size = sizeof(ev.auxtrace); + ev.auxtrace.size = size + padding; + ev.auxtrace.offset = offset; + ev.auxtrace.reference = ref; + ev.auxtrace.idx = mm->idx; + ev.auxtrace.tid = mm->tid; + ev.auxtrace.cpu = mm->cpu; + + if (fn(tool, &ev, data1, len1, data2, len2)) + return -1; + + mm->prev = head; + + if (!snapshot) { + auxtrace_mmap__write_tail(mm, head); + if (itr->read_finish) { + int err; + + err = itr->read_finish(itr, mm->idx); + if (err < 0) + return err; + } + } + + return 1; +} + +int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr, + struct perf_tool *tool, process_auxtrace_t fn) +{ + return __auxtrace_mmap__read(mm, itr, tool, fn, false, 0); +} + +int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm, + struct auxtrace_record *itr, + struct perf_tool *tool, process_auxtrace_t fn, + size_t snapshot_size) +{ + return __auxtrace_mmap__read(mm, itr, tool, fn, true, snapshot_size); +} + +/** + * struct auxtrace_cache - hash table to implement a cache + * @hashtable: the hashtable + * @sz: hashtable size (number of hlists) + * @entry_size: size of an entry + * @limit: limit the number of entries to this maximum, when reached the cache + * is dropped and caching begins again with an empty cache + * @cnt: current number of entries + * @bits: hashtable size (@sz = 2^@bits) + */ +struct auxtrace_cache { + struct hlist_head *hashtable; + size_t sz; + size_t entry_size; + size_t limit; + size_t cnt; + unsigned int bits; +}; + +struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size, + unsigned int limit_percent) +{ + struct auxtrace_cache *c; + struct hlist_head *ht; + size_t sz, i; + + c = zalloc(sizeof(struct auxtrace_cache)); + if (!c) + return NULL; + + sz = 1UL << bits; + + ht = calloc(sz, sizeof(struct hlist_head)); + if (!ht) + goto out_free; + + for (i = 0; i < sz; i++) + INIT_HLIST_HEAD(&ht[i]); + + c->hashtable = ht; + c->sz = sz; + c->entry_size = entry_size; + c->limit = (c->sz * limit_percent) / 100; + c->bits = bits; + + return c; + +out_free: + free(c); + return NULL; +} + +static void auxtrace_cache__drop(struct auxtrace_cache *c) +{ + struct auxtrace_cache_entry *entry; + struct hlist_node *tmp; + size_t i; + + if (!c) + return; + + for (i = 0; i < c->sz; i++) { + hlist_for_each_entry_safe(entry, tmp, &c->hashtable[i], hash) { + hlist_del(&entry->hash); + auxtrace_cache__free_entry(c, entry); + } + } + + c->cnt = 0; +} + +void auxtrace_cache__free(struct auxtrace_cache *c) +{ + if (!c) + return; + + auxtrace_cache__drop(c); + free(c->hashtable); + free(c); +} + +void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c) +{ + return malloc(c->entry_size); +} + +void auxtrace_cache__free_entry(struct auxtrace_cache *c __maybe_unused, + void *entry) +{ + free(entry); +} + +int auxtrace_cache__add(struct auxtrace_cache *c, u32 key, + struct auxtrace_cache_entry *entry) +{ + if (c->limit && ++c->cnt > c->limit) + auxtrace_cache__drop(c); + + entry->key = key; + hlist_add_head(&entry->hash, &c->hashtable[hash_32(key, c->bits)]); + + return 0; +} + +void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key) +{ + struct auxtrace_cache_entry *entry; + struct hlist_head *hlist; + + if (!c) + return NULL; + + hlist = &c->hashtable[hash_32(key, c->bits)]; + hlist_for_each_entry(entry, hlist, hash) { + if (entry->key == key) + return 
entry; + } + + return NULL; +} diff --git a/tools/perf/util/auxtrace.h b/tools/perf/util/auxtrace.h new file mode 100644 index 000000000000..471aecbc4d68 --- /dev/null +++ b/tools/perf/util/auxtrace.h @@ -0,0 +1,644 @@ +/* + * auxtrace.h: AUX area trace support + * Copyright (c) 2013-2015, Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef __PERF_AUXTRACE_H +#define __PERF_AUXTRACE_H + +#include <sys/types.h> +#include <stdbool.h> +#include <stddef.h> +#include <linux/list.h> +#include <linux/perf_event.h> +#include <linux/types.h> + +#include "../perf.h" +#include "event.h" +#include "session.h" +#include "debug.h" + +union perf_event; +struct perf_session; +struct perf_evlist; +struct perf_tool; +struct option; +struct record_opts; +struct auxtrace_info_event; +struct events_stats; + +enum auxtrace_type { + PERF_AUXTRACE_UNKNOWN, +}; + +enum itrace_period_type { + PERF_ITRACE_PERIOD_INSTRUCTIONS, + PERF_ITRACE_PERIOD_TICKS, + PERF_ITRACE_PERIOD_NANOSECS, +}; + +/** + * struct itrace_synth_opts - AUX area tracing synthesis options. + * @set: indicates whether or not options have been set + * @inject: indicates the event (not just the sample) must be fully synthesized + * because 'perf inject' will write it out + * @instructions: whether to synthesize 'instructions' events + * @branches: whether to synthesize 'branches' events + * @transactions: whether to synthesize events for transactions + * @errors: whether to synthesize decoder error events + * @dont_decode: whether to skip decoding entirely + * @log: write a decoding log + * @calls: limit branch samples to calls (can be combined with @returns) + * @returns: limit branch samples to returns (can be combined with @calls) + * @callchain: add callchain to 'instructions' events + * @callchain_sz: maximum callchain size + * @period: 'instructions' events period + * @period_type: 'instructions' events period type + */ +struct itrace_synth_opts { + bool set; + bool inject; + bool instructions; + bool branches; + bool transactions; + bool errors; + bool dont_decode; + bool log; + bool calls; + bool returns; + bool callchain; + unsigned int callchain_sz; + unsigned long long period; + enum itrace_period_type period_type; +}; + +/** + * struct auxtrace_index_entry - indexes a AUX area tracing event within a + * perf.data file. + * @file_offset: offset within the perf.data file + * @sz: size of the event + */ +struct auxtrace_index_entry { + u64 file_offset; + u64 sz; +}; + +#define PERF_AUXTRACE_INDEX_ENTRY_COUNT 256 + +/** + * struct auxtrace_index - index of AUX area tracing events within a perf.data + * file. + * @list: linking a number of arrays of entries + * @nr: number of entries + * @entries: array of entries + */ +struct auxtrace_index { + struct list_head list; + size_t nr; + struct auxtrace_index_entry entries[PERF_AUXTRACE_INDEX_ENTRY_COUNT]; +}; + +/** + * struct auxtrace - session callbacks to allow AUX area data decoding. 
+ * @process_event: lets the decoder see all session events + * @flush_events: process any remaining data + * @free_events: free resources associated with event processing + * @free: free resources associated with the session + */ +struct auxtrace { + int (*process_event)(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool); + int (*process_auxtrace_event)(struct perf_session *session, + union perf_event *event, + struct perf_tool *tool); + int (*flush_events)(struct perf_session *session, + struct perf_tool *tool); + void (*free_events)(struct perf_session *session); + void (*free)(struct perf_session *session); +}; + +/** + * struct auxtrace_buffer - a buffer containing AUX area tracing data. + * @list: buffers are queued in a list held by struct auxtrace_queue + * @size: size of the buffer in bytes + * @pid: in per-thread mode, the pid this buffer is associated with + * @tid: in per-thread mode, the tid this buffer is associated with + * @cpu: in per-cpu mode, the cpu this buffer is associated with + * @data: actual buffer data (can be null if the data has not been loaded) + * @data_offset: file offset at which the buffer can be read + * @mmap_addr: mmap address at which the buffer can be read + * @mmap_size: size of the mmap at @mmap_addr + * @data_needs_freeing: @data was malloc'd so free it when it is no longer + * needed + * @consecutive: the original data was split up and this buffer is consecutive + * to the previous buffer + * @offset: offset as determined by aux_head / aux_tail members of struct + * perf_event_mmap_page + * @reference: an implementation-specific reference determined when the data is + * recorded + * @buffer_nr: used to number each buffer + * @use_size: implementation actually only uses this number of bytes + * @use_data: implementation actually only uses data starting at this address + */ +struct auxtrace_buffer { + struct list_head list; + size_t size; + pid_t pid; + pid_t tid; + int cpu; + void *data; + off_t data_offset; + void *mmap_addr; + size_t mmap_size; + bool data_needs_freeing; + bool consecutive; + u64 offset; + u64 reference; + u64 buffer_nr; + size_t use_size; + void *use_data; +}; + +/** + * struct auxtrace_queue - a queue of AUX area tracing data buffers. + * @head: head of buffer list + * @tid: in per-thread mode, the tid this queue is associated with + * @cpu: in per-cpu mode, the cpu this queue is associated with + * @set: %true once this queue has been dedicated to a specific thread or cpu + * @priv: implementation-specific data + */ +struct auxtrace_queue { + struct list_head head; + pid_t tid; + int cpu; + bool set; + void *priv; +}; + +/** + * struct auxtrace_queues - an array of AUX area tracing queues. + * @queue_array: array of queues + * @nr_queues: number of queues + * @new_data: set whenever new data is queued + * @populated: queues have been fully populated using the auxtrace_index + * @next_buffer_nr: used to number each buffer + */ +struct auxtrace_queues { + struct auxtrace_queue *queue_array; + unsigned int nr_queues; + bool new_data; + bool populated; + u64 next_buffer_nr; +}; + +/** + * struct auxtrace_heap_item - element of struct auxtrace_heap. + * @queue_nr: queue number + * @ordinal: value used for sorting (lowest ordinal is top of the heap) expected + * to be a timestamp + */ +struct auxtrace_heap_item { + unsigned int queue_nr; + u64 ordinal; +}; + +/** + * struct auxtrace_heap - a heap suitable for sorting AUX area tracing queues. 
+ * @heap_array: the heap + * @heap_cnt: the number of elements in the heap + * @heap_sz: maximum number of elements (grows as needed) + */ +struct auxtrace_heap { + struct auxtrace_heap_item *heap_array; + unsigned int heap_cnt; + unsigned int heap_sz; +}; + +/** + * struct auxtrace_mmap - records an mmap of the auxtrace buffer. + * @base: address of mapped area + * @userpg: pointer to buffer's perf_event_mmap_page + * @mask: %0 if @len is not a power of two, otherwise (@len - %1) + * @len: size of mapped area + * @prev: previous aux_head + * @idx: index of this mmap + * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu + * mmap) otherwise %0 + * @cpu: cpu number for a per-cpu mmap otherwise %-1 + */ +struct auxtrace_mmap { + void *base; + void *userpg; + size_t mask; + size_t len; + u64 prev; + int idx; + pid_t tid; + int cpu; +}; + +/** + * struct auxtrace_mmap_params - parameters to set up struct auxtrace_mmap. + * @mask: %0 if @len is not a power of two, otherwise (@len - %1) + * @offset: file offset of mapped area + * @len: size of mapped area + * @prot: mmap memory protection + * @idx: index of this mmap + * @tid: tid for a per-thread mmap (also set if there is only 1 tid on a per-cpu + * mmap) otherwise %0 + * @cpu: cpu number for a per-cpu mmap otherwise %-1 + */ +struct auxtrace_mmap_params { + size_t mask; + off_t offset; + size_t len; + int prot; + int idx; + pid_t tid; + int cpu; +}; + +/** + * struct auxtrace_record - callbacks for recording AUX area data. + * @recording_options: validate and process recording options + * @info_priv_size: return the size of the private data in auxtrace_info_event + * @info_fill: fill-in the private data in auxtrace_info_event + * @free: free this auxtrace record structure + * @snapshot_start: starting a snapshot + * @snapshot_finish: finishing a snapshot + * @find_snapshot: find data to snapshot within auxtrace mmap + * @parse_snapshot_options: parse snapshot options + * @reference: provide a 64-bit reference number for auxtrace_event + * @read_finish: called after reading from an auxtrace mmap + */ +struct auxtrace_record { + int (*recording_options)(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts); + size_t (*info_priv_size)(struct auxtrace_record *itr); + int (*info_fill)(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size); + void (*free)(struct auxtrace_record *itr); + int (*snapshot_start)(struct auxtrace_record *itr); + int (*snapshot_finish)(struct auxtrace_record *itr); + int (*find_snapshot)(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, unsigned char *data, + u64 *head, u64 *old); + int (*parse_snapshot_options)(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str); + u64 (*reference)(struct auxtrace_record *itr); + int (*read_finish)(struct auxtrace_record *itr, int idx); + unsigned int alignment; +}; + +#ifdef HAVE_AUXTRACE_SUPPORT + +/* + * In snapshot mode the mmapped page is read-only which makes using + * __sync_val_compare_and_swap() problematic. However, snapshot mode expects + * the buffer is not updated while the snapshot is made (e.g. Intel PT disables + * the event) so there is not a race anyway. 
+ */ +static inline u64 auxtrace_mmap__read_snapshot_head(struct auxtrace_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->userpg; + u64 head = ACCESS_ONCE(pc->aux_head); + + /* Ensure all reads are done after we read the head */ + rmb(); + return head; +} + +static inline u64 auxtrace_mmap__read_head(struct auxtrace_mmap *mm) +{ + struct perf_event_mmap_page *pc = mm->userpg; +#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) + u64 head = ACCESS_ONCE(pc->aux_head); +#else + u64 head = __sync_val_compare_and_swap(&pc->aux_head, 0, 0); +#endif + + /* Ensure all reads are done after we read the head */ + rmb(); + return head; +} + +static inline void auxtrace_mmap__write_tail(struct auxtrace_mmap *mm, u64 tail) +{ + struct perf_event_mmap_page *pc = mm->userpg; +#if BITS_PER_LONG != 64 && defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) + u64 old_tail; +#endif + + /* Ensure all reads are done before we write the tail out */ + mb(); +#if BITS_PER_LONG == 64 || !defined(HAVE_SYNC_COMPARE_AND_SWAP_SUPPORT) + pc->aux_tail = tail; +#else + do { + old_tail = __sync_val_compare_and_swap(&pc->aux_tail, 0, 0); + } while (!__sync_bool_compare_and_swap(&pc->aux_tail, old_tail, tail)); +#endif +} + +int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, + struct auxtrace_mmap_params *mp, + void *userpg, int fd); +void auxtrace_mmap__munmap(struct auxtrace_mmap *mm); +void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, + off_t auxtrace_offset, + unsigned int auxtrace_pages, + bool auxtrace_overwrite); +void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, + struct perf_evlist *evlist, int idx, + bool per_cpu); + +typedef int (*process_auxtrace_t)(struct perf_tool *tool, + union perf_event *event, void *data1, + size_t len1, void *data2, size_t len2); + +int auxtrace_mmap__read(struct auxtrace_mmap *mm, struct auxtrace_record *itr, + struct perf_tool *tool, process_auxtrace_t fn); + +int auxtrace_mmap__read_snapshot(struct auxtrace_mmap *mm, + struct auxtrace_record *itr, + struct perf_tool *tool, process_auxtrace_t fn, + size_t snapshot_size); + +int auxtrace_queues__init(struct auxtrace_queues *queues); +int auxtrace_queues__add_event(struct auxtrace_queues *queues, + struct perf_session *session, + union perf_event *event, off_t data_offset, + struct auxtrace_buffer **buffer_ptr); +void auxtrace_queues__free(struct auxtrace_queues *queues); +int auxtrace_queues__process_index(struct auxtrace_queues *queues, + struct perf_session *session); +struct auxtrace_buffer *auxtrace_buffer__next(struct auxtrace_queue *queue, + struct auxtrace_buffer *buffer); +void *auxtrace_buffer__get_data(struct auxtrace_buffer *buffer, int fd); +void auxtrace_buffer__put_data(struct auxtrace_buffer *buffer); +void auxtrace_buffer__drop_data(struct auxtrace_buffer *buffer); +void auxtrace_buffer__free(struct auxtrace_buffer *buffer); + +int auxtrace_heap__add(struct auxtrace_heap *heap, unsigned int queue_nr, + u64 ordinal); +void auxtrace_heap__pop(struct auxtrace_heap *heap); +void auxtrace_heap__free(struct auxtrace_heap *heap); + +struct auxtrace_cache_entry { + struct hlist_node hash; + u32 key; +}; + +struct auxtrace_cache *auxtrace_cache__new(unsigned int bits, size_t entry_size, + unsigned int limit_percent); +void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache); +void *auxtrace_cache__alloc_entry(struct auxtrace_cache *c); +void auxtrace_cache__free_entry(struct auxtrace_cache *c, void *entry); +int auxtrace_cache__add(struct auxtrace_cache *c, u32 key, + struct 
auxtrace_cache_entry *entry); +void *auxtrace_cache__lookup(struct auxtrace_cache *c, u32 key); + +struct auxtrace_record *auxtrace_record__init(struct perf_evlist *evlist, + int *err); + +int auxtrace_parse_snapshot_options(struct auxtrace_record *itr, + struct record_opts *opts, + const char *str); +int auxtrace_record__options(struct auxtrace_record *itr, + struct perf_evlist *evlist, + struct record_opts *opts); +size_t auxtrace_record__info_priv_size(struct auxtrace_record *itr); +int auxtrace_record__info_fill(struct auxtrace_record *itr, + struct perf_session *session, + struct auxtrace_info_event *auxtrace_info, + size_t priv_size); +void auxtrace_record__free(struct auxtrace_record *itr); +int auxtrace_record__snapshot_start(struct auxtrace_record *itr); +int auxtrace_record__snapshot_finish(struct auxtrace_record *itr); +int auxtrace_record__find_snapshot(struct auxtrace_record *itr, int idx, + struct auxtrace_mmap *mm, + unsigned char *data, u64 *head, u64 *old); +u64 auxtrace_record__reference(struct auxtrace_record *itr); + +int auxtrace_index__auxtrace_event(struct list_head *head, union perf_event *event, + off_t file_offset); +int auxtrace_index__write(int fd, struct list_head *head); +int auxtrace_index__process(int fd, u64 size, struct perf_session *session, + bool needs_swap); +void auxtrace_index__free(struct list_head *head); + +void auxtrace_synth_error(struct auxtrace_error_event *auxtrace_error, int type, + int code, int cpu, pid_t pid, pid_t tid, u64 ip, + const char *msg); + +int perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr, + struct perf_tool *tool, + struct perf_session *session, + perf_event__handler_t process); +int perf_event__process_auxtrace_info(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); +s64 perf_event__process_auxtrace(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); +int perf_event__process_auxtrace_error(struct perf_tool *tool, + union perf_event *event, + struct perf_session *session); +int itrace_parse_synth_opts(const struct option *opt, const char *str, + int unset); +void itrace_synth_opts__set_default(struct itrace_synth_opts *synth_opts); + +size_t perf_event__fprintf_auxtrace_error(union perf_event *event, FILE *fp); +void perf_session__auxtrace_error_inc(struct perf_session *session, + union perf_event *event); +void events_stats__auxtrace_error_warn(const struct events_stats *stats); + +static inline int auxtrace__process_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool) +{ + if (!session->auxtrace) + return 0; + + return session->auxtrace->process_event(session, event, sample, tool); +} + +static inline int auxtrace__flush_events(struct perf_session *session, + struct perf_tool *tool) +{ + if (!session->auxtrace) + return 0; + + return session->auxtrace->flush_events(session, tool); +} + +static inline void auxtrace__free_events(struct perf_session *session) +{ + if (!session->auxtrace) + return; + + return session->auxtrace->free_events(session); +} + +static inline void auxtrace__free(struct perf_session *session) +{ + if (!session->auxtrace) + return; + + return session->auxtrace->free(session); +} + +#else + +static inline struct auxtrace_record * +auxtrace_record__init(struct perf_evlist *evlist __maybe_unused, + int *err __maybe_unused) +{ + *err = 0; + return NULL; +} + +static inline +void auxtrace_record__free(struct auxtrace_record *itr __maybe_unused) +{ +} + +static 
inline int +perf_event__synthesize_auxtrace_info(struct auxtrace_record *itr __maybe_unused, + struct perf_tool *tool __maybe_unused, + struct perf_session *session __maybe_unused, + perf_event__handler_t process __maybe_unused) +{ + return -EINVAL; +} + +static inline +int auxtrace_record__options(struct auxtrace_record *itr __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + struct record_opts *opts __maybe_unused) +{ + return 0; +} + +#define perf_event__process_auxtrace_info 0 +#define perf_event__process_auxtrace 0 +#define perf_event__process_auxtrace_error 0 + +static inline +void perf_session__auxtrace_error_inc(struct perf_session *session + __maybe_unused, + union perf_event *event + __maybe_unused) +{ +} + +static inline +void events_stats__auxtrace_error_warn(const struct events_stats *stats + __maybe_unused) +{ +} + +static inline +int itrace_parse_synth_opts(const struct option *opt __maybe_unused, + const char *str __maybe_unused, + int unset __maybe_unused) +{ + pr_err("AUX area tracing not supported\n"); + return -EINVAL; +} + +static inline +int auxtrace_parse_snapshot_options(struct auxtrace_record *itr __maybe_unused, + struct record_opts *opts __maybe_unused, + const char *str) +{ + if (!str) + return 0; + pr_err("AUX area tracing not supported\n"); + return -EINVAL; +} + +static inline +int auxtrace__process_event(struct perf_session *session __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_sample *sample __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static inline +int auxtrace__flush_events(struct perf_session *session __maybe_unused, + struct perf_tool *tool __maybe_unused) +{ + return 0; +} + +static inline +void auxtrace__free_events(struct perf_session *session __maybe_unused) +{ +} + +static inline +void auxtrace_cache__free(struct auxtrace_cache *auxtrace_cache __maybe_unused) +{ +} + +static inline +void auxtrace__free(struct perf_session *session __maybe_unused) +{ +} + +static inline +int auxtrace_index__write(int fd __maybe_unused, + struct list_head *head __maybe_unused) +{ + return -EINVAL; +} + +static inline +int auxtrace_index__process(int fd __maybe_unused, + u64 size __maybe_unused, + struct perf_session *session __maybe_unused, + bool needs_swap __maybe_unused) +{ + return -EINVAL; +} + +static inline +void auxtrace_index__free(struct list_head *head __maybe_unused) +{ +} + +int auxtrace_mmap__mmap(struct auxtrace_mmap *mm, + struct auxtrace_mmap_params *mp, + void *userpg, int fd); +void auxtrace_mmap__munmap(struct auxtrace_mmap *mm); +void auxtrace_mmap_params__init(struct auxtrace_mmap_params *mp, + off_t auxtrace_offset, + unsigned int auxtrace_pages, + bool auxtrace_overwrite); +void auxtrace_mmap_params__set_idx(struct auxtrace_mmap_params *mp, + struct perf_evlist *evlist, int idx, + bool per_cpu); + +#endif + +#endif diff --git a/tools/perf/util/build-id.c b/tools/perf/util/build-id.c index 0c72680a977f..1f6fc2323ef9 100644 --- a/tools/perf/util/build-id.c +++ b/tools/perf/util/build-id.c @@ -43,6 +43,7 @@ int build_id__mark_dso_hit(struct perf_tool *tool __maybe_unused, if (al.map != NULL) al.map->dso->hit = 1; + thread__put(thread); return 0; } @@ -60,9 +61,8 @@ static int perf_event__exit_del_thread(struct perf_tool *tool __maybe_unused, event->fork.ppid, event->fork.ptid); if (thread) { - rb_erase(&thread->rb_node, &machine->threads); - machine->last_match = NULL; - thread__delete(thread); + machine__remove_thread(machine, thread); + thread__put(thread); } return 0; @@ 
-93,6 +93,35 @@ int build_id__sprintf(const u8 *build_id, int len, char *bf) return raw - build_id; } +/* asnprintf consolidates asprintf and snprintf */ +static int asnprintf(char **strp, size_t size, const char *fmt, ...) +{ + va_list ap; + int ret; + + if (!strp) + return -EINVAL; + + va_start(ap, fmt); + if (*strp) + ret = vsnprintf(*strp, size, fmt, ap); + else + ret = vasprintf(strp, fmt, ap); + va_end(ap); + + return ret; +} + +static char *build_id__filename(const char *sbuild_id, char *bf, size_t size) +{ + char *tmp = bf; + int ret = asnprintf(&bf, size, "%s/.build-id/%.2s/%s", buildid_dir, + sbuild_id, sbuild_id + 2); + if (ret < 0 || (tmp && size < (unsigned int)ret)) + return NULL; + return bf; +} + char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) { char build_id_hex[BUILD_ID_SIZE * 2 + 1]; @@ -101,14 +130,7 @@ char *dso__build_id_filename(const struct dso *dso, char *bf, size_t size) return NULL; build_id__sprintf(dso->build_id, sizeof(dso->build_id), build_id_hex); - if (bf == NULL) { - if (asprintf(&bf, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2) < 0) - return NULL; - } else - snprintf(bf, size, "%s/.build-id/%.2s/%s", buildid_dir, - build_id_hex, build_id_hex + 2); - return bf; + return build_id__filename(build_id_hex, bf, size); } #define dsos__for_each_with_build_id(pos, head) \ @@ -140,15 +162,20 @@ static int write_buildid(const char *name, size_t name_len, u8 *build_id, return write_padded(fd, name, name_len + 1, len); } -static int __dsos__write_buildid_table(struct list_head *head, - struct machine *machine, - pid_t pid, u16 misc, int fd) +static int machine__write_buildid_table(struct machine *machine, int fd) { + int err = 0; char nm[PATH_MAX]; struct dso *pos; + u16 kmisc = PERF_RECORD_MISC_KERNEL, + umisc = PERF_RECORD_MISC_USER; + + if (!machine__is_host(machine)) { + kmisc = PERF_RECORD_MISC_GUEST_KERNEL; + umisc = PERF_RECORD_MISC_GUEST_USER; + } - dsos__for_each_with_build_id(pos, head) { - int err; + dsos__for_each_with_build_id(pos, &machine->dsos.head) { const char *name; size_t name_len; @@ -167,32 +194,12 @@ static int __dsos__write_buildid_table(struct list_head *head, name_len = pos->long_name_len + 1; } - err = write_buildid(name, name_len, pos->build_id, - pid, misc, fd); + err = write_buildid(name, name_len, pos->build_id, machine->pid, + pos->kernel ? 
kmisc : umisc, fd); if (err) - return err; - } - - return 0; -} - -static int machine__write_buildid_table(struct machine *machine, int fd) -{ - int err; - u16 kmisc = PERF_RECORD_MISC_KERNEL, - umisc = PERF_RECORD_MISC_USER; - - if (!machine__is_host(machine)) { - kmisc = PERF_RECORD_MISC_GUEST_KERNEL; - umisc = PERF_RECORD_MISC_GUEST_USER; + break; } - err = __dsos__write_buildid_table(&machine->kernel_dsos.head, machine, - machine->pid, kmisc, fd); - if (err == 0) - err = __dsos__write_buildid_table(&machine->user_dsos.head, - machine, machine->pid, umisc, - fd); return err; } @@ -225,13 +232,7 @@ static int __dsos__hit_all(struct list_head *head) static int machine__hit_all_dsos(struct machine *machine) { - int err; - - err = __dsos__hit_all(&machine->kernel_dsos.head); - if (err) - return err; - - return __dsos__hit_all(&machine->user_dsos.head); + return __dsos__hit_all(&machine->dsos.head); } int dsos__hit_all(struct perf_session *session) @@ -259,52 +260,113 @@ void disable_buildid_cache(void) no_buildid_cache = true; } -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, - const char *name, bool is_kallsyms, bool is_vdso) +static char *build_id_cache__dirname_from_path(const char *name, + bool is_kallsyms, bool is_vdso) { - const size_t size = PATH_MAX; - char *realname, *filename = zalloc(size), - *linkname = zalloc(size), *targetname; - int len, err = -1; + char *realname = (char *)name, *filename; bool slash = is_kallsyms || is_vdso; - if (is_kallsyms) { - if (symbol_conf.kptr_restrict) { - pr_debug("Not caching a kptr_restrict'ed /proc/kallsyms\n"); - err = 0; - goto out_free; - } - realname = (char *) name; - } else + if (!slash) { realname = realpath(name, NULL); + if (!realname) + return NULL; + } + + if (asprintf(&filename, "%s%s%s", buildid_dir, slash ? "/" : "", + is_vdso ? DSO__NAME_VDSO : realname) < 0) + filename = NULL; + + if (!slash) + free(realname); + + return filename; +} + +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result) +{ + struct strlist *list; + char *dir_name; + DIR *dir; + struct dirent *d; + int ret = 0; + + list = strlist__new(true, NULL); + dir_name = build_id_cache__dirname_from_path(pathname, false, false); + if (!list || !dir_name) { + ret = -ENOMEM; + goto out; + } - if (realname == NULL || filename == NULL || linkname == NULL) + /* List up all dirents */ + dir = opendir(dir_name); + if (!dir) { + ret = -errno; + goto out; + } + + while ((d = readdir(dir)) != NULL) { + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + strlist__add(list, d->d_name); + } + closedir(dir); + +out: + free(dir_name); + if (ret) + strlist__delete(list); + else + *result = list; + + return ret; +} + +int build_id_cache__add_s(const char *sbuild_id, const char *name, + bool is_kallsyms, bool is_vdso) +{ + const size_t size = PATH_MAX; + char *realname = NULL, *filename = NULL, *dir_name = NULL, + *linkname = zalloc(size), *targetname, *tmp; + int err = -1; + + if (!is_kallsyms) { + realname = realpath(name, NULL); + if (!realname) + goto out_free; + } + + dir_name = build_id_cache__dirname_from_path(name, is_kallsyms, is_vdso); + if (!dir_name) goto out_free; - len = scnprintf(filename, size, "%s%s%s", - debugdir, slash ? "/" : "", - is_vdso ? 
DSO__NAME_VDSO : realname); - if (mkdir_p(filename, 0755)) + if (mkdir_p(dir_name, 0755)) goto out_free; - snprintf(filename + len, size - len, "/%s", sbuild_id); + if (asprintf(&filename, "%s/%s", dir_name, sbuild_id) < 0) { + filename = NULL; + goto out_free; + } if (access(filename, F_OK)) { if (is_kallsyms) { if (copyfile("/proc/kallsyms", filename)) goto out_free; - } else if (link(realname, filename) && copyfile(name, filename)) + } else if (link(realname, filename) && errno != EEXIST && + copyfile(name, filename)) goto out_free; } - len = scnprintf(linkname, size, "%s/.build-id/%.2s", - debugdir, sbuild_id); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; + tmp = strrchr(linkname, '/'); + *tmp = '\0'; if (access(linkname, X_OK) && mkdir_p(linkname, 0755)) goto out_free; - snprintf(linkname + len, size - len, "/%s", sbuild_id + 2); - targetname = filename + strlen(debugdir) - 5; + *tmp = '/'; + targetname = filename + strlen(buildid_dir) - 5; memcpy(targetname, "../..", 5); if (symlink(targetname, linkname) == 0) @@ -313,34 +375,46 @@ out_free: if (!is_kallsyms) free(realname); free(filename); + free(dir_name); free(linkname); return err; } static int build_id_cache__add_b(const u8 *build_id, size_t build_id_size, - const char *name, const char *debugdir, - bool is_kallsyms, bool is_vdso) + const char *name, bool is_kallsyms, + bool is_vdso) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; build_id__sprintf(build_id, build_id_size, sbuild_id); - return build_id_cache__add_s(sbuild_id, debugdir, name, - is_kallsyms, is_vdso); + return build_id_cache__add_s(sbuild_id, name, is_kallsyms, is_vdso); +} + +bool build_id_cache__cached(const char *sbuild_id) +{ + bool ret = false; + char *filename = build_id__filename(sbuild_id, NULL, 0); + + if (filename && !access(filename, F_OK)) + ret = true; + free(filename); + + return ret; } -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) +int build_id_cache__remove_s(const char *sbuild_id) { const size_t size = PATH_MAX; char *filename = zalloc(size), - *linkname = zalloc(size); + *linkname = zalloc(size), *tmp; int err = -1; if (filename == NULL || linkname == NULL) goto out_free; - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, sbuild_id + 2); + if (!build_id__filename(sbuild_id, linkname, size)) + goto out_free; if (access(linkname, F_OK)) goto out_free; @@ -354,8 +428,8 @@ int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir) /* * Since the link is relative, we must make it absolute: */ - snprintf(linkname, size, "%s/.build-id/%.2s/%s", - debugdir, sbuild_id, filename); + tmp = strrchr(linkname, '/') + 1; + snprintf(tmp, size - (tmp - linkname), "%s", filename); if (unlink(linkname)) goto out_free; @@ -367,8 +441,7 @@ out_free: return err; } -static int dso__cache_build_id(struct dso *dso, struct machine *machine, - const char *debugdir) +static int dso__cache_build_id(struct dso *dso, struct machine *machine) { bool is_kallsyms = dso->kernel && dso->long_name[0] != '/'; bool is_vdso = dso__is_vdso(dso); @@ -381,29 +454,25 @@ static int dso__cache_build_id(struct dso *dso, struct machine *machine, name = nm; } return build_id_cache__add_b(dso->build_id, sizeof(dso->build_id), name, - debugdir, is_kallsyms, is_vdso); + is_kallsyms, is_vdso); } static int __dsos__cache_build_ids(struct list_head *head, - struct machine *machine, const char *debugdir) + struct machine *machine) { struct dso *pos; int err = 0; dsos__for_each_with_build_id(pos, head) - if 
(dso__cache_build_id(pos, machine, debugdir)) + if (dso__cache_build_id(pos, machine)) err = -1; return err; } -static int machine__cache_build_ids(struct machine *machine, const char *debugdir) +static int machine__cache_build_ids(struct machine *machine) { - int ret = __dsos__cache_build_ids(&machine->kernel_dsos.head, machine, - debugdir); - ret |= __dsos__cache_build_ids(&machine->user_dsos.head, machine, - debugdir); - return ret; + return __dsos__cache_build_ids(&machine->dsos.head, machine); } int perf_session__cache_build_ids(struct perf_session *session) @@ -417,22 +486,18 @@ int perf_session__cache_build_ids(struct perf_session *session) if (mkdir(buildid_dir, 0755) != 0 && errno != EEXIST) return -1; - ret = machine__cache_build_ids(&session->machines.host, buildid_dir); + ret = machine__cache_build_ids(&session->machines.host); for (nd = rb_first(&session->machines.guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret |= machine__cache_build_ids(pos, buildid_dir); + ret |= machine__cache_build_ids(pos); } return ret ? -1 : 0; } static bool machine__read_build_ids(struct machine *machine, bool with_hits) { - bool ret; - - ret = __dsos__read_build_ids(&machine->kernel_dsos.head, with_hits); - ret |= __dsos__read_build_ids(&machine->user_dsos.head, with_hits); - return ret; + return __dsos__read_build_ids(&machine->dsos.head, with_hits); } bool perf_session__read_build_ids(struct perf_session *session, bool with_hits) diff --git a/tools/perf/util/build-id.h b/tools/perf/util/build-id.h index 8236319514d5..85011222cc14 100644 --- a/tools/perf/util/build-id.h +++ b/tools/perf/util/build-id.h @@ -4,6 +4,7 @@ #define BUILD_ID_SIZE 20 #include "tool.h" +#include "strlist.h" #include <linux/types.h> extern struct perf_tool build_id__mark_dso_hit_ops; @@ -22,9 +23,12 @@ bool perf_session__read_build_ids(struct perf_session *session, bool with_hits); int perf_session__write_buildid_table(struct perf_session *session, int fd); int perf_session__cache_build_ids(struct perf_session *session); -int build_id_cache__add_s(const char *sbuild_id, const char *debugdir, +int build_id_cache__list_build_ids(const char *pathname, + struct strlist **result); +bool build_id_cache__cached(const char *sbuild_id); +int build_id_cache__add_s(const char *sbuild_id, const char *name, bool is_kallsyms, bool is_vdso); -int build_id_cache__remove_s(const char *sbuild_id, const char *debugdir); +int build_id_cache__remove_s(const char *sbuild_id); void disable_buildid_cache(void); #endif diff --git a/tools/perf/util/cache.h b/tools/perf/util/cache.h index d04d770d90f6..c861373aaed3 100644 --- a/tools/perf/util/cache.h +++ b/tools/perf/util/cache.h @@ -17,6 +17,7 @@ #define EXEC_PATH_ENVIRONMENT "PERF_EXEC_PATH" #define DEFAULT_PERF_DIR_ENVIRONMENT ".perf" #define PERF_DEBUGFS_ENVIRONMENT "PERF_DEBUGFS_DIR" +#define PERF_TRACEFS_ENVIRONMENT "PERF_TRACEFS_DIR" typedef int (*config_fn_t)(const char *, const char *, void *); extern int perf_default_config(const char *, const char *, void *); @@ -29,7 +30,6 @@ extern const char *perf_config_dirname(const char *, const char *); /* pager.c */ extern void setup_pager(void); -extern const char *pager_program; extern int pager_in_use(void); extern int pager_use_color; diff --git a/tools/perf/util/callchain.c b/tools/perf/util/callchain.c index 14e7a123d43b..9f643ee77001 100644 --- a/tools/perf/util/callchain.c +++ b/tools/perf/util/callchain.c @@ -97,6 +97,14 @@ int parse_callchain_record_opt(const char *arg) 
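/*
 * (Editor's aside, not part of this patch: the hunk below teaches the
 * --call-graph record-option parser a new "lbr" keyword, mapping it to
 * CALLCHAIN_LBR. Note that sizeof("lbr") is 4, so the strncmp() also
 * compares the terminating NUL and therefore only matches the exact token
 * "lbr". Assuming a CPU whose LBR facility supports call-stack mode, a
 * typical invocation would be something like:
 *
 *	perf record --call-graph lbr -- ./workload
 * )
 */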
callchain_param.dump_size = size; } #endif /* HAVE_DWARF_UNWIND_SUPPORT */ + } else if (!strncmp(name, "lbr", sizeof("lbr"))) { + if (!strtok_r(NULL, ",", &saveptr)) { + callchain_param.record_mode = CALLCHAIN_LBR; + ret = 0; + } else + pr_err("callchain: No more arguments " + "needed for --call-graph lbr\n"); + break; } else { pr_err("callchain: Unknown --call-graph option " "value: %s\n", arg); diff --git a/tools/perf/util/callchain.h b/tools/perf/util/callchain.h index c0ec1acc38e4..679c2c6d8ade 100644 --- a/tools/perf/util/callchain.h +++ b/tools/perf/util/callchain.h @@ -11,6 +11,7 @@ enum perf_call_graph_mode { CALLCHAIN_NONE, CALLCHAIN_FP, CALLCHAIN_DWARF, + CALLCHAIN_LBR, CALLCHAIN_MAX }; @@ -71,6 +72,10 @@ extern struct callchain_param callchain_param; struct callchain_list { u64 ip; struct map_symbol ms; + struct /* for TUI */ { + bool unfolded; + bool has_children; + }; char *srcline; struct list_head list; }; diff --git a/tools/perf/util/cgroup.c b/tools/perf/util/cgroup.c index 88f7be399432..32e12ecfe9c5 100644 --- a/tools/perf/util/cgroup.c +++ b/tools/perf/util/cgroup.c @@ -115,23 +115,19 @@ static int add_cgroup(struct perf_evlist *evlist, char *str) goto found; n++; } - if (cgrp->refcnt == 0) + if (atomic_read(&cgrp->refcnt) == 0) free(cgrp); return -1; found: - cgrp->refcnt++; + atomic_inc(&cgrp->refcnt); counter->cgrp = cgrp; return 0; } void close_cgroup(struct cgroup_sel *cgrp) { - if (!cgrp) - return; - - /* XXX: not reentrant */ - if (--cgrp->refcnt == 0) { + if (cgrp && atomic_dec_and_test(&cgrp->refcnt)) { close(cgrp->fd); zfree(&cgrp->name); free(cgrp); diff --git a/tools/perf/util/cgroup.h b/tools/perf/util/cgroup.h index 89acd6debdc5..b4b8cb42fe5e 100644 --- a/tools/perf/util/cgroup.h +++ b/tools/perf/util/cgroup.h @@ -1,12 +1,14 @@ #ifndef __CGROUP_H__ #define __CGROUP_H__ +#include <linux/atomic.h> + struct option; struct cgroup_sel { char *name; int fd; - int refcnt; + atomic_t refcnt; }; diff --git a/tools/perf/util/cloexec.c b/tools/perf/util/cloexec.c index 6da965bdbc2c..2babddaa2481 100644 --- a/tools/perf/util/cloexec.c +++ b/tools/perf/util/cloexec.c @@ -7,6 +7,16 @@ static unsigned long flag = PERF_FLAG_FD_CLOEXEC; +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) +int __weak sched_getcpu(void) +{ + errno = ENOSYS; + return -1; +} +#endif +#endif + static int perf_flag_probe(void) { /* use 'safest' configuration as used in perf_evsel__fallback() */ diff --git a/tools/perf/util/cloexec.h b/tools/perf/util/cloexec.h index 94a5a7d829d5..68888c29b04a 100644 --- a/tools/perf/util/cloexec.h +++ b/tools/perf/util/cloexec.h @@ -3,4 +3,10 @@ unsigned long perf_event_open_cloexec_flag(void); +#ifdef __GLIBC_PREREQ +#if !__GLIBC_PREREQ(2, 6) +extern int sched_getcpu(void) __THROW; +#endif +#endif + #endif /* __PERF_CLOEXEC_H */ diff --git a/tools/perf/util/comm.c b/tools/perf/util/comm.c index b2bb59df65e1..21b7ff382c3f 100644 --- a/tools/perf/util/comm.c +++ b/tools/perf/util/comm.c @@ -2,24 +2,27 @@ #include "util.h" #include <stdlib.h> #include <stdio.h> +#include <linux/atomic.h> struct comm_str { char *str; struct rb_node rb_node; - int ref; + atomic_t refcnt; }; /* Should perhaps be moved to struct machine */ static struct rb_root comm_str_root; -static void comm_str__get(struct comm_str *cs) +static struct comm_str *comm_str__get(struct comm_str *cs) { - cs->ref++; + if (cs) + atomic_inc(&cs->refcnt); + return cs; } static void comm_str__put(struct comm_str *cs) { - if (!--cs->ref) { + if (cs && atomic_dec_and_test(&cs->refcnt)) { rb_erase(&cs->rb_node, 
&comm_str_root); zfree(&cs->str); free(cs); @@ -40,6 +43,8 @@ static struct comm_str *comm_str__alloc(const char *str) return NULL; } + atomic_set(&cs->refcnt, 0); + return cs; } diff --git a/tools/perf/util/cpumap.c b/tools/perf/util/cpumap.c index c4e55b71010c..3667e2123e5b 100644 --- a/tools/perf/util/cpumap.c +++ b/tools/perf/util/cpumap.c @@ -5,6 +5,7 @@ #include <assert.h> #include <stdio.h> #include <stdlib.h> +#include "asm/bug.h" static struct cpu_map *cpu_map__default_new(void) { @@ -22,6 +23,7 @@ static struct cpu_map *cpu_map__default_new(void) cpus->map[i] = i; cpus->nr = nr_cpus; + atomic_set(&cpus->refcnt, 1); } return cpus; @@ -35,6 +37,7 @@ static struct cpu_map *cpu_map__trim_new(int nr_cpus, int *tmp_cpus) if (cpus != NULL) { cpus->nr = nr_cpus; memcpy(cpus->map, tmp_cpus, payload_size); + atomic_set(&cpus->refcnt, 1); } return cpus; @@ -194,14 +197,32 @@ struct cpu_map *cpu_map__dummy_new(void) if (cpus != NULL) { cpus->nr = 1; cpus->map[0] = -1; + atomic_set(&cpus->refcnt, 1); } return cpus; } -void cpu_map__delete(struct cpu_map *map) +static void cpu_map__delete(struct cpu_map *map) { - free(map); + if (map) { + WARN_ONCE(atomic_read(&map->refcnt) != 0, + "cpu_map refcnt unbalanced\n"); + free(map); + } +} + +struct cpu_map *cpu_map__get(struct cpu_map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void cpu_map__put(struct cpu_map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + cpu_map__delete(map); } int cpu_map__get_socket(struct cpu_map *map, int idx) @@ -263,6 +284,7 @@ static int cpu_map__build_map(struct cpu_map *cpus, struct cpu_map **res, /* ensure we process id in increasing order */ qsort(c->map, c->nr, sizeof(int), cmp_ids); + atomic_set(&cpus->refcnt, 1); *res = c; return 0; } diff --git a/tools/perf/util/cpumap.h b/tools/perf/util/cpumap.h index 61a654849002..0af9cecb4c51 100644 --- a/tools/perf/util/cpumap.h +++ b/tools/perf/util/cpumap.h @@ -3,18 +3,19 @@ #include <stdio.h> #include <stdbool.h> +#include <linux/atomic.h> #include "perf.h" #include "util/debug.h" struct cpu_map { + atomic_t refcnt; int nr; int map[]; }; struct cpu_map *cpu_map__new(const char *cpu_list); struct cpu_map *cpu_map__dummy_new(void); -void cpu_map__delete(struct cpu_map *map); struct cpu_map *cpu_map__read(FILE *file); size_t cpu_map__fprintf(struct cpu_map *map, FILE *fp); int cpu_map__get_socket(struct cpu_map *map, int idx); @@ -22,6 +23,9 @@ int cpu_map__get_core(struct cpu_map *map, int idx); int cpu_map__build_socket_map(struct cpu_map *cpus, struct cpu_map **sockp); int cpu_map__build_core_map(struct cpu_map *cpus, struct cpu_map **corep); +struct cpu_map *cpu_map__get(struct cpu_map *map); +void cpu_map__put(struct cpu_map *map); + static inline int cpu_map__socket(struct cpu_map *sock, int s) { if (!sock || s > sock->nr || s < 0) diff --git a/tools/perf/util/data-convert-bt.c b/tools/perf/util/data-convert-bt.c new file mode 100644 index 000000000000..5bfc1198ab46 --- /dev/null +++ b/tools/perf/util/data-convert-bt.c @@ -0,0 +1,1183 @@ +/* + * CTF writing support via babeltrace. + * + * Copyright (C) 2014, Jiri Olsa <jolsa@redhat.com> + * Copyright (C) 2014, Sebastian Andrzej Siewior <bigeasy@linutronix.de> + * + * Released under the GPL v2. 
(and only v2, not any later version) + */ + +#include <linux/compiler.h> +#include <babeltrace/ctf-writer/writer.h> +#include <babeltrace/ctf-writer/clock.h> +#include <babeltrace/ctf-writer/stream.h> +#include <babeltrace/ctf-writer/event.h> +#include <babeltrace/ctf-writer/event-types.h> +#include <babeltrace/ctf-writer/event-fields.h> +#include <babeltrace/ctf-ir/utils.h> +#include <babeltrace/ctf/events.h> +#include <traceevent/event-parse.h> +#include "asm/bug.h" +#include "data-convert-bt.h" +#include "session.h" +#include "util.h" +#include "debug.h" +#include "tool.h" +#include "evlist.h" +#include "evsel.h" +#include "machine.h" + +#define pr_N(n, fmt, ...) \ + eprintf(n, debug_data_convert, fmt, ##__VA_ARGS__) + +#define pr(fmt, ...) pr_N(1, pr_fmt(fmt), ##__VA_ARGS__) +#define pr2(fmt, ...) pr_N(2, pr_fmt(fmt), ##__VA_ARGS__) + +#define pr_time2(t, fmt, ...) pr_time_N(2, debug_data_convert, t, pr_fmt(fmt), ##__VA_ARGS__) + +struct evsel_priv { + struct bt_ctf_event_class *event_class; +}; + +#define MAX_CPUS 4096 + +struct ctf_stream { + struct bt_ctf_stream *stream; + int cpu; + u32 count; +}; + +struct ctf_writer { + /* writer primitives */ + struct bt_ctf_writer *writer; + struct ctf_stream **stream; + int stream_cnt; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_clock *clock; + + /* data types */ + union { + struct { + struct bt_ctf_field_type *s64; + struct bt_ctf_field_type *u64; + struct bt_ctf_field_type *s32; + struct bt_ctf_field_type *u32; + struct bt_ctf_field_type *string; + struct bt_ctf_field_type *u64_hex; + }; + struct bt_ctf_field_type *array[6]; + } data; +}; + +struct convert { + struct perf_tool tool; + struct ctf_writer writer; + + u64 events_size; + u64 events_count; + + /* Ordered events configured queue size. 
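 *
 * (Editor's aside, not part of this patch: this member is filled from the
 * perfconfig key "convert.queue-size" in convert__config() further down and,
 * when non-zero, handed to ordered_events__set_alloc_size() in
 * bt_convert__perf2ctf(). In ~/.perfconfig that would look roughly like:
 *
 *	[convert]
 *		queue-size = 100000
 * )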
*/ + u64 queue_size; +}; + +static int value_set(struct bt_ctf_field_type *type, + struct bt_ctf_event *event, + const char *name, u64 val) +{ + struct bt_ctf_field *field; + bool sign = bt_ctf_field_type_integer_get_signed(type); + int ret; + + field = bt_ctf_field_create(type); + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (sign) { + ret = bt_ctf_field_signed_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } else { + ret = bt_ctf_field_unsigned_integer_set_value(field, val); + if (ret) { + pr_err("failed to set field value %s\n", name); + goto err; + } + } + + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err; + } + + pr2(" SET [%s = %" PRIu64 "]\n", name, val); + +err: + bt_ctf_field_put(field); + return ret; +} + +#define __FUNC_VALUE_SET(_name, _val_type) \ +static __maybe_unused int value_set_##_name(struct ctf_writer *cw, \ + struct bt_ctf_event *event, \ + const char *name, \ + _val_type val) \ +{ \ + struct bt_ctf_field_type *type = cw->data._name; \ + return value_set(type, event, name, (u64) val); \ +} + +#define FUNC_VALUE_SET(_name) __FUNC_VALUE_SET(_name, _name) + +FUNC_VALUE_SET(s32) +FUNC_VALUE_SET(u32) +FUNC_VALUE_SET(s64) +FUNC_VALUE_SET(u64) +__FUNC_VALUE_SET(u64_hex, u64) + +static struct bt_ctf_field_type* +get_tracepoint_field_type(struct ctf_writer *cw, struct format_field *field) +{ + unsigned long flags = field->flags; + + if (flags & FIELD_IS_STRING) + return cw->data.string; + + if (!(flags & FIELD_IS_SIGNED)) { + /* unsigned long are mostly pointers */ + if (flags & FIELD_IS_LONG || flags & FIELD_IS_POINTER) + return cw->data.u64_hex; + } + + if (flags & FIELD_IS_SIGNED) { + if (field->size == 8) + return cw->data.s64; + else + return cw->data.s32; + } + + if (field->size == 8) + return cw->data.u64; + else + return cw->data.u32; +} + +static unsigned long long adjust_signedness(unsigned long long value_int, int size) +{ + unsigned long long value_mask; + + /* + * value_mask = (1 << (size * 8 - 1)) - 1. + * Directly set value_mask for code readers. + */ + switch (size) { + case 1: + value_mask = 0x7fULL; + break; + case 2: + value_mask = 0x7fffULL; + break; + case 4: + value_mask = 0x7fffffffULL; + break; + case 8: + /* + * For 64 bit value, return it self. There is no need + * to fill high bit. + */ + /* Fall through */ + default: + /* BUG! */ + return value_int; + } + + /* If it is a positive value, don't adjust. */ + if ((value_int & (~0ULL - value_mask)) == 0) + return value_int; + + /* Fill upper part of value_int with 1 to make it a negative long long. 
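 *
 * (Editor's aside, not part of this patch: a worked example. For a 2-byte
 * field holding -100, pevent_read_number() returns value_int = 0xff9c and
 * value_mask = 0x7fff, so the sign bit is set and the expression below
 * yields (0xff9c & 0x7fff) | ~0x7fffULL = 0xffffffffffffff9c, i.e. -100
 * as a signed 64-bit integer.)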
*/ + return (value_int & value_mask) | ~value_mask; +} + +static int add_tracepoint_field_value(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_sample *sample, + struct format_field *fmtf) +{ + struct bt_ctf_field_type *type; + struct bt_ctf_field *array_field; + struct bt_ctf_field *field; + const char *name = fmtf->name; + void *data = sample->raw_data; + unsigned long flags = fmtf->flags; + unsigned int n_items; + unsigned int i; + unsigned int offset; + unsigned int len; + int ret; + + name = fmtf->alias; + offset = fmtf->offset; + len = fmtf->size; + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_DYNAMIC) { + unsigned long long tmp_val; + + tmp_val = pevent_read_number(fmtf->event->pevent, + data + offset, len); + offset = tmp_val; + len = offset >> 16; + offset &= 0xffff; + } + + if (flags & FIELD_IS_ARRAY) { + + type = bt_ctf_event_class_get_field_by_name( + event_class, name); + array_field = bt_ctf_field_create(type); + bt_ctf_field_type_put(type); + if (!array_field) { + pr_err("Failed to create array type %s\n", name); + return -1; + } + + len = fmtf->size / fmtf->arraylen; + n_items = fmtf->arraylen; + } else { + n_items = 1; + array_field = NULL; + } + + type = get_tracepoint_field_type(cw, fmtf); + + for (i = 0; i < n_items; i++) { + if (flags & FIELD_IS_ARRAY) + field = bt_ctf_field_array_get_field(array_field, i); + else + field = bt_ctf_field_create(type); + + if (!field) { + pr_err("failed to create a field %s\n", name); + return -1; + } + + if (flags & FIELD_IS_STRING) + ret = bt_ctf_field_string_set_value(field, + data + offset + i * len); + else { + unsigned long long value_int; + + value_int = pevent_read_number( + fmtf->event->pevent, + data + offset + i * len, len); + + if (!(flags & FIELD_IS_SIGNED)) + ret = bt_ctf_field_unsigned_integer_set_value( + field, value_int); + else + ret = bt_ctf_field_signed_integer_set_value( + field, adjust_signedness(value_int, len)); + } + + if (ret) { + pr_err("failed to set file value %s\n", name); + goto err_put_field; + } + if (!(flags & FIELD_IS_ARRAY)) { + ret = bt_ctf_event_set_payload(event, name, field); + if (ret) { + pr_err("failed to set payload %s\n", name); + goto err_put_field; + } + } + bt_ctf_field_put(field); + } + if (flags & FIELD_IS_ARRAY) { + ret = bt_ctf_event_set_payload(event, name, array_field); + if (ret) { + pr_err("Failed add payload array %s\n", name); + return -1; + } + bt_ctf_field_put(array_field); + } + return 0; + +err_put_field: + bt_ctf_field_put(field); + return -1; +} + +static int add_tracepoint_fields_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct format_field *fields, + struct perf_sample *sample) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + ret = add_tracepoint_field_value(cw, event_class, event, sample, + field); + if (ret) + return -1; + } + return 0; +} + +static int add_tracepoint_values(struct ctf_writer *cw, + struct bt_ctf_event_class *event_class, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_values(cw, event_class, event, + common_fields, sample); + if (!ret) + ret = add_tracepoint_fields_values(cw, event_class, event, + fields, sample); + + 
return ret; +} + +static int add_generic_values(struct ctf_writer *cw, + struct bt_ctf_event *event, + struct perf_evsel *evsel, + struct perf_sample *sample) +{ + u64 type = evsel->attr.sample_type; + int ret; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + + if (type & PERF_SAMPLE_IP) { + ret = value_set_u64_hex(cw, event, "perf_ip", sample->ip); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TID) { + ret = value_set_s32(cw, event, "perf_tid", sample->tid); + if (ret) + return -1; + + ret = value_set_s32(cw, event, "perf_pid", sample->pid); + if (ret) + return -1; + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) { + ret = value_set_u64(cw, event, "perf_id", sample->id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_STREAM_ID) { + ret = value_set_u64(cw, event, "perf_stream_id", sample->stream_id); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_PERIOD) { + ret = value_set_u64(cw, event, "perf_period", sample->period); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_WEIGHT) { + ret = value_set_u64(cw, event, "perf_weight", sample->weight); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_DATA_SRC) { + ret = value_set_u64(cw, event, "perf_data_src", + sample->data_src); + if (ret) + return -1; + } + + if (type & PERF_SAMPLE_TRANSACTION) { + ret = value_set_u64(cw, event, "perf_transaction", + sample->transaction); + if (ret) + return -1; + } + + return 0; +} + +static int ctf_stream__flush(struct ctf_stream *cs) +{ + int err = 0; + + if (cs) { + err = bt_ctf_stream_flush(cs->stream); + if (err) + pr_err("CTF stream %d flush failed\n", cs->cpu); + + pr("Flush stream for cpu %d (%u samples)\n", + cs->cpu, cs->count); + + cs->count = 0; + } + + return err; +} + +static struct ctf_stream *ctf_stream__create(struct ctf_writer *cw, int cpu) +{ + struct ctf_stream *cs; + struct bt_ctf_field *pkt_ctx = NULL; + struct bt_ctf_field *cpu_field = NULL; + struct bt_ctf_stream *stream = NULL; + int ret; + + cs = zalloc(sizeof(*cs)); + if (!cs) { + pr_err("Failed to allocate ctf stream\n"); + return NULL; + } + + stream = bt_ctf_writer_create_stream(cw->writer, cw->stream_class); + if (!stream) { + pr_err("Failed to create CTF stream\n"); + goto out; + } + + pkt_ctx = bt_ctf_stream_get_packet_context(stream); + if (!pkt_ctx) { + pr_err("Failed to obtain packet context\n"); + goto out; + } + + cpu_field = bt_ctf_field_structure_get_field(pkt_ctx, "cpu_id"); + bt_ctf_field_put(pkt_ctx); + if (!cpu_field) { + pr_err("Failed to obtain cpu field\n"); + goto out; + } + + ret = bt_ctf_field_unsigned_integer_set_value(cpu_field, (u32) cpu); + if (ret) { + pr_err("Failed to update CPU number\n"); + goto out; + } + + bt_ctf_field_put(cpu_field); + + cs->cpu = cpu; + cs->stream = stream; + return cs; + +out: + if (cpu_field) + bt_ctf_field_put(cpu_field); + if (stream) + bt_ctf_stream_put(stream); + + free(cs); + return NULL; +} + +static void ctf_stream__delete(struct ctf_stream *cs) +{ + if (cs) { + bt_ctf_stream_put(cs->stream); + free(cs); + } +} + +static struct ctf_stream *ctf_stream(struct ctf_writer *cw, int cpu) +{ + struct ctf_stream *cs = cw->stream[cpu]; + + if (!cs) { + cs = ctf_stream__create(cw, cpu); + cw->stream[cpu] = cs; + } + + return cs; +} + +static int 
get_sample_cpu(struct ctf_writer *cw, struct perf_sample *sample, + struct perf_evsel *evsel) +{ + int cpu = 0; + + if (evsel->attr.sample_type & PERF_SAMPLE_CPU) + cpu = sample->cpu; + + if (cpu > cw->stream_cnt) { + pr_err("Event was recorded for CPU %d, limit is at %d.\n", + cpu, cw->stream_cnt); + cpu = 0; + } + + return cpu; +} + +#define STREAM_FLUSH_COUNT 100000 + +/* + * Currently we have no other way to determine the + * time for the stream flush other than keep track + * of the number of events and check it against + * threshold. + */ +static bool is_flush_needed(struct ctf_stream *cs) +{ + return cs->count >= STREAM_FLUSH_COUNT; +} + +static int process_sample_event(struct perf_tool *tool, + union perf_event *_event __maybe_unused, + struct perf_sample *sample, + struct perf_evsel *evsel, + struct machine *machine __maybe_unused) +{ + struct convert *c = container_of(tool, struct convert, tool); + struct evsel_priv *priv = evsel->priv; + struct ctf_writer *cw = &c->writer; + struct ctf_stream *cs; + struct bt_ctf_event_class *event_class; + struct bt_ctf_event *event; + int ret; + + if (WARN_ONCE(!priv, "Failed to setup all events.\n")) + return 0; + + event_class = priv->event_class; + + /* update stats */ + c->events_count++; + c->events_size += _event->header.size; + + pr_time2(sample->time, "sample %" PRIu64 "\n", c->events_count); + + event = bt_ctf_event_create(event_class); + if (!event) { + pr_err("Failed to create an CTF event\n"); + return -1; + } + + bt_ctf_clock_set_time(cw->clock, sample->time); + + ret = add_generic_values(cw, event, evsel, sample); + if (ret) + return -1; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_values(cw, event_class, event, + evsel, sample); + if (ret) + return -1; + } + + cs = ctf_stream(cw, get_sample_cpu(cw, sample, evsel)); + if (cs) { + if (is_flush_needed(cs)) + ctf_stream__flush(cs); + + cs->count++; + bt_ctf_stream_append_event(cs->stream, event); + } + + bt_ctf_event_put(event); + return cs ? 0 : -1; +} + +/* If dup < 0, add a prefix. Else, add _dupl_X suffix. */ +static char *change_name(char *name, char *orig_name, int dup) +{ + char *new_name = NULL; + size_t len; + + if (!name) + name = orig_name; + + if (dup >= 10) + goto out; + /* + * Add '_' prefix to potential keywork. According to + * Mathieu Desnoyers (https://lkml.org/lkml/2015/1/23/652), + * futher CTF spec updating may require us to use '$'. + */ + if (dup < 0) + len = strlen(name) + sizeof("_"); + else + len = strlen(orig_name) + sizeof("_dupl_X"); + + new_name = malloc(len); + if (!new_name) + goto out; + + if (dup < 0) + snprintf(new_name, len, "_%s", name); + else + snprintf(new_name, len, "%s_dupl_%d", orig_name, dup); + +out: + if (name != orig_name) + free(name); + return new_name; +} + +static int event_class_add_field(struct bt_ctf_event_class *event_class, + struct bt_ctf_field_type *type, + struct format_field *field) +{ + struct bt_ctf_field_type *t = NULL; + char *name; + int dup = 1; + int ret; + + /* alias was already assigned */ + if (field->alias != field->name) + return bt_ctf_event_class_add_field(event_class, type, + (char *)field->alias); + + name = field->name; + + /* If 'name' is a keywork, add prefix. 
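 *
 * (Editor's aside, not part of this patch: change_name() above encodes both
 * rules. With dup < 0 it prepends '_', so a tracepoint field that happens to
 * collide with a CTF keyword, say "event", becomes "_event". With dup >= 0 it
 * appends a suffix built from the original name, so a second field named "id"
 * becomes "id_dupl_1", a third "id_dupl_2", and after ten duplicates it gives
 * up and returns NULL. The field names here are illustrative only.)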
*/ + if (bt_ctf_validate_identifier(name)) + name = change_name(name, field->name, -1); + + if (!name) { + pr_err("Failed to fix invalid identifier."); + return -1; + } + while ((t = bt_ctf_event_class_get_field_by_name(event_class, name))) { + bt_ctf_field_type_put(t); + name = change_name(name, field->name, dup++); + if (!name) { + pr_err("Failed to create dup name for '%s'\n", field->name); + return -1; + } + } + + ret = bt_ctf_event_class_add_field(event_class, type, name); + if (!ret) + field->alias = name; + + return ret; +} + +static int add_tracepoint_fields_types(struct ctf_writer *cw, + struct format_field *fields, + struct bt_ctf_event_class *event_class) +{ + struct format_field *field; + int ret; + + for (field = fields; field; field = field->next) { + struct bt_ctf_field_type *type; + unsigned long flags = field->flags; + + pr2(" field '%s'\n", field->name); + + type = get_tracepoint_field_type(cw, field); + if (!type) + return -1; + + /* + * A string is an array of chars. For this we use the string + * type and don't care that it is an array. What we don't + * support is an array of strings. + */ + if (flags & FIELD_IS_STRING) + flags &= ~FIELD_IS_ARRAY; + + if (flags & FIELD_IS_ARRAY) + type = bt_ctf_field_type_array_create(type, field->arraylen); + + ret = event_class_add_field(event_class, type, field); + + if (flags & FIELD_IS_ARRAY) + bt_ctf_field_type_put(type); + + if (ret) { + pr_err("Failed to add field '%s': %d\n", + field->name, ret); + return -1; + } + } + + return 0; +} + +static int add_tracepoint_types(struct ctf_writer *cw, + struct perf_evsel *evsel, + struct bt_ctf_event_class *class) +{ + struct format_field *common_fields = evsel->tp_format->format.common_fields; + struct format_field *fields = evsel->tp_format->format.fields; + int ret; + + ret = add_tracepoint_fields_types(cw, common_fields, class); + if (!ret) + ret = add_tracepoint_fields_types(cw, fields, class); + + return ret; +} + +static int add_generic_types(struct ctf_writer *cw, struct perf_evsel *evsel, + struct bt_ctf_event_class *event_class) +{ + u64 type = evsel->attr.sample_type; + + /* + * missing: + * PERF_SAMPLE_TIME - not needed as we have it in + * ctf event header + * PERF_SAMPLE_READ - TODO + * PERF_SAMPLE_CALLCHAIN - TODO + * PERF_SAMPLE_RAW - tracepoint fields are handled separately + * PERF_SAMPLE_BRANCH_STACK - TODO + * PERF_SAMPLE_REGS_USER - TODO + * PERF_SAMPLE_STACK_USER - TODO + */ + +#define ADD_FIELD(cl, t, n) \ + do { \ + pr2(" field '%s'\n", n); \ + if (bt_ctf_event_class_add_field(cl, t, n)) { \ + pr_err("Failed to add field '%s';\n", n); \ + return -1; \ + } \ + } while (0) + + if (type & PERF_SAMPLE_IP) + ADD_FIELD(event_class, cw->data.u64_hex, "perf_ip"); + + if (type & PERF_SAMPLE_TID) { + ADD_FIELD(event_class, cw->data.s32, "perf_tid"); + ADD_FIELD(event_class, cw->data.s32, "perf_pid"); + } + + if ((type & PERF_SAMPLE_ID) || + (type & PERF_SAMPLE_IDENTIFIER)) + ADD_FIELD(event_class, cw->data.u64, "perf_id"); + + if (type & PERF_SAMPLE_STREAM_ID) + ADD_FIELD(event_class, cw->data.u64, "perf_stream_id"); + + if (type & PERF_SAMPLE_PERIOD) + ADD_FIELD(event_class, cw->data.u64, "perf_period"); + + if (type & PERF_SAMPLE_WEIGHT) + ADD_FIELD(event_class, cw->data.u64, "perf_weight"); + + if (type & PERF_SAMPLE_DATA_SRC) + ADD_FIELD(event_class, cw->data.u64, "perf_data_src"); + + if (type & PERF_SAMPLE_TRANSACTION) + ADD_FIELD(event_class, cw->data.u64, "perf_transaction"); + +#undef ADD_FIELD + return 0; +} + +static int add_event(struct ctf_writer *cw, struct 
perf_evsel *evsel) +{ + struct bt_ctf_event_class *event_class; + struct evsel_priv *priv; + const char *name = perf_evsel__name(evsel); + int ret; + + pr("Adding event '%s' (type %d)\n", name, evsel->attr.type); + + event_class = bt_ctf_event_class_create(name); + if (!event_class) + return -1; + + ret = add_generic_types(cw, evsel, event_class); + if (ret) + goto err; + + if (evsel->attr.type == PERF_TYPE_TRACEPOINT) { + ret = add_tracepoint_types(cw, evsel, event_class); + if (ret) + goto err; + } + + ret = bt_ctf_stream_class_add_event_class(cw->stream_class, event_class); + if (ret) { + pr("Failed to add event class into stream.\n"); + goto err; + } + + priv = malloc(sizeof(*priv)); + if (!priv) + goto err; + + priv->event_class = event_class; + evsel->priv = priv; + return 0; + +err: + bt_ctf_event_class_put(event_class); + pr_err("Failed to add event '%s'.\n", name); + return -1; +} + +static int setup_events(struct ctf_writer *cw, struct perf_session *session) +{ + struct perf_evlist *evlist = session->evlist; + struct perf_evsel *evsel; + int ret; + + evlist__for_each(evlist, evsel) { + ret = add_event(cw, evsel); + if (ret) + return ret; + } + return 0; +} + +static int setup_streams(struct ctf_writer *cw, struct perf_session *session) +{ + struct ctf_stream **stream; + struct perf_header *ph = &session->header; + int ncpus; + + /* + * Try to get the number of cpus used in the data file, + * if not present fallback to the MAX_CPUS. + */ + ncpus = ph->env.nr_cpus_avail ?: MAX_CPUS; + + stream = zalloc(sizeof(*stream) * ncpus); + if (!stream) { + pr_err("Failed to allocate streams.\n"); + return -ENOMEM; + } + + cw->stream = stream; + cw->stream_cnt = ncpus; + return 0; +} + +static void free_streams(struct ctf_writer *cw) +{ + int cpu; + + for (cpu = 0; cpu < cw->stream_cnt; cpu++) + ctf_stream__delete(cw->stream[cpu]); + + free(cw->stream); +} + +static int ctf_writer__setup_env(struct ctf_writer *cw, + struct perf_session *session) +{ + struct perf_header *header = &session->header; + struct bt_ctf_writer *writer = cw->writer; + +#define ADD(__n, __v) \ +do { \ + if (bt_ctf_writer_add_environment_field(writer, __n, __v)) \ + return -1; \ +} while (0) + + ADD("host", header->env.hostname); + ADD("sysname", "Linux"); + ADD("release", header->env.os_release); + ADD("version", header->env.version); + ADD("machine", header->env.arch); + ADD("domain", "kernel"); + ADD("tracer_name", "perf"); + +#undef ADD + return 0; +} + +static int ctf_writer__setup_clock(struct ctf_writer *cw) +{ + struct bt_ctf_clock *clock = cw->clock; + + bt_ctf_clock_set_description(clock, "perf clock"); + +#define SET(__n, __v) \ +do { \ + if (bt_ctf_clock_set_##__n(clock, __v)) \ + return -1; \ +} while (0) + + SET(frequency, 1000000000); + SET(offset_s, 0); + SET(offset, 0); + SET(precision, 10); + SET(is_absolute, 0); + +#undef SET + return 0; +} + +static struct bt_ctf_field_type *create_int_type(int size, bool sign, bool hex) +{ + struct bt_ctf_field_type *type; + + type = bt_ctf_field_type_integer_create(size); + if (!type) + return NULL; + + if (sign && + bt_ctf_field_type_integer_set_signed(type, 1)) + goto err; + + if (hex && + bt_ctf_field_type_integer_set_base(type, BT_CTF_INTEGER_BASE_HEXADECIMAL)) + goto err; + + pr2("Created type: INTEGER %d-bit %ssigned %s\n", + size, sign ? "un" : "", hex ? 
"hex" : ""); + return type; + +err: + bt_ctf_field_type_put(type); + return NULL; +} + +static void ctf_writer__cleanup_data(struct ctf_writer *cw) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(cw->data.array); i++) + bt_ctf_field_type_put(cw->data.array[i]); +} + +static int ctf_writer__init_data(struct ctf_writer *cw) +{ +#define CREATE_INT_TYPE(type, size, sign, hex) \ +do { \ + (type) = create_int_type(size, sign, hex); \ + if (!(type)) \ + goto err; \ +} while (0) + + CREATE_INT_TYPE(cw->data.s64, 64, true, false); + CREATE_INT_TYPE(cw->data.u64, 64, false, false); + CREATE_INT_TYPE(cw->data.s32, 32, true, false); + CREATE_INT_TYPE(cw->data.u32, 32, false, false); + CREATE_INT_TYPE(cw->data.u64_hex, 64, false, true); + + cw->data.string = bt_ctf_field_type_string_create(); + if (cw->data.string) + return 0; + +err: + ctf_writer__cleanup_data(cw); + pr_err("Failed to create data types.\n"); + return -1; +} + +static void ctf_writer__cleanup(struct ctf_writer *cw) +{ + ctf_writer__cleanup_data(cw); + + bt_ctf_clock_put(cw->clock); + free_streams(cw); + bt_ctf_stream_class_put(cw->stream_class); + bt_ctf_writer_put(cw->writer); + + /* and NULL all the pointers */ + memset(cw, 0, sizeof(*cw)); +} + +static int ctf_writer__init(struct ctf_writer *cw, const char *path) +{ + struct bt_ctf_writer *writer; + struct bt_ctf_stream_class *stream_class; + struct bt_ctf_clock *clock; + struct bt_ctf_field_type *pkt_ctx_type; + int ret; + + /* CTF writer */ + writer = bt_ctf_writer_create(path); + if (!writer) + goto err; + + cw->writer = writer; + + /* CTF clock */ + clock = bt_ctf_clock_create("perf_clock"); + if (!clock) { + pr("Failed to create CTF clock.\n"); + goto err_cleanup; + } + + cw->clock = clock; + + if (ctf_writer__setup_clock(cw)) { + pr("Failed to setup CTF clock.\n"); + goto err_cleanup; + } + + /* CTF stream class */ + stream_class = bt_ctf_stream_class_create("perf_stream"); + if (!stream_class) { + pr("Failed to create CTF stream class.\n"); + goto err_cleanup; + } + + cw->stream_class = stream_class; + + /* CTF clock stream setup */ + if (bt_ctf_stream_class_set_clock(stream_class, clock)) { + pr("Failed to assign CTF clock to stream class.\n"); + goto err_cleanup; + } + + if (ctf_writer__init_data(cw)) + goto err_cleanup; + + /* Add cpu_id for packet context */ + pkt_ctx_type = bt_ctf_stream_class_get_packet_context_type(stream_class); + if (!pkt_ctx_type) + goto err_cleanup; + + ret = bt_ctf_field_type_structure_add_field(pkt_ctx_type, cw->data.u32, "cpu_id"); + bt_ctf_field_type_put(pkt_ctx_type); + if (ret) + goto err_cleanup; + + /* CTF clock writer setup */ + if (bt_ctf_writer_add_clock(writer, clock)) { + pr("Failed to assign CTF clock to writer.\n"); + goto err_cleanup; + } + + return 0; + +err_cleanup: + ctf_writer__cleanup(cw); +err: + pr_err("Failed to setup CTF writer.\n"); + return -1; +} + +static int ctf_writer__flush_streams(struct ctf_writer *cw) +{ + int cpu, ret = 0; + + for (cpu = 0; cpu < cw->stream_cnt && !ret; cpu++) + ret = ctf_stream__flush(cw->stream[cpu]); + + return ret; +} + +static int convert__config(const char *var, const char *value, void *cb) +{ + struct convert *c = cb; + + if (!strcmp(var, "convert.queue-size")) { + c->queue_size = perf_config_u64(var, value); + return 0; + } + + return perf_default_config(var, value, cb); +} + +int bt_convert__perf2ctf(const char *input, const char *path, bool force) +{ + struct perf_session *session; + struct perf_data_file file = { + .path = input, + .mode = PERF_DATA_MODE_READ, + .force = force, + 
}; + struct convert c = { + .tool = { + .sample = process_sample_event, + .mmap = perf_event__process_mmap, + .mmap2 = perf_event__process_mmap2, + .comm = perf_event__process_comm, + .exit = perf_event__process_exit, + .fork = perf_event__process_fork, + .lost = perf_event__process_lost, + .tracing_data = perf_event__process_tracing_data, + .build_id = perf_event__process_build_id, + .ordered_events = true, + .ordering_requires_timestamps = true, + }, + }; + struct ctf_writer *cw = &c.writer; + int err = -1; + + perf_config(convert__config, &c); + + /* CTF writer */ + if (ctf_writer__init(cw, path)) + return -1; + + /* perf.data session */ + session = perf_session__new(&file, 0, &c.tool); + if (!session) + goto free_writer; + + if (c.queue_size) { + ordered_events__set_alloc_size(&session->ordered_events, + c.queue_size); + } + + /* CTF writer env/clock setup */ + if (ctf_writer__setup_env(cw, session)) + goto free_session; + + /* CTF events setup */ + if (setup_events(cw, session)) + goto free_session; + + if (setup_streams(cw, session)) + goto free_session; + + err = perf_session__process_events(session); + if (!err) + err = ctf_writer__flush_streams(cw); + else + pr_err("Error during conversion.\n"); + + fprintf(stderr, + "[ perf data convert: Converted '%s' into CTF data '%s' ]\n", + file.path, path); + + fprintf(stderr, + "[ perf data convert: Converted and wrote %.3f MB (%" PRIu64 " samples) ]\n", + (double) c.events_size / 1024.0 / 1024.0, + c.events_count); + + perf_session__delete(session); + ctf_writer__cleanup(cw); + + return err; + +free_session: + perf_session__delete(session); +free_writer: + ctf_writer__cleanup(cw); + pr_err("Error during conversion setup.\n"); + return err; +} diff --git a/tools/perf/util/data-convert-bt.h b/tools/perf/util/data-convert-bt.h new file mode 100644 index 000000000000..4c204342a9d8 --- /dev/null +++ b/tools/perf/util/data-convert-bt.h @@ -0,0 +1,8 @@ +#ifndef __DATA_CONVERT_BT_H +#define __DATA_CONVERT_BT_H +#ifdef HAVE_LIBBABELTRACE_SUPPORT + +int bt_convert__perf2ctf(const char *input_name, const char *to_ctf, bool force); + +#endif /* HAVE_LIBBABELTRACE_SUPPORT */ +#endif /* __DATA_CONVERT_BT_H */ diff --git a/tools/perf/util/db-export.c b/tools/perf/util/db-export.c index c81dae399763..1c9689e4cc17 100644 --- a/tools/perf/util/db-export.c +++ b/tools/perf/util/db-export.c @@ -122,6 +122,7 @@ int db_export__machine(struct db_export *dbe, struct machine *machine) int db_export__thread(struct db_export *dbe, struct thread *thread, struct machine *machine, struct comm *comm) { + struct thread *main_thread; u64 main_thread_db_id = 0; int err; @@ -131,8 +132,6 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, thread->db_id = ++dbe->thread_last_db_id; if (thread->pid_ != -1) { - struct thread *main_thread; - if (thread->pid_ == thread->tid) { main_thread = thread; } else { @@ -144,14 +143,16 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, err = db_export__thread(dbe, main_thread, machine, comm); if (err) - return err; + goto out_put; if (comm) { err = db_export__comm_thread(dbe, comm, thread); if (err) - return err; + goto out_put; } } main_thread_db_id = main_thread->db_id; + if (main_thread != thread) + thread__put(main_thread); } if (dbe->export_thread) @@ -159,6 +160,10 @@ int db_export__thread(struct db_export *dbe, struct thread *thread, machine); return 0; + +out_put: + thread__put(main_thread); + return err; } int db_export__comm(struct db_export *dbe, struct comm *comm, @@ -229,7 +234,7 @@ int 
db_export__symbol(struct db_export *dbe, struct symbol *sym, static struct thread *get_main_thread(struct machine *machine, struct thread *thread) { if (thread->pid_ == thread->tid) - return thread; + return thread__get(thread); if (thread->pid_ == -1) return NULL; @@ -282,13 +287,13 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) + struct addr_location *al) { + struct thread* thread = al->thread; struct export_sample es = { .event = event, .sample = sample, .evsel = evsel, - .thread = thread, .al = al, }; struct thread *main_thread; @@ -309,12 +314,12 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, err = db_export__thread(dbe, thread, al->machine, comm); if (err) - return err; + goto out_put; if (comm) { err = db_export__comm(dbe, comm, main_thread); if (err) - return err; + goto out_put; es.comm_db_id = comm->db_id; } @@ -322,7 +327,7 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, err = db_ids_from_al(dbe, al, &es.dso_db_id, &es.sym_db_id, &es.offset); if (err) - return err; + goto out_put; if ((evsel->attr.sample_type & PERF_SAMPLE_ADDR) && sample_addr_correlates_sym(&evsel->attr)) { @@ -332,20 +337,22 @@ int db_export__sample(struct db_export *dbe, union perf_event *event, err = db_ids_from_al(dbe, &addr_al, &es.addr_dso_db_id, &es.addr_sym_db_id, &es.addr_offset); if (err) - return err; + goto out_put; if (dbe->crp) { err = thread_stack__process(thread, comm, sample, al, &addr_al, es.db_id, dbe->crp); if (err) - return err; + goto out_put; } } if (dbe->export_sample) - return dbe->export_sample(dbe, &es); + err = dbe->export_sample(dbe, &es); - return 0; +out_put: + thread__put(main_thread); + return err; } static struct { diff --git a/tools/perf/util/db-export.h b/tools/perf/util/db-export.h index adbd22d66798..25e22fd76aca 100644 --- a/tools/perf/util/db-export.h +++ b/tools/perf/util/db-export.h @@ -34,7 +34,6 @@ struct export_sample { union perf_event *event; struct perf_sample *sample; struct perf_evsel *evsel; - struct thread *thread; struct addr_location *al; u64 db_id; u64 comm_db_id; @@ -97,7 +96,7 @@ int db_export__branch_type(struct db_export *dbe, u32 branch_type, const char *name); int db_export__sample(struct db_export *dbe, union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al); + struct addr_location *al); int db_export__branch_types(struct db_export *dbe); diff --git a/tools/perf/util/debug.c b/tools/perf/util/debug.c index ad60b2f20258..2da5581ec74d 100644 --- a/tools/perf/util/debug.c +++ b/tools/perf/util/debug.c @@ -20,6 +20,7 @@ int verbose; bool dump_trace = false, quiet = false; int debug_ordered_events; static int redirect_to_stderr; +int debug_data_convert; static int _eprintf(int level, int var, const char *fmt, va_list args) { @@ -147,6 +148,7 @@ static struct debug_variable { { .name = "verbose", .ptr = &verbose }, { .name = "ordered-events", .ptr = &debug_ordered_events}, { .name = "stderr", .ptr = &redirect_to_stderr}, + { .name = "data-convert", .ptr = &debug_data_convert }, { .name = NULL, } }; diff --git a/tools/perf/util/debug.h b/tools/perf/util/debug.h index be264d6f3b30..caac2fdc6105 100644 --- a/tools/perf/util/debug.h +++ b/tools/perf/util/debug.h @@ -12,6 +12,7 @@ extern int verbose; extern bool quiet, dump_trace; extern int 
debug_ordered_events; +extern int debug_data_convert; #ifndef pr_fmt #define pr_fmt(fmt) fmt diff --git a/tools/perf/util/dso.c b/tools/perf/util/dso.c index c2f7d3b90966..7c0c08386a1d 100644 --- a/tools/perf/util/dso.c +++ b/tools/perf/util/dso.c @@ -4,6 +4,7 @@ #include "symbol.h" #include "dso.h" #include "machine.h" +#include "auxtrace.h" #include "util.h" #include "debug.h" @@ -45,13 +46,13 @@ int dso__read_binary_type_filename(const struct dso *dso, case DSO_BINARY_TYPE__DEBUGLINK: { char *debuglink; - strncpy(filename, dso->long_name, size); - debuglink = filename + dso->long_name_len; + len = __symbol__join_symfs(filename, size, dso->long_name); + debuglink = filename + len; while (debuglink != filename && *debuglink != '/') debuglink--; if (*debuglink == '/') debuglink++; - ret = filename__read_debuglink(dso->long_name, debuglink, + ret = filename__read_debuglink(filename, debuglink, size - (debuglink - filename)); } break; @@ -148,6 +149,9 @@ static const struct { #ifdef HAVE_ZLIB_SUPPORT { "gz", gzip_decompress_to_file }, #endif +#ifdef HAVE_LZMA_SUPPORT + { "xz", lzma_decompress_to_file }, +#endif { NULL, NULL }, }; @@ -162,32 +166,30 @@ bool is_supported_compression(const char *ext) return false; } -bool is_kmodule_extension(const char *ext) +bool is_kernel_module(const char *pathname, int cpumode) { - if (strncmp(ext, "ko", 2)) - return false; - - if (ext[2] == '\0' || (ext[2] == '.' && is_supported_compression(ext+3))) - return true; + struct kmod_path m; + int mode = cpumode & PERF_RECORD_MISC_CPUMODE_MASK; - return false; -} + WARN_ONCE(mode != cpumode, + "Internal error: passing unmasked cpumode (%x) to is_kernel_module", + cpumode); -bool is_kernel_module(const char *pathname, bool *compressed) -{ - const char *ext = strrchr(pathname, '.'); - - if (ext == NULL) + switch (mode) { + case PERF_RECORD_MISC_USER: + case PERF_RECORD_MISC_HYPERVISOR: + case PERF_RECORD_MISC_GUEST_USER: return false; + /* Treat PERF_RECORD_MISC_CPUMODE_UNKNOWN as kernel */ + default: + if (kmod_path__parse(&m, pathname)) { + pr_err("Failed to check whether %s is a kernel module or not. Assume it is.", + pathname); + return true; + } + } - if (is_supported_compression(ext + 1)) { - if (compressed) - *compressed = true; - ext -= 3; - } else if (compressed) - *compressed = false; - - return is_kmodule_extension(ext + 1); + return m.kmod; } bool decompress_to_file(const char *ext, const char *filename, int output_fd) @@ -209,10 +211,98 @@ bool dso__needs_decompress(struct dso *dso) } /* + * Parses kernel module specified in @path and updates + * @m argument like: + * + * @comp - true if @path contains supported compression suffix, + * false otherwise + * @kmod - true if @path contains '.ko' suffix in right position, + * false otherwise + * @name - if (@alloc_name && @kmod) is true, it contains strdup-ed base name + * of the kernel module without suffixes, otherwise strudup-ed + * base name of @path + * @ext - if (@alloc_ext && @comp) is true, it contains strdup-ed string + * the compression suffix + * + * Returns 0 if there's no strdup error, -ENOMEM otherwise. + */ +int __kmod_path__parse(struct kmod_path *m, const char *path, + bool alloc_name, bool alloc_ext) +{ + const char *name = strrchr(path, '/'); + const char *ext = strrchr(path, '.'); + bool is_simple_name = false; + + memset(m, 0x0, sizeof(*m)); + name = name ? name + 1 : path; + + /* + * '.' is also a valid character for module name. For example: + * [aaa.bbb] is a valid module name. '[' should have higher + * priority than '.ko' suffix. 
+ * + * The kernel names are from machine__mmap_name. Such + * name should belong to kernel itself, not kernel module. + */ + if (name[0] == '[') { + is_simple_name = true; + if ((strncmp(name, "[kernel.kallsyms]", 17) == 0) || + (strncmp(name, "[guest.kernel.kallsyms", 22) == 0) || + (strncmp(name, "[vdso]", 6) == 0) || + (strncmp(name, "[vsyscall]", 10) == 0)) { + m->kmod = false; + + } else + m->kmod = true; + } + + /* No extension, just return name. */ + if ((ext == NULL) || is_simple_name) { + if (alloc_name) { + m->name = strdup(name); + return m->name ? 0 : -ENOMEM; + } + return 0; + } + + if (is_supported_compression(ext + 1)) { + m->comp = true; + ext -= 3; + } + + /* Check .ko extension only if there's enough name left. */ + if (ext > name) + m->kmod = !strncmp(ext, ".ko", 3); + + if (alloc_name) { + if (m->kmod) { + if (asprintf(&m->name, "[%.*s]", (int) (ext - name), name) == -1) + return -ENOMEM; + } else { + if (asprintf(&m->name, "%s", name) == -1) + return -ENOMEM; + } + + strxfrchar(m->name, '-', '_'); + } + + if (alloc_ext && m->comp) { + m->ext = strdup(ext + 4); + if (!m->ext) { + free((void *) m->name); + return -ENOMEM; + } + } + + return 0; +} + +/* * Global list of open DSOs and the counter. */ static LIST_HEAD(dso__data_open); static long dso__data_open_cnt; +static pthread_mutex_t dso__data_open_lock = PTHREAD_MUTEX_INITIALIZER; static void dso__list_add(struct dso *dso) { @@ -240,7 +330,7 @@ static int do_open(char *name) if (fd >= 0) return fd; - pr_debug("dso open failed, mmap: %s\n", + pr_debug("dso open failed: %s\n", strerror_r(errno, sbuf, sizeof(sbuf))); if (!dso__data_open_cnt || errno != EMFILE) break; @@ -382,18 +472,12 @@ static void check_data_close(void) */ void dso__data_close(struct dso *dso) { + pthread_mutex_lock(&dso__data_open_lock); close_dso(dso); + pthread_mutex_unlock(&dso__data_open_lock); } -/** - * dso__data_fd - Get dso's data file descriptor - * @dso: dso object - * @machine: machine object - * - * External interface to find dso's file, open it and - * returns file descriptor. - */ -int dso__data_fd(struct dso *dso, struct machine *machine) +static void try_to_open_dso(struct dso *dso, struct machine *machine) { enum dso_binary_type binary_type_data[] = { DSO_BINARY_TYPE__BUILD_ID_CACHE, @@ -402,11 +486,8 @@ int dso__data_fd(struct dso *dso, struct machine *machine) }; int i = 0; - if (dso->data.status == DSO_DATA_STATUS_ERROR) - return -1; - if (dso->data.fd >= 0) - goto out; + return; if (dso->binary_type != DSO_BINARY_TYPE__NOT_FOUND) { dso->data.fd = open_dso(dso, machine); @@ -426,10 +507,38 @@ out: dso->data.status = DSO_DATA_STATUS_OK; else dso->data.status = DSO_DATA_STATUS_ERROR; +} + +/** + * dso__data_get_fd - Get dso's data file descriptor + * @dso: dso object + * @machine: machine object + * + * External interface to find dso's file, open it and + * returns file descriptor. It should be paired with + * dso__data_put_fd() if it returns non-negative value. 
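For illustration, the get/put pairing this comment describes is used the same way the dso__type() conversion later in this patch uses it; a minimal sketch, assuming valid 'dso' and 'machine' pointers and a hypothetical use_fd() consumer:

	int fd = dso__data_get_fd(dso, machine);

	if (fd >= 0) {
		/* the fd (and the dso open lock) is only valid inside this block */
		use_fd(fd);
		dso__data_put_fd(dso);
	}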
+ */ +int dso__data_get_fd(struct dso *dso, struct machine *machine) +{ + if (dso->data.status == DSO_DATA_STATUS_ERROR) + return -1; + + if (pthread_mutex_lock(&dso__data_open_lock) < 0) + return -1; + + try_to_open_dso(dso, machine); + + if (dso->data.fd < 0) + pthread_mutex_unlock(&dso__data_open_lock); return dso->data.fd; } +void dso__data_put_fd(struct dso *dso __maybe_unused) +{ + pthread_mutex_unlock(&dso__data_open_lock); +} + bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) { u32 flag = 1 << by; @@ -443,10 +552,12 @@ bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by) } static void -dso_cache__free(struct rb_root *root) +dso_cache__free(struct dso *dso) { + struct rb_root *root = &dso->data.cache; struct rb_node *next = rb_first(root); + pthread_mutex_lock(&dso->lock); while (next) { struct dso_cache *cache; @@ -455,10 +566,12 @@ dso_cache__free(struct rb_root *root) rb_erase(&cache->rb_node, root); free(cache); } + pthread_mutex_unlock(&dso->lock); } -static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset) +static struct dso_cache *dso_cache__find(struct dso *dso, u64 offset) { + const struct rb_root *root = &dso->data.cache; struct rb_node * const *p = &root->rb_node; const struct rb_node *parent = NULL; struct dso_cache *cache; @@ -477,17 +590,20 @@ static struct dso_cache *dso_cache__find(const struct rb_root *root, u64 offset) else return cache; } + return NULL; } -static void -dso_cache__insert(struct rb_root *root, struct dso_cache *new) +static struct dso_cache * +dso_cache__insert(struct dso *dso, struct dso_cache *new) { + struct rb_root *root = &dso->data.cache; struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; struct dso_cache *cache; u64 offset = new->offset; + pthread_mutex_lock(&dso->lock); while (*p != NULL) { u64 end; @@ -499,10 +615,17 @@ dso_cache__insert(struct rb_root *root, struct dso_cache *new) p = &(*p)->rb_left; else if (offset >= end) p = &(*p)->rb_right; + else + goto out; } rb_link_node(&new->rb_node, parent, p); rb_insert_color(&new->rb_node, root); + + cache = NULL; +out: + pthread_mutex_unlock(&dso->lock); + return cache; } static ssize_t @@ -517,19 +640,33 @@ dso_cache__memcpy(struct dso_cache *cache, u64 offset, } static ssize_t -dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) +dso_cache__read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) { struct dso_cache *cache; + struct dso_cache *old; ssize_t ret; do { u64 cache_offset; - ret = -ENOMEM; - cache = zalloc(sizeof(*cache) + DSO__DATA_CACHE_SIZE); if (!cache) + return -ENOMEM; + + pthread_mutex_lock(&dso__data_open_lock); + + /* + * dso->data.fd might be closed if other thread opened another + * file (dso) due to open file limit (RLIMIT_NOFILE). 
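The cache read path above is normally reached through dso__data_read_offset(), which now takes the machine so the descriptor can be reopened transparently; a minimal sketch of such a read (buffer size and offset chosen arbitrarily):

	u8 buf[64];
	ssize_t n;

	/* reads are served from the dso_cache rbtree; the fd is (re)opened on demand */
	n = dso__data_read_offset(dso, machine, 0 /* offset */, buf, sizeof(buf));
	if (n < 0)
		pr_debug("dso data read failed\n");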
+ */ + try_to_open_dso(dso, machine); + + if (dso->data.fd < 0) { + ret = -errno; + dso->data.status = DSO_DATA_STATUS_ERROR; break; + } cache_offset = offset & DSO__DATA_CACHE_MASK; @@ -539,11 +676,20 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) cache->offset = cache_offset; cache->size = ret; - dso_cache__insert(&dso->data.cache, cache); + } while (0); - ret = dso_cache__memcpy(cache, offset, data, size); + pthread_mutex_unlock(&dso__data_open_lock); - } while (0); + if (ret > 0) { + old = dso_cache__insert(dso, cache); + if (old) { + /* we lose the race */ + free(cache); + cache = old; + } + + ret = dso_cache__memcpy(cache, offset, data, size); + } if (ret <= 0) free(cache); @@ -551,16 +697,16 @@ dso_cache__read(struct dso *dso, u64 offset, u8 *data, ssize_t size) return ret; } -static ssize_t dso_cache_read(struct dso *dso, u64 offset, - u8 *data, ssize_t size) +static ssize_t dso_cache_read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) { struct dso_cache *cache; - cache = dso_cache__find(&dso->data.cache, offset); + cache = dso_cache__find(dso, offset); if (cache) return dso_cache__memcpy(cache, offset, data, size); else - return dso_cache__read(dso, offset, data, size); + return dso_cache__read(dso, machine, offset, data, size); } /* @@ -568,7 +714,8 @@ static ssize_t dso_cache_read(struct dso *dso, u64 offset, * in the rb_tree. Any read to already cached data is served * by cached data. */ -static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size) +static ssize_t cached_read(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) { ssize_t r = 0; u8 *p = data; @@ -576,7 +723,7 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size) do { ssize_t ret; - ret = dso_cache_read(dso, offset, p, size); + ret = dso_cache_read(dso, machine, offset, p, size); if (ret < 0) return ret; @@ -596,21 +743,44 @@ static ssize_t cached_read(struct dso *dso, u64 offset, u8 *data, ssize_t size) return r; } -static int data_file_size(struct dso *dso) +static int data_file_size(struct dso *dso, struct machine *machine) { + int ret = 0; struct stat st; char sbuf[STRERR_BUFSIZE]; - if (!dso->data.file_size) { - if (fstat(dso->data.fd, &st)) { - pr_err("dso mmap failed, fstat: %s\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - return -1; - } - dso->data.file_size = st.st_size; + if (dso->data.file_size) + return 0; + + if (dso->data.status == DSO_DATA_STATUS_ERROR) + return -1; + + pthread_mutex_lock(&dso__data_open_lock); + + /* + * dso->data.fd might be closed if other thread opened another + * file (dso) due to open file limit (RLIMIT_NOFILE). 
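data_file_size() backs the public dso__data_size() helper, which no longer needs the caller to obtain an fd first; a sketch of a caller, again assuming valid 'dso' and 'machine' pointers:

	off_t size = dso__data_size(dso, machine);

	if (size < 0)
		pr_debug("could not determine dso data size\n");
	else
		pr_debug("dso data size: %" PRIu64 " bytes\n", (u64)size);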
+ */ + try_to_open_dso(dso, machine); + + if (dso->data.fd < 0) { + ret = -errno; + dso->data.status = DSO_DATA_STATUS_ERROR; + goto out; } - return 0; + if (fstat(dso->data.fd, &st) < 0) { + ret = -errno; + pr_err("dso cache fstat failed: %s\n", + strerror_r(errno, sbuf, sizeof(sbuf))); + dso->data.status = DSO_DATA_STATUS_ERROR; + goto out; + } + dso->data.file_size = st.st_size; + +out: + pthread_mutex_unlock(&dso__data_open_lock); + return ret; } /** @@ -622,23 +792,17 @@ static int data_file_size(struct dso *dso) */ off_t dso__data_size(struct dso *dso, struct machine *machine) { - int fd; - - fd = dso__data_fd(dso, machine); - if (fd < 0) - return fd; - - if (data_file_size(dso)) + if (data_file_size(dso, machine)) return -1; /* For now just estimate dso data size is close to file size */ return dso->data.file_size; } -static ssize_t data_read_offset(struct dso *dso, u64 offset, - u8 *data, ssize_t size) +static ssize_t data_read_offset(struct dso *dso, struct machine *machine, + u64 offset, u8 *data, ssize_t size) { - if (data_file_size(dso)) + if (data_file_size(dso, machine)) return -1; /* Check the offset sanity. */ @@ -648,7 +812,7 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset, if (offset + size < offset) return -1; - return cached_read(dso, offset, data, size); + return cached_read(dso, machine, offset, data, size); } /** @@ -665,10 +829,10 @@ static ssize_t data_read_offset(struct dso *dso, u64 offset, ssize_t dso__data_read_offset(struct dso *dso, struct machine *machine, u64 offset, u8 *data, ssize_t size) { - if (dso__data_fd(dso, machine) < 0) + if (dso->data.status == DSO_DATA_STATUS_ERROR) return -1; - return data_read_offset(dso, offset, data, size); + return data_read_offset(dso, machine, offset, data, size); } /** @@ -700,13 +864,13 @@ struct map *dso__new_map(const char *name) return map; } -struct dso *dso__kernel_findnew(struct machine *machine, const char *name, - const char *short_name, int dso_type) +struct dso *machine__findnew_kernel(struct machine *machine, const char *name, + const char *short_name, int dso_type) { /* * The kernel dso could be created by build_id processing. */ - struct dso *dso = __dsos__findnew(&machine->kernel_dsos, name); + struct dso *dso = machine__findnew_dso(machine, name); /* * We need to run this in all cases, since during the build_id @@ -725,8 +889,8 @@ struct dso *dso__kernel_findnew(struct machine *machine, const char *name, * Either one of the dso or name parameter must be non-NULL or the * function will not work. 
*/ -static struct dso *dso__findlink_by_longname(struct rb_root *root, - struct dso *dso, const char *name) +static struct dso *__dso__findlink_by_longname(struct rb_root *root, + struct dso *dso, const char *name) { struct rb_node **p = &root->rb_node; struct rb_node *parent = NULL; @@ -773,10 +937,10 @@ static struct dso *dso__findlink_by_longname(struct rb_root *root, return NULL; } -static inline struct dso * -dso__find_by_longname(const struct rb_root *root, const char *name) +static inline struct dso *__dso__find_by_longname(struct rb_root *root, + const char *name) { - return dso__findlink_by_longname((struct rb_root *)root, NULL, name); + return __dso__findlink_by_longname(root, NULL, name); } void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated) @@ -884,6 +1048,8 @@ struct dso *dso__new(const char *name) RB_CLEAR_NODE(&dso->rb_node); INIT_LIST_HEAD(&dso->node); INIT_LIST_HEAD(&dso->data.open_entry); + pthread_mutex_init(&dso->lock, NULL); + atomic_set(&dso->refcnt, 1); } return dso; @@ -910,12 +1076,27 @@ void dso__delete(struct dso *dso) } dso__data_close(dso); - dso_cache__free(&dso->data.cache); + auxtrace_cache__free(dso->auxtrace_cache); + dso_cache__free(dso); dso__free_a2l(dso); zfree(&dso->symsrc_filename); + pthread_mutex_destroy(&dso->lock); free(dso); } +struct dso *dso__get(struct dso *dso) +{ + if (dso) + atomic_inc(&dso->refcnt); + return dso; +} + +void dso__put(struct dso *dso) +{ + if (dso && atomic_dec_and_test(&dso->refcnt)) + dso__delete(dso); +} + void dso__set_build_id(struct dso *dso, void *build_id) { memcpy(dso->build_id, build_id, sizeof(dso->build_id)); @@ -982,14 +1163,41 @@ bool __dsos__read_build_ids(struct list_head *head, bool with_hits) return have_build_id; } -void dsos__add(struct dsos *dsos, struct dso *dso) +void __dsos__add(struct dsos *dsos, struct dso *dso) { list_add_tail(&dso->node, &dsos->head); - dso__findlink_by_longname(&dsos->root, dso, NULL); + __dso__findlink_by_longname(&dsos->root, dso, NULL); + /* + * It is now in the linked list, grab a reference, then garbage collect + * this when needing memory, by looking at LRU dso instances in the + * list with atomic_read(&dso->refcnt) == 1, i.e. no references + * anywhere besides the one for the list, do, under a lock for the + * list: remove it from the list, then a dso__put(), that probably will + * be the last and will then call dso__delete(), end of life. + * + * That, or at the end of the 'struct machine' lifetime, when all + * 'struct dso' instances will be removed from the list, in + * dsos__exit(), if they have no other reference from some other data + * structure. + * + * E.g.: after processing a 'perf.data' file and storing references + * to objects instantiated while processing events, we will have + * references to the 'thread', 'map', 'dso' structs all from 'struct + * hist_entry' instances, but we may not need anything not referenced, + * so we might as well call machines__exit()/machines__delete() and + * garbage collect it. 
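In practice that means lookups which hand a dso back to a caller also take a reference, as the dsos__findnew() wrapper added just below does; a minimal sketch of the resulting discipline ('name' stands for any dso path):

	struct dso *dso = dsos__findnew(dsos, name);	/* returns a counted reference */

	if (dso) {
		/* ... use dso ... */
		dso__put(dso);	/* drop the caller's reference; the list still holds its own */
	}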
+ */ + dso__get(dso); +} + +void dsos__add(struct dsos *dsos, struct dso *dso) +{ + pthread_rwlock_wrlock(&dsos->lock); + __dsos__add(dsos, dso); + pthread_rwlock_unlock(&dsos->lock); } -struct dso *dsos__find(const struct dsos *dsos, const char *name, - bool cmp_short) +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { struct dso *pos; @@ -999,21 +1207,42 @@ struct dso *dsos__find(const struct dsos *dsos, const char *name, return pos; return NULL; } - return dso__find_by_longname(&dsos->root, name); + return __dso__find_by_longname(&dsos->root, name); } -struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short) { - struct dso *dso = dsos__find(dsos, name, false); + struct dso *dso; + pthread_rwlock_rdlock(&dsos->lock); + dso = __dsos__find(dsos, name, cmp_short); + pthread_rwlock_unlock(&dsos->lock); + return dso; +} - if (!dso) { - dso = dso__new(name); - if (dso != NULL) { - dsos__add(dsos, dso); - dso__set_basename(dso); - } +struct dso *__dsos__addnew(struct dsos *dsos, const char *name) +{ + struct dso *dso = dso__new(name); + + if (dso != NULL) { + __dsos__add(dsos, dso); + dso__set_basename(dso); } + return dso; +} +struct dso *__dsos__findnew(struct dsos *dsos, const char *name) +{ + struct dso *dso = __dsos__find(dsos, name, false); + + return dso ? dso : __dsos__addnew(dsos, name); +} + +struct dso *dsos__findnew(struct dsos *dsos, const char *name) +{ + struct dso *dso; + pthread_rwlock_wrlock(&dsos->lock); + dso = dso__get(__dsos__findnew(dsos, name)); + pthread_rwlock_unlock(&dsos->lock); return dso; } @@ -1076,10 +1305,46 @@ size_t dso__fprintf(struct dso *dso, enum map_type type, FILE *fp) enum dso_type dso__type(struct dso *dso, struct machine *machine) { int fd; + enum dso_type type = DSO__TYPE_UNKNOWN; + + fd = dso__data_get_fd(dso, machine); + if (fd >= 0) { + type = dso__type_fd(fd); + dso__data_put_fd(dso); + } + + return type; +} + +int dso__strerror_load(struct dso *dso, char *buf, size_t buflen) +{ + int idx, errnum = dso->load_errno; + /* + * This must have a same ordering as the enum dso_load_errno. + */ + static const char *dso_load__error_str[] = { + "Internal tools/perf/ library error", + "Invalid ELF file", + "Can not read build id", + "Mismatching build id", + "Decompression failure", + }; + + BUG_ON(buflen == 0); + + if (errnum >= 0) { + const char *err = strerror_r(errnum, buf, buflen); - fd = dso__data_fd(dso, machine); - if (fd < 0) - return DSO__TYPE_UNKNOWN; + if (err != buf) + scnprintf(buf, buflen, "%s", err); - return dso__type_fd(fd); + return 0; + } + + if (errnum < __DSO_LOAD_ERRNO__START || errnum >= __DSO_LOAD_ERRNO__END) + return -1; + + idx = errnum - __DSO_LOAD_ERRNO__START; + scnprintf(buf, buflen, "%s", dso_load__error_str[idx]); + return 0; } diff --git a/tools/perf/util/dso.h b/tools/perf/util/dso.h index ced92841ff97..2fe98bb0e95b 100644 --- a/tools/perf/util/dso.h +++ b/tools/perf/util/dso.h @@ -1,9 +1,11 @@ #ifndef __PERF_DSO #define __PERF_DSO +#include <linux/atomic.h> #include <linux/types.h> #include <linux/rbtree.h> #include <stdbool.h> +#include <pthread.h> #include <linux/types.h> #include <linux/bitops.h> #include "map.h" @@ -60,6 +62,31 @@ enum dso_type { DSO__TYPE_X32BIT, }; +enum dso_load_errno { + DSO_LOAD_ERRNO__SUCCESS = 0, + + /* + * Choose an arbitrary negative big number not to clash with standard + * errno since SUS requires the errno has distinct positive values. + * See 'Issue 6' in the link below. 
+ * + * http://pubs.opengroup.org/onlinepubs/9699919799/basedefs/errno.h.html + */ + __DSO_LOAD_ERRNO__START = -10000, + + DSO_LOAD_ERRNO__INTERNAL_ERROR = __DSO_LOAD_ERRNO__START, + + /* for symsrc__init() */ + DSO_LOAD_ERRNO__INVALID_ELF, + DSO_LOAD_ERRNO__CANNOT_READ_BUILDID, + DSO_LOAD_ERRNO__MISMATCHING_BUILDID, + + /* for decompress_kmodule */ + DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE, + + __DSO_LOAD_ERRNO__END, +}; + #define DSO__SWAP(dso, type, val) \ ({ \ type ____r = val; \ @@ -99,9 +126,13 @@ struct dso_cache { struct dsos { struct list_head head; struct rb_root root; /* rbtree root sorted by long name */ + pthread_rwlock_t lock; }; +struct auxtrace_cache; + struct dso { + pthread_mutex_t lock; struct list_head node; struct rb_node rb_node; /* rbtree node sorted by long name */ struct rb_root symbols[MAP__NR_TYPES]; @@ -113,6 +144,7 @@ struct dso { enum dso_swap_type needs_swap; enum dso_binary_type symtab_type; enum dso_binary_type binary_type; + enum dso_load_errno load_errno; u8 adjust_symbols:1; u8 has_build_id:1; u8 has_srcline:1; @@ -130,6 +162,7 @@ struct dso { u16 long_name_len; u16 short_name_len; void *dwfl; /* DWARF debug info */ + struct auxtrace_cache *auxtrace_cache; /* dso data file */ struct { @@ -139,14 +172,15 @@ struct dso { u32 status_seen; size_t file_size; struct list_head open_entry; - u64 frame_offset; + u64 debug_frame_offset; + u64 eh_frame_hdr_offset; } data; union { /* Tool specific area */ void *priv; u64 db_id; }; - + atomic_t refcnt; char name[0]; }; @@ -173,6 +207,17 @@ void dso__set_long_name(struct dso *dso, const char *name, bool name_allocated); int dso__name_len(const struct dso *dso); +struct dso *dso__get(struct dso *dso); +void dso__put(struct dso *dso); + +static inline void __dso__zput(struct dso **dso) +{ + dso__put(*dso); + *dso = NULL; +} + +#define dso__zput(dso) __dso__zput(&dso) + bool dso__loaded(const struct dso *dso, enum map_type type); bool dso__sorted_by_name(const struct dso *dso, enum map_type type); @@ -189,14 +234,28 @@ char dso__symtab_origin(const struct dso *dso); int dso__read_binary_type_filename(const struct dso *dso, enum dso_binary_type type, char *root_dir, char *filename, size_t size); bool is_supported_compression(const char *ext); -bool is_kmodule_extension(const char *ext); -bool is_kernel_module(const char *pathname, bool *compressed); +bool is_kernel_module(const char *pathname, int cpumode); bool decompress_to_file(const char *ext, const char *filename, int output_fd); bool dso__needs_decompress(struct dso *dso); +struct kmod_path { + char *name; + char *ext; + bool comp; + bool kmod; +}; + +int __kmod_path__parse(struct kmod_path *m, const char *path, + bool alloc_name, bool alloc_ext); + +#define kmod_path__parse(__m, __p) __kmod_path__parse(__m, __p, false, false) +#define kmod_path__parse_name(__m, __p) __kmod_path__parse(__m, __p, true , false) +#define kmod_path__parse_ext(__m, __p) __kmod_path__parse(__m, __p, false, true) + /* * The dso__data_* external interface provides following functions: - * dso__data_fd + * dso__data_get_fd + * dso__data_put_fd * dso__data_close * dso__data_size * dso__data_read_offset @@ -213,8 +272,11 @@ bool dso__needs_decompress(struct dso *dso); * The current usage of the dso__data_* interface is as follows: * * Get DSO's fd: - * int fd = dso__data_fd(dso, machine); - * USE 'fd' SOMEHOW + * int fd = dso__data_get_fd(dso, machine); + * if (fd >= 0) { + * USE 'fd' SOMEHOW + * dso__data_put_fd(dso); + * } * * Read DSO's data: * n = dso__data_read_offset(dso_0, &machine, 0, 
buf, BUFSIZE); @@ -233,7 +295,8 @@ bool dso__needs_decompress(struct dso *dso); * * TODO */ -int dso__data_fd(struct dso *dso, struct machine *machine); +int dso__data_get_fd(struct dso *dso, struct machine *machine); +void dso__data_put_fd(struct dso *dso __maybe_unused); void dso__data_close(struct dso *dso); off_t dso__data_size(struct dso *dso, struct machine *machine); @@ -245,13 +308,16 @@ ssize_t dso__data_read_addr(struct dso *dso, struct map *map, bool dso__data_status_seen(struct dso *dso, enum dso_data_status_seen by); struct map *dso__new_map(const char *name); -struct dso *dso__kernel_findnew(struct machine *machine, const char *name, - const char *short_name, int dso_type); +struct dso *machine__findnew_kernel(struct machine *machine, const char *name, + const char *short_name, int dso_type); +void __dsos__add(struct dsos *dsos, struct dso *dso); void dsos__add(struct dsos *dsos, struct dso *dso); -struct dso *dsos__find(const struct dsos *dsos, const char *name, - bool cmp_short); +struct dso *__dsos__addnew(struct dsos *dsos, const char *name); +struct dso *__dsos__find(struct dsos *dsos, const char *name, bool cmp_short); +struct dso *dsos__find(struct dsos *dsos, const char *name, bool cmp_short); struct dso *__dsos__findnew(struct dsos *dsos, const char *name); +struct dso *dsos__findnew(struct dsos *dsos, const char *name); bool __dsos__read_build_ids(struct list_head *head, bool with_hits); size_t __dsos__fprintf_buildid(struct list_head *head, FILE *fp, @@ -279,4 +345,6 @@ void dso__free_a2l(struct dso *dso); enum dso_type dso__type(struct dso *dso, struct machine *machine); +int dso__strerror_load(struct dso *dso, char *buf, size_t buflen); + #endif /* __PERF_DSO */ diff --git a/tools/perf/util/dwarf-aux.c b/tools/perf/util/dwarf-aux.c index cc66c4049e09..57f3ef41c2bc 100644 --- a/tools/perf/util/dwarf-aux.c +++ b/tools/perf/util/dwarf-aux.c @@ -139,11 +139,27 @@ int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, bool die_compare_name(Dwarf_Die *dw_die, const char *tname) { const char *name; + name = dwarf_diename(dw_die); return name ? (strcmp(tname, name) == 0) : false; } /** + * die_match_name - Match diename and glob + * @dw_die: a DIE + * @glob: a string of target glob pattern + * + * Glob matching the name of @dw_die and @glob. Return false if matching fail. + */ +bool die_match_name(Dwarf_Die *dw_die, const char *glob) +{ + const char *name; + + name = dwarf_diename(dw_die); + return name ? strglobmatch(name, glob) : false; +} + +/** * die_get_call_lineno - Get callsite line number of inline-function instance * @in_die: a DIE of an inlined function instance * @@ -278,6 +294,21 @@ bool die_is_func_def(Dwarf_Die *dw_die) } /** + * die_is_func_instance - Ensure that this DIE is an instance of a subprogram + * @dw_die: a DIE + * + * Ensure that this DIE is an instance (which has an entry address). + * This returns true if @dw_die is a function instance. If not, you need to + * call die_walk_instances() to find actual instances. 
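A probe-finder style caller could use this check roughly as in the sketch below; instance_cb and data are hypothetical, and die_walk_instances() is the existing dwarf-aux iterator the comment refers to:

	Dwarf_Addr entry;

	if (die_is_func_instance(sp_die)) {
		/* this DIE already carries an entry address of its own */
		dwarf_entrypc(sp_die, &entry);
	} else {
		/* abstract-only DIE: visit each concrete instance instead */
		die_walk_instances(sp_die, instance_cb, data);
	}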
+ **/ +bool die_is_func_instance(Dwarf_Die *dw_die) +{ + Dwarf_Addr tmp; + + /* Actually gcc optimizes non-inline as like as inlined */ + return !dwarf_func_inline(dw_die) && dwarf_entrypc(dw_die, &tmp) == 0; +} +/** * die_get_data_member_location - Get the data-member offset * @mb_die: a DIE of a member of a data structure * @offs: The offset of the member in the data structure @@ -402,6 +433,43 @@ struct __addr_die_search_param { Dwarf_Die *die_mem; }; +static int __die_search_func_tail_cb(Dwarf_Die *fn_die, void *data) +{ + struct __addr_die_search_param *ad = data; + Dwarf_Addr addr = 0; + + if (dwarf_tag(fn_die) == DW_TAG_subprogram && + !dwarf_highpc(fn_die, &addr) && + addr == ad->addr) { + memcpy(ad->die_mem, fn_die, sizeof(Dwarf_Die)); + return DWARF_CB_ABORT; + } + return DWARF_CB_OK; +} + +/** + * die_find_tailfunc - Search for a non-inlined function with tail call at + * given address + * @cu_die: a CU DIE which including @addr + * @addr: target address + * @die_mem: a buffer for result DIE + * + * Search for a non-inlined function DIE with tail call at @addr. Stores the + * DIE to @die_mem and returns it if found. Returns NULL if failed. + */ +Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem) +{ + struct __addr_die_search_param ad; + ad.addr = addr; + ad.die_mem = die_mem; + /* dwarf_getscopes can't find subprogram. */ + if (!dwarf_getfuncs(cu_die, __die_search_func_tail_cb, &ad, 0)) + return NULL; + else + return die_mem; +} + /* die_find callback for non-inlined function search */ static int __die_search_func_cb(Dwarf_Die *fn_die, void *data) { @@ -786,10 +854,16 @@ static int __die_find_member_cb(Dwarf_Die *die_mem, void *data) { const char *name = data; - if ((dwarf_tag(die_mem) == DW_TAG_member) && - die_compare_name(die_mem, name)) - return DIE_FIND_CB_END; - + if (dwarf_tag(die_mem) == DW_TAG_member) { + if (die_compare_name(die_mem, name)) + return DIE_FIND_CB_END; + else if (!dwarf_diename(die_mem)) { /* Unnamed structure */ + Dwarf_Die type_die, tmp_die; + if (die_get_type(die_mem, &type_die) && + die_find_member(&type_die, name, &tmp_die)) + return DIE_FIND_CB_END; + } + } return DIE_FIND_CB_SIBLING; } @@ -811,19 +885,17 @@ Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, /** * die_get_typename - Get the name of given variable DIE * @vr_die: a variable DIE - * @buf: a buffer for result type name - * @len: a max-length of @buf + * @buf: a strbuf for result type name * - * Get the name of @vr_die and stores it to @buf. Return the actual length - * of type name if succeeded. Return -E2BIG if @len is not enough long, and - * Return -ENOENT if failed to find type name. + * Get the name of @vr_die and stores it to @buf. Return 0 if succeeded. + * and Return -ENOENT if failed to find type name. * Note that the result will stores typedef name if possible, and stores * "*(function_type)" if the type is a function pointer. */ -int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) +int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf) { Dwarf_Die type; - int tag, ret, ret2; + int tag, ret; const char *tmp = ""; if (__die_get_real_type(vr_die, &type) == NULL) @@ -834,8 +906,8 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) tmp = "*"; else if (tag == DW_TAG_subroutine_type) { /* Function pointer */ - ret = snprintf(buf, len, "(function_type)"); - return (ret >= len) ? 
-E2BIG : ret; + strbuf_addf(buf, "(function_type)"); + return 0; } else { if (!dwarf_diename(&type)) return -ENOENT; @@ -846,39 +918,156 @@ int die_get_typename(Dwarf_Die *vr_die, char *buf, int len) else if (tag == DW_TAG_enumeration_type) tmp = "enum "; /* Write a base name */ - ret = snprintf(buf, len, "%s%s", tmp, dwarf_diename(&type)); - return (ret >= len) ? -E2BIG : ret; - } - ret = die_get_typename(&type, buf, len); - if (ret > 0) { - ret2 = snprintf(buf + ret, len - ret, "%s", tmp); - ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; + strbuf_addf(buf, "%s%s", tmp, dwarf_diename(&type)); + return 0; } + ret = die_get_typename(&type, buf); + if (ret == 0) + strbuf_addf(buf, "%s", tmp); + return ret; } /** * die_get_varname - Get the name and type of given variable DIE * @vr_die: a variable DIE - * @buf: a buffer for type and variable name - * @len: the max-length of @buf + * @buf: a strbuf for type and variable name * * Get the name and type of @vr_die and stores it in @buf as "type\tname". */ -int die_get_varname(Dwarf_Die *vr_die, char *buf, int len) +int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf) { - int ret, ret2; + int ret; - ret = die_get_typename(vr_die, buf, len); + ret = die_get_typename(vr_die, buf); if (ret < 0) { pr_debug("Failed to get type, make it unknown.\n"); - ret = snprintf(buf, len, "(unknown_type)"); + strbuf_addf(buf, "(unknown_type)"); } - if (ret > 0) { - ret2 = snprintf(buf + ret, len - ret, "\t%s", - dwarf_diename(vr_die)); - ret = (ret2 >= len - ret) ? -E2BIG : ret2 + ret; + + strbuf_addf(buf, "\t%s", dwarf_diename(vr_die)); + + return 0; +} + +/** + * die_get_var_innermost_scope - Get innermost scope range of given variable DIE + * @sp_die: a subprogram DIE + * @vr_die: a variable DIE + * @buf: a strbuf for variable byte offset range + * + * Get the innermost scope range of @vr_die and stores it in @buf as + * "@<function_name+[NN-NN,NN-NN]>". + */ +static int die_get_var_innermost_scope(Dwarf_Die *sp_die, Dwarf_Die *vr_die, + struct strbuf *buf) +{ + Dwarf_Die *scopes; + int count; + size_t offset = 0; + Dwarf_Addr base; + Dwarf_Addr start, end; + Dwarf_Addr entry; + int ret; + bool first = true; + const char *name; + + ret = dwarf_entrypc(sp_die, &entry); + if (ret) + return ret; + + name = dwarf_diename(sp_die); + if (!name) + return -ENOENT; + + count = dwarf_getscopes_die(vr_die, &scopes); + + /* (*SCOPES)[1] is the DIE for the scope containing that scope */ + if (count <= 1) { + ret = -EINVAL; + goto out; + } + + while ((offset = dwarf_ranges(&scopes[1], offset, &base, + &start, &end)) > 0) { + start -= entry; + end -= entry; + + if (first) { + strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, + name, start, end); + first = false; + } else { + strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, + start, end); + } } + + if (!first) + strbuf_addf(buf, "]>"); + +out: + free(scopes); return ret; } +/** + * die_get_var_range - Get byte offset range of given variable DIE + * @sp_die: a subprogram DIE + * @vr_die: a variable DIE + * @buf: a strbuf for type and variable name and byte offset range + * + * Get the byte offset range of @vr_die and stores it in @buf as + * "@<function_name+[NN-NN,NN-NN]>". 
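Taken together with die_get_varname(), this lets a caller build the full "type\tname@<func+[ranges]>" string in one strbuf; a sketch, assuming vr_die and sp_die come from a probe-finder style walk:

	struct strbuf buf;

	strbuf_init(&buf, 128);
	if (die_get_varname(vr_die, &buf) == 0 &&
	    die_get_var_range(sp_die, vr_die, &buf) == 0)
		pr_debug("variable: %s\n", buf.buf);
	strbuf_release(&buf);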
+ */ +int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, struct strbuf *buf) +{ + int ret = 0; + Dwarf_Addr base; + Dwarf_Addr start, end; + Dwarf_Addr entry; + Dwarf_Op *op; + size_t nops; + size_t offset = 0; + Dwarf_Attribute attr; + bool first = true; + const char *name; + + ret = dwarf_entrypc(sp_die, &entry); + if (ret) + return ret; + + name = dwarf_diename(sp_die); + if (!name) + return -ENOENT; + + if (dwarf_attr(vr_die, DW_AT_location, &attr) == NULL) + return -EINVAL; + + while ((offset = dwarf_getlocations( + &attr, offset, &base, + &start, &end, &op, &nops)) > 0) { + if (start == 0) { + /* Single Location Descriptions */ + ret = die_get_var_innermost_scope(sp_die, vr_die, buf); + return ret; + } + + /* Location Lists */ + start -= entry; + end -= entry; + if (first) { + strbuf_addf(buf, "@<%s+[%" PRIu64 "-%" PRIu64, + name, start, end); + first = false; + } else { + strbuf_addf(buf, ",%" PRIu64 "-%" PRIu64, + start, end); + } + } + + if (!first) + strbuf_addf(buf, "]>"); + + return ret; +} diff --git a/tools/perf/util/dwarf-aux.h b/tools/perf/util/dwarf-aux.h index b4fe90c6cb2d..c42ec366f2a7 100644 --- a/tools/perf/util/dwarf-aux.h +++ b/tools/perf/util/dwarf-aux.h @@ -41,9 +41,15 @@ extern int cu_walk_functions_at(Dwarf_Die *cu_die, Dwarf_Addr addr, /* Ensure that this DIE is a subprogram and definition (not declaration) */ extern bool die_is_func_def(Dwarf_Die *dw_die); +/* Ensure that this DIE is an instance of a subprogram */ +extern bool die_is_func_instance(Dwarf_Die *dw_die); + /* Compare diename and tname */ extern bool die_compare_name(Dwarf_Die *dw_die, const char *tname); +/* Matching diename with glob pattern */ +extern bool die_match_name(Dwarf_Die *dw_die, const char *glob); + /* Get callsite line number of inline-function instance */ extern int die_get_call_lineno(Dwarf_Die *in_die); @@ -79,6 +85,10 @@ extern Dwarf_Die *die_find_child(Dwarf_Die *rt_die, extern Dwarf_Die *die_find_realfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, Dwarf_Die *die_mem); +/* Search a non-inlined function with tail call at given address */ +Dwarf_Die *die_find_tailfunc(Dwarf_Die *cu_die, Dwarf_Addr addr, + Dwarf_Die *die_mem); + /* Search the top inlined function including given address */ extern Dwarf_Die *die_find_top_inlinefunc(Dwarf_Die *sp_die, Dwarf_Addr addr, Dwarf_Die *die_mem); @@ -111,8 +121,10 @@ extern Dwarf_Die *die_find_member(Dwarf_Die *st_die, const char *name, Dwarf_Die *die_mem); /* Get the name of given variable DIE */ -extern int die_get_typename(Dwarf_Die *vr_die, char *buf, int len); +extern int die_get_typename(Dwarf_Die *vr_die, struct strbuf *buf); /* Get the name and type of given variable DIE, stored as "type\tname" */ -extern int die_get_varname(Dwarf_Die *vr_die, char *buf, int len); +extern int die_get_varname(Dwarf_Die *vr_die, struct strbuf *buf); +extern int die_get_var_range(Dwarf_Die *sp_die, Dwarf_Die *vr_die, + struct strbuf *buf); #endif diff --git a/tools/perf/util/environment.c b/tools/perf/util/environment.c index 275b0ee345f5..7405123692f1 100644 --- a/tools/perf/util/environment.c +++ b/tools/perf/util/environment.c @@ -5,5 +5,4 @@ */ #include "cache.h" -const char *pager_program; int pager_use_color = 1; diff --git a/tools/perf/util/event.c b/tools/perf/util/event.c index 6c6d044e959a..67a977e5d0ab 100644 --- a/tools/perf/util/event.c +++ b/tools/perf/util/event.c @@ -23,12 +23,18 @@ static const char *perf_event__names[] = { [PERF_RECORD_FORK] = "FORK", [PERF_RECORD_READ] = "READ", [PERF_RECORD_SAMPLE] = "SAMPLE", + [PERF_RECORD_AUX] = 
"AUX", + [PERF_RECORD_ITRACE_START] = "ITRACE_START", + [PERF_RECORD_LOST_SAMPLES] = "LOST_SAMPLES", [PERF_RECORD_HEADER_ATTR] = "ATTR", [PERF_RECORD_HEADER_EVENT_TYPE] = "EVENT_TYPE", [PERF_RECORD_HEADER_TRACING_DATA] = "TRACING_DATA", [PERF_RECORD_HEADER_BUILD_ID] = "BUILD_ID", [PERF_RECORD_FINISHED_ROUND] = "FINISHED_ROUND", [PERF_RECORD_ID_INDEX] = "ID_INDEX", + [PERF_RECORD_AUXTRACE_INFO] = "AUXTRACE_INFO", + [PERF_RECORD_AUXTRACE] = "AUXTRACE", + [PERF_RECORD_AUXTRACE_ERROR] = "AUXTRACE_ERROR", }; const char *perf_event__name(unsigned int id) @@ -49,72 +55,103 @@ static struct perf_sample synth_sample = { .period = 1, }; -static pid_t perf_event__get_comm_tgid(pid_t pid, char *comm, size_t len) +/* + * Assumes that the first 4095 bytes of /proc/pid/stat contains + * the comm, tgid and ppid. + */ +static int perf_event__get_comm_ids(pid_t pid, char *comm, size_t len, + pid_t *tgid, pid_t *ppid) { char filename[PATH_MAX]; - char bf[BUFSIZ]; - FILE *fp; - size_t size = 0; - pid_t tgid = -1; + char bf[4096]; + int fd; + size_t size = 0, n; + char *nl, *name, *tgids, *ppids; + + *tgid = -1; + *ppid = -1; snprintf(filename, sizeof(filename), "/proc/%d/status", pid); - fp = fopen(filename, "r"); - if (fp == NULL) { + fd = open(filename, O_RDONLY); + if (fd < 0) { pr_debug("couldn't open %s\n", filename); - return 0; + return -1; } - while (!comm[0] || (tgid < 0)) { - if (fgets(bf, sizeof(bf), fp) == NULL) { - pr_warning("couldn't get COMM and pgid, malformed %s\n", - filename); - break; - } + n = read(fd, bf, sizeof(bf) - 1); + close(fd); + if (n <= 0) { + pr_warning("Couldn't get COMM, tigd and ppid for pid %d\n", + pid); + return -1; + } + bf[n] = '\0'; - if (memcmp(bf, "Name:", 5) == 0) { - char *name = bf + 5; - while (*name && isspace(*name)) - ++name; - size = strlen(name) - 1; - if (size >= len) - size = len - 1; - memcpy(comm, name, size); - comm[size] = '\0'; - - } else if (memcmp(bf, "Tgid:", 5) == 0) { - char *tgids = bf + 5; - while (*tgids && isspace(*tgids)) - ++tgids; - tgid = atoi(tgids); - } + name = strstr(bf, "Name:"); + tgids = strstr(bf, "Tgid:"); + ppids = strstr(bf, "PPid:"); + + if (name) { + name += 5; /* strlen("Name:") */ + + while (*name && isspace(*name)) + ++name; + + nl = strchr(name, '\n'); + if (nl) + *nl = '\0'; + + size = strlen(name); + if (size >= len) + size = len - 1; + memcpy(comm, name, size); + comm[size] = '\0'; + } else { + pr_debug("Name: string not found for pid %d\n", pid); } - fclose(fp); + if (tgids) { + tgids += 5; /* strlen("Tgid:") */ + *tgid = atoi(tgids); + } else { + pr_debug("Tgid: string not found for pid %d\n", pid); + } - return tgid; + if (ppids) { + ppids += 5; /* strlen("PPid:") */ + *ppid = atoi(ppids); + } else { + pr_debug("PPid: string not found for pid %d\n", pid); + } + + return 0; } -static pid_t perf_event__synthesize_comm(struct perf_tool *tool, - union perf_event *event, pid_t pid, - perf_event__handler_t process, - struct machine *machine) +static int perf_event__prepare_comm(union perf_event *event, pid_t pid, + struct machine *machine, + pid_t *tgid, pid_t *ppid) { size_t size; - pid_t tgid; + + *ppid = -1; memset(&event->comm, 0, sizeof(event->comm)); - if (machine__is_host(machine)) - tgid = perf_event__get_comm_tgid(pid, event->comm.comm, - sizeof(event->comm.comm)); - else - tgid = machine->pid; + if (machine__is_host(machine)) { + if (perf_event__get_comm_ids(pid, event->comm.comm, + sizeof(event->comm.comm), + tgid, ppid) != 0) { + return -1; + } + } else { + *tgid = machine->pid; + } - if (tgid < 0) - goto 
out; + if (*tgid < 0) + return -1; - event->comm.pid = tgid; + event->comm.pid = *tgid; event->comm.header.type = PERF_RECORD_COMM; size = strlen(event->comm.comm) + 1; @@ -125,23 +162,45 @@ static pid_t perf_event__synthesize_comm(struct perf_tool *tool, machine->id_hdr_size); event->comm.tid = pid; + return 0; +} + +static pid_t perf_event__synthesize_comm(struct perf_tool *tool, + union perf_event *event, pid_t pid, + perf_event__handler_t process, + struct machine *machine) +{ + pid_t tgid, ppid; + + if (perf_event__prepare_comm(event, pid, machine, &tgid, &ppid) != 0) + return -1; + if (process(tool, event, &synth_sample, machine) != 0) return -1; -out: return tgid; } static int perf_event__synthesize_fork(struct perf_tool *tool, - union perf_event *event, pid_t pid, - pid_t tgid, perf_event__handler_t process, + union perf_event *event, + pid_t pid, pid_t tgid, pid_t ppid, + perf_event__handler_t process, struct machine *machine) { memset(&event->fork, 0, sizeof(event->fork) + machine->id_hdr_size); - /* this is really a clone event but we use fork to synthesize it */ - event->fork.ppid = tgid; - event->fork.ptid = tgid; + /* + * for main thread set parent to ppid from status file. For other + * threads set parent pid to main thread. ie., assume main thread + * spawns all threads in a process + */ + if (tgid == pid) { + event->fork.ppid = ppid; + event->fork.ptid = ppid; + } else { + event->fork.ppid = tgid; + event->fork.ptid = tgid; + } event->fork.pid = tgid; event->fork.tid = pid; event->fork.header.type = PERF_RECORD_FORK; @@ -159,10 +218,14 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, - bool mmap_data) + bool mmap_data, + unsigned int proc_map_timeout) { char filename[PATH_MAX]; FILE *fp; + unsigned long long t; + bool truncation = false; + unsigned long long timeout = proc_map_timeout * 1000000ULL; int rc = 0; if (machine__is_default_guest(machine)) @@ -181,6 +244,7 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, } event->header.type = PERF_RECORD_MMAP2; + t = rdclock(); while (1) { char bf[BUFSIZ]; @@ -194,6 +258,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, if (fgets(bf, sizeof(bf), fp) == NULL) break; + if ((rdclock() - t) > timeout) { + pr_warning("Reading %s time out. " + "You may want to increase " + "the time limit by --proc-map-timeout\n", + filename); + truncation = true; + goto out; + } + /* ensure null termination since stack will be reused. 
*/ strcpy(execname, ""); @@ -242,6 +315,10 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, event->header.misc |= PERF_RECORD_MISC_MMAP_DATA; } +out: + if (truncation) + event->header.misc |= PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT; + if (!strcmp(execname, "")) strcpy(execname, anonstr); @@ -260,6 +337,9 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, rc = -1; break; } + + if (truncation) + break; } fclose(fp); @@ -271,8 +351,9 @@ int perf_event__synthesize_modules(struct perf_tool *tool, struct machine *machine) { int rc = 0; - struct rb_node *nd; + struct map *pos; struct map_groups *kmaps = &machine->kmaps; + struct maps *maps = &kmaps->maps[MAP__FUNCTION]; union perf_event *event = zalloc((sizeof(event->mmap) + machine->id_hdr_size)); if (event == NULL) { @@ -292,10 +373,8 @@ int perf_event__synthesize_modules(struct perf_tool *tool, else event->header.misc = PERF_RECORD_MISC_GUEST_KERNEL; - for (nd = rb_first(&kmaps->maps[MAP__FUNCTION]); - nd; nd = rb_next(nd)) { + for (pos = maps__first(maps); pos; pos = map__next(pos)) { size_t size; - struct map *pos = rb_entry(nd, struct map, rb_node); if (pos->dso->kernel) continue; @@ -328,12 +407,15 @@ static int __event__synthesize_thread(union perf_event *comm_event, pid_t pid, int full, perf_event__handler_t process, struct perf_tool *tool, - struct machine *machine, bool mmap_data) + struct machine *machine, + bool mmap_data, + unsigned int proc_map_timeout) { char filename[PATH_MAX]; DIR *tasks; struct dirent dirent, *next; - pid_t tgid; + pid_t tgid, ppid; + int rc = 0; /* special case: only send one comm event using passed in pid */ if (!full) { @@ -344,7 +426,8 @@ static int __event__synthesize_thread(union perf_event *comm_event, return -1; return perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data); + process, machine, mmap_data, + proc_map_timeout); } if (machine__is_default_guest(machine)) @@ -361,41 +444,46 @@ static int __event__synthesize_thread(union perf_event *comm_event, while (!readdir_r(tasks, &dirent, &next) && next) { char *end; - int rc = 0; pid_t _pid; _pid = strtol(dirent.d_name, &end, 10); if (*end) continue; - tgid = perf_event__synthesize_comm(tool, comm_event, _pid, - process, machine); - if (tgid == -1) - return -1; + rc = -1; + if (perf_event__prepare_comm(comm_event, _pid, machine, + &tgid, &ppid) != 0) + break; + + if (perf_event__synthesize_fork(tool, fork_event, _pid, tgid, + ppid, process, machine) < 0) + break; + /* + * Send the prepared comm event + */ + if (process(tool, comm_event, &synth_sample, machine) != 0) + break; + rc = 0; if (_pid == pid) { /* process the parent's maps too */ rc = perf_event__synthesize_mmap_events(tool, mmap_event, pid, tgid, - process, machine, mmap_data); - } else { - /* only fork the tid's map, to save time */ - rc = perf_event__synthesize_fork(tool, fork_event, _pid, tgid, - process, machine); + process, machine, mmap_data, proc_map_timeout); + if (rc) + break; } - - if (rc) - return rc; } closedir(tasks); - return 0; + return rc; } int perf_event__synthesize_thread_map(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, struct machine *machine, - bool mmap_data) + bool mmap_data, + unsigned int proc_map_timeout) { union perf_event *comm_event, *mmap_event, *fork_event; int err = -1, thread, j; @@ -416,9 +504,9 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, for (thread = 0; thread < threads->nr; ++thread) { if (__event__synthesize_thread(comm_event, 
mmap_event, fork_event, - threads->map[thread], 0, + thread_map__pid(threads, thread), 0, process, tool, machine, - mmap_data)) { + mmap_data, proc_map_timeout)) { err = -1; break; } @@ -427,12 +515,12 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, * comm.pid is set to thread group id by * perf_event__synthesize_comm */ - if ((int) comm_event->comm.pid != threads->map[thread]) { + if ((int) comm_event->comm.pid != thread_map__pid(threads, thread)) { bool need_leader = true; /* is thread group leader in thread_map? */ for (j = 0; j < threads->nr; ++j) { - if ((int) comm_event->comm.pid == threads->map[j]) { + if ((int) comm_event->comm.pid == thread_map__pid(threads, j)) { need_leader = false; break; } @@ -444,7 +532,7 @@ int perf_event__synthesize_thread_map(struct perf_tool *tool, fork_event, comm_event->comm.pid, 0, process, tool, machine, - mmap_data)) { + mmap_data, proc_map_timeout)) { err = -1; break; } @@ -461,7 +549,9 @@ out: int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, bool mmap_data) + struct machine *machine, + bool mmap_data, + unsigned int proc_map_timeout) { DIR *proc; char proc_path[PATH_MAX]; @@ -501,7 +591,8 @@ int perf_event__synthesize_threads(struct perf_tool *tool, * one thread couldn't be synthesized. */ __event__synthesize_thread(comm_event, mmap_event, fork_event, pid, - 1, process, tool, machine, mmap_data); + 1, process, tool, machine, mmap_data, + proc_map_timeout); } err = 0; @@ -615,7 +706,7 @@ size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp) else s = ""; - return fprintf(fp, "%s: %s:%d\n", s, event->comm.comm, event->comm.tid); + return fprintf(fp, "%s: %s:%d/%d\n", s, event->comm.comm, event->comm.pid, event->comm.tid); } int perf_event__process_comm(struct perf_tool *tool __maybe_unused, @@ -634,6 +725,30 @@ int perf_event__process_lost(struct perf_tool *tool __maybe_unused, return machine__process_lost_event(machine, event, sample); } +int perf_event__process_aux(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_aux_event(machine, event); +} + +int perf_event__process_itrace_start(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample __maybe_unused, + struct machine *machine) +{ + return machine__process_itrace_start_event(machine, event); +} + +int perf_event__process_lost_samples(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine) +{ + return machine__process_lost_samples_event(machine, event, sample); +} + size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp) { return fprintf(fp, " %d/%d: [%#" PRIx64 "(%#" PRIx64 ") @ %#" PRIx64 "]: %c %s\n", @@ -697,6 +812,21 @@ int perf_event__process_exit(struct perf_tool *tool __maybe_unused, return machine__process_exit_event(machine, event, sample); } +size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " offset: %#"PRIx64" size: %#"PRIx64" flags: %#"PRIx64" [%s%s]\n", + event->aux.aux_offset, event->aux.aux_size, + event->aux.flags, + event->aux.flags & PERF_AUX_FLAG_TRUNCATED ? "T" : "", + event->aux.flags & PERF_AUX_FLAG_OVERWRITE ? 
"O" : ""); +} + +size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp) +{ + return fprintf(fp, " pid: %u tid: %u\n", + event->itrace_start.pid, event->itrace_start.tid); +} + size_t perf_event__fprintf(union perf_event *event, FILE *fp) { size_t ret = fprintf(fp, "PERF_RECORD_%s", @@ -716,6 +846,12 @@ size_t perf_event__fprintf(union perf_event *event, FILE *fp) case PERF_RECORD_MMAP2: ret += perf_event__fprintf_mmap2(event, fp); break; + case PERF_RECORD_AUX: + ret += perf_event__fprintf_aux(event, fp); + break; + case PERF_RECORD_ITRACE_START: + ret += perf_event__fprintf_itrace_start(event, fp); + break; default: ret += fprintf(fp, "\n"); } @@ -819,6 +955,10 @@ void thread__find_addr_location(struct thread *thread, al->sym = NULL; } +/* + * Callers need to drop the reference to al->thread, obtained in + * machine__findnew_thread() + */ int perf_event__preprocess_sample(const union perf_event *event, struct machine *machine, struct addr_location *al, @@ -879,6 +1019,17 @@ int perf_event__preprocess_sample(const union perf_event *event, return 0; } +/* + * The preprocess_sample method will return with reference counts for the + * in it, when done using (and perhaps getting ref counts if needing to + * keep a pointer to one of those entries) it must be paired with + * addr_location__put(), so that the refcounts can be decremented. + */ +void addr_location__put(struct addr_location *al) +{ + thread__zput(al->thread); +} + bool is_bts_event(struct perf_event_attr *attr) { return attr->type == PERF_TYPE_HARDWARE && diff --git a/tools/perf/util/event.h b/tools/perf/util/event.h index c4ffe2bd0738..c53f36384b64 100644 --- a/tools/perf/util/event.h +++ b/tools/perf/util/event.h @@ -52,6 +52,11 @@ struct lost_event { u64 lost; }; +struct lost_samples_event { + struct perf_event_header header; + u64 lost; +}; + /* * PERF_FORMAT_ENABLED | PERF_FORMAT_RUNNING | PERF_FORMAT_ID */ @@ -157,6 +162,8 @@ enum { PERF_IP_FLAG_IN_TX = 1ULL << 10, }; +#define PERF_IP_FLAG_CHARS "bcrosyiABEx" + #define PERF_BRANCH_MASK (\ PERF_IP_FLAG_BRANCH |\ PERF_IP_FLAG_CALL |\ @@ -215,9 +222,17 @@ enum perf_user_event_type { /* above any possible kernel type */ PERF_RECORD_HEADER_BUILD_ID = 67, PERF_RECORD_FINISHED_ROUND = 68, PERF_RECORD_ID_INDEX = 69, + PERF_RECORD_AUXTRACE_INFO = 70, + PERF_RECORD_AUXTRACE = 71, + PERF_RECORD_AUXTRACE_ERROR = 72, PERF_RECORD_HEADER_MAX }; +enum auxtrace_error_type { + PERF_AUXTRACE_ERROR_ITRACE = 1, + PERF_AUXTRACE_ERROR_MAX +}; + /* * The kernel collects the number of events it couldn't send in a stretch and * when possible sends this number in a PERF_RECORD_LOST event. The number of @@ -225,6 +240,12 @@ enum perf_user_event_type { /* above any possible kernel type */ * total_lost tells exactly how many events the kernel in fact lost, i.e. it is * the sum of all struct lost_event.lost fields reported. * + * The kernel discards mixed up samples and sends the number in a + * PERF_RECORD_LOST_SAMPLES event. The number of lost-samples events is stored + * in .nr_events[PERF_RECORD_LOST_SAMPLES] while total_lost_samples tells + * exactly how many samples the kernel in fact dropped, i.e. it is the sum of + * all struct lost_samples_event.lost fields reported. 
+ * * The total_period is needed because by default auto-freq is used, so * multipling nr_events[PERF_EVENT_SAMPLE] by a frequency isn't possible to get * the total number of low level events, it is necessary to to sum all struct @@ -234,6 +255,7 @@ struct events_stats { u64 total_period; u64 total_non_filtered_period; u64 total_lost; + u64 total_lost_samples; u64 total_invalid_chains; u32 nr_events[PERF_RECORD_HEADER_MAX]; u32 nr_non_filtered_samples; @@ -242,7 +264,8 @@ struct events_stats { u32 nr_invalid_chains; u32 nr_unknown_id; u32 nr_unprocessable_samples; - u32 nr_unordered_events; + u32 nr_auxtrace_errors[PERF_AUXTRACE_ERROR_MAX]; + u32 nr_proc_map_timeout; }; struct attr_event { @@ -281,6 +304,50 @@ struct id_index_event { struct id_index_entry entries[0]; }; +struct auxtrace_info_event { + struct perf_event_header header; + u32 type; + u32 reserved__; /* For alignment */ + u64 priv[]; +}; + +struct auxtrace_event { + struct perf_event_header header; + u64 size; + u64 offset; + u64 reference; + u32 idx; + u32 tid; + u32 cpu; + u32 reserved__; /* For alignment */ +}; + +#define MAX_AUXTRACE_ERROR_MSG 64 + +struct auxtrace_error_event { + struct perf_event_header header; + u32 type; + u32 code; + u32 cpu; + u32 pid; + u32 tid; + u32 reserved__; /* For alignment */ + u64 ip; + char msg[MAX_AUXTRACE_ERROR_MSG]; +}; + +struct aux_event { + struct perf_event_header header; + u64 aux_offset; + u64 aux_size; + u64 flags; +}; + +struct itrace_start_event { + struct perf_event_header header; + u32 pid, tid; +}; + union perf_event { struct perf_event_header header; struct mmap_event mmap; @@ -288,6 +355,7 @@ union perf_event { struct comm_event comm; struct fork_event fork; struct lost_event lost; + struct lost_samples_event lost_samples; struct read_event read; struct throttle_event throttle; struct sample_event sample; @@ -296,6 +364,11 @@ union perf_event { struct tracing_data_event tracing_data; struct build_id_event build_id; struct id_index_event id_index; + struct auxtrace_info_event auxtrace_info; + struct auxtrace_event auxtrace; + struct auxtrace_error_event auxtrace_error; + struct aux_event aux; + struct itrace_start_event itrace_start; }; void perf_event__print_totals(void); @@ -311,10 +384,12 @@ typedef int (*perf_event__handler_t)(struct perf_tool *tool, int perf_event__synthesize_thread_map(struct perf_tool *tool, struct thread_map *threads, perf_event__handler_t process, - struct machine *machine, bool mmap_data); + struct machine *machine, bool mmap_data, + unsigned int proc_map_timeout); int perf_event__synthesize_threads(struct perf_tool *tool, perf_event__handler_t process, - struct machine *machine, bool mmap_data); + struct machine *machine, bool mmap_data, + unsigned int proc_map_timeout); int perf_event__synthesize_kernel_mmap(struct perf_tool *tool, perf_event__handler_t process, struct machine *machine); @@ -331,6 +406,18 @@ int perf_event__process_lost(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct machine *machine); +int perf_event__process_lost_samples(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_aux(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); +int perf_event__process_itrace_start(struct perf_tool *tool, + union perf_event *event, + struct perf_sample *sample, + struct machine *machine); int perf_event__process_mmap(struct perf_tool *tool, union perf_event *event, struct 
perf_sample *sample, @@ -359,6 +446,8 @@ int perf_event__preprocess_sample(const union perf_event *event, struct addr_location *al, struct perf_sample *sample); +void addr_location__put(struct addr_location *al); + struct thread; bool is_bts_event(struct perf_event_attr *attr); @@ -382,12 +471,15 @@ int perf_event__synthesize_mmap_events(struct perf_tool *tool, pid_t pid, pid_t tgid, perf_event__handler_t process, struct machine *machine, - bool mmap_data); + bool mmap_data, + unsigned int proc_map_timeout); size_t perf_event__fprintf_comm(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap(union perf_event *event, FILE *fp); size_t perf_event__fprintf_mmap2(union perf_event *event, FILE *fp); size_t perf_event__fprintf_task(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_aux(union perf_event *event, FILE *fp); +size_t perf_event__fprintf_itrace_start(union perf_event *event, FILE *fp); size_t perf_event__fprintf(union perf_event *event, FILE *fp); u64 kallsyms__get_function_start(const char *kallsyms_filename, diff --git a/tools/perf/util/evlist.c b/tools/perf/util/evlist.c index 28b8ce86bf12..6cfdee68e763 100644 --- a/tools/perf/util/evlist.c +++ b/tools/perf/util/evlist.c @@ -7,7 +7,6 @@ * Released under the GPL v2. (and only v2, not any later version) */ #include "util.h" -#include <api/fs/debugfs.h> #include <api/fs/fs.h> #include <poll.h> #include "cpumap.h" @@ -115,8 +114,8 @@ void perf_evlist__delete(struct perf_evlist *evlist) { perf_evlist__munmap(evlist); perf_evlist__close(evlist); - cpu_map__delete(evlist->cpus); - thread_map__delete(evlist->threads); + cpu_map__put(evlist->cpus); + thread_map__put(evlist->threads); evlist->cpus = NULL; evlist->threads = NULL; perf_evlist__purge(evlist); @@ -298,6 +297,8 @@ void perf_evlist__disable(struct perf_evlist *evlist) PERF_EVENT_IOC_DISABLE, 0); } } + + evlist->enabled = false; } void perf_evlist__enable(struct perf_evlist *evlist) @@ -317,6 +318,13 @@ void perf_evlist__enable(struct perf_evlist *evlist) PERF_EVENT_IOC_ENABLE, 0); } } + + evlist->enabled = true; +} + +void perf_evlist__toggle_enable(struct perf_evlist *evlist) +{ + (evlist->enabled ? perf_evlist__disable : perf_evlist__enable)(evlist); } int perf_evlist__disable_event(struct perf_evlist *evlist, @@ -540,7 +548,7 @@ static void perf_evlist__set_sid_idx(struct perf_evlist *evlist, else sid->cpu = -1; if (!evsel->system_wide && evlist->threads && thread >= 0) - sid->tid = evlist->threads->map[thread]; + sid->tid = thread_map__pid(evlist->threads, thread); else sid->tid = -1; } @@ -635,11 +643,18 @@ static struct perf_evsel *perf_evlist__event2evsel(struct perf_evlist *evlist, union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) { struct perf_mmap *md = &evlist->mmap[idx]; - unsigned int head = perf_mmap__read_head(md); - unsigned int old = md->prev; + u64 head; + u64 old = md->prev; unsigned char *data = md->base + page_size; union perf_event *event = NULL; + /* + * Check if event was unmapped due to a POLLHUP/POLLERR. 
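
Both synthesis entry points above now take a proc_map_timeout argument bounding how long reading a task's /proc/<pid>/maps may take. A hedged caller sketch: process_synth_event is a placeholder for whatever perf_event__handler_t the tool passes, and 500 is only an illustrative value (a millisecond default is used elsewhere in this series):

    unsigned int proc_map_timeout = 500;    /* illustrative only */

    if (perf_event__synthesize_threads(tool, process_synth_event,
                                       machine, false /* mmap_data */,
                                       proc_map_timeout) < 0)
            pr_warning("Couldn't synthesize all threads\n");
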
+ */ + if (!atomic_read(&md->refcnt)) + return NULL; + + head = perf_mmap__read_head(md); if (evlist->overwrite) { /* * If we're further behind than half the buffer, there's a chance @@ -696,19 +711,19 @@ union perf_event *perf_evlist__mmap_read(struct perf_evlist *evlist, int idx) static bool perf_mmap__empty(struct perf_mmap *md) { - return perf_mmap__read_head(md) != md->prev; + return perf_mmap__read_head(md) == md->prev && !md->auxtrace_mmap.base; } static void perf_evlist__mmap_get(struct perf_evlist *evlist, int idx) { - ++evlist->mmap[idx].refcnt; + atomic_inc(&evlist->mmap[idx].refcnt); } static void perf_evlist__mmap_put(struct perf_evlist *evlist, int idx) { - BUG_ON(evlist->mmap[idx].refcnt == 0); + BUG_ON(atomic_read(&evlist->mmap[idx].refcnt) == 0); - if (--evlist->mmap[idx].refcnt == 0) + if (atomic_dec_and_test(&evlist->mmap[idx].refcnt)) __perf_evlist__munmap(evlist, idx); } @@ -717,22 +732,51 @@ void perf_evlist__mmap_consume(struct perf_evlist *evlist, int idx) struct perf_mmap *md = &evlist->mmap[idx]; if (!evlist->overwrite) { - unsigned int old = md->prev; + u64 old = md->prev; perf_mmap__write_tail(md, old); } - if (md->refcnt == 1 && perf_mmap__empty(md)) + if (atomic_read(&md->refcnt) == 1 && perf_mmap__empty(md)) perf_evlist__mmap_put(evlist, idx); } +int __weak auxtrace_mmap__mmap(struct auxtrace_mmap *mm __maybe_unused, + struct auxtrace_mmap_params *mp __maybe_unused, + void *userpg __maybe_unused, + int fd __maybe_unused) +{ + return 0; +} + +void __weak auxtrace_mmap__munmap(struct auxtrace_mmap *mm __maybe_unused) +{ +} + +void __weak auxtrace_mmap_params__init( + struct auxtrace_mmap_params *mp __maybe_unused, + off_t auxtrace_offset __maybe_unused, + unsigned int auxtrace_pages __maybe_unused, + bool auxtrace_overwrite __maybe_unused) +{ +} + +void __weak auxtrace_mmap_params__set_idx( + struct auxtrace_mmap_params *mp __maybe_unused, + struct perf_evlist *evlist __maybe_unused, + int idx __maybe_unused, + bool per_cpu __maybe_unused) +{ +} + static void __perf_evlist__munmap(struct perf_evlist *evlist, int idx) { if (evlist->mmap[idx].base != NULL) { munmap(evlist->mmap[idx].base, evlist->mmap_len); evlist->mmap[idx].base = NULL; - evlist->mmap[idx].refcnt = 0; + atomic_set(&evlist->mmap[idx].refcnt, 0); } + auxtrace_mmap__munmap(&evlist->mmap[idx].auxtrace_mmap); } void perf_evlist__munmap(struct perf_evlist *evlist) @@ -760,6 +804,7 @@ static int perf_evlist__alloc_mmap(struct perf_evlist *evlist) struct mmap_params { int prot; int mask; + struct auxtrace_mmap_params auxtrace_mp; }; static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, @@ -778,7 +823,7 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, * evlist layer can't just drop it when filtering events in * perf_evlist__filter_pollfd(). 
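
With the perf_mmap refcount now atomic and checked inside perf_evlist__mmap_read(), the read/consume loop keeps its usual shape; the final consume on a released, empty ring is what ends up unmapping it. A minimal drain loop, assuming evlist and idx are already set up by the caller:

    union perf_event *event;

    while ((event = perf_evlist__mmap_read(evlist, idx)) != NULL) {
            /* ... hand the event to the session/tool ... */

            /* may drop the last reference and munmap the ring */
            perf_evlist__mmap_consume(evlist, idx);
    }
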
*/ - evlist->mmap[idx].refcnt = 2; + atomic_set(&evlist->mmap[idx].refcnt, 2); evlist->mmap[idx].prev = 0; evlist->mmap[idx].mask = mp->mask; evlist->mmap[idx].base = mmap(NULL, evlist->mmap_len, mp->prot, @@ -790,6 +835,10 @@ static int __perf_evlist__mmap(struct perf_evlist *evlist, int idx, return -1; } + if (auxtrace_mmap__mmap(&evlist->mmap[idx].auxtrace_mmap, + &mp->auxtrace_mp, evlist->mmap[idx].base, fd)) + return -1; + return 0; } @@ -854,6 +903,9 @@ static int perf_evlist__mmap_per_cpu(struct perf_evlist *evlist, for (cpu = 0; cpu < nr_cpus; cpu++) { int output = -1; + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, cpu, + true); + for (thread = 0; thread < nr_threads; thread++) { if (perf_evlist__mmap_per_evsel(evlist, cpu, mp, cpu, thread, &output)) @@ -879,6 +931,9 @@ static int perf_evlist__mmap_per_thread(struct perf_evlist *evlist, for (thread = 0; thread < nr_threads; thread++) { int output = -1; + auxtrace_mmap_params__set_idx(&mp->auxtrace_mp, evlist, thread, + false); + if (perf_evlist__mmap_per_evsel(evlist, thread, mp, 0, thread, &output)) goto out_unmap; @@ -961,10 +1016,8 @@ static long parse_pages_arg(const char *str, unsigned long min, return pages; } -int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, - int unset __maybe_unused) +int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str) { - unsigned int *mmap_pages = opt->value; unsigned long max = UINT_MAX; long pages; @@ -981,20 +1034,32 @@ int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, return 0; } +int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, + int unset __maybe_unused) +{ + return __perf_evlist__parse_mmap_pages(opt->value, str); +} + /** - * perf_evlist__mmap - Create mmaps to receive events. + * perf_evlist__mmap_ex - Create mmaps to receive events. * @evlist: list of events * @pages: map length in pages * @overwrite: overwrite older events? + * @auxtrace_pages - auxtrace map length in pages + * @auxtrace_overwrite - overwrite older auxtrace data? * * If @overwrite is %false the user needs to signal event consumption using * perf_mmap__write_tail(). Using perf_evlist__mmap_read() does this * automatically. * + * Similarly, if @auxtrace_overwrite is %false the user needs to signal data + * consumption using auxtrace_mmap__write_tail(). + * * Return: %0 on success, negative error code otherwise. 
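
A usage sketch for the new entry point documented above; the page counts are illustrative only, and the next hunk keeps perf_evlist__mmap() as a thin wrapper that passes a zero-sized AUX area:

    /* 128 data pages plus a 64-page AUX area, neither in overwrite mode */
    if (perf_evlist__mmap_ex(evlist, 128, false, 64, false) < 0)
            return -errno;
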
*/ -int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, - bool overwrite) +int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, + bool overwrite, unsigned int auxtrace_pages, + bool auxtrace_overwrite) { struct perf_evsel *evsel; const struct cpu_map *cpus = evlist->cpus; @@ -1014,6 +1079,9 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, pr_debug("mmap size %zuB\n", evlist->mmap_len); mp.mask = evlist->mmap_len - page_size - 1; + auxtrace_mmap_params__init(&mp.auxtrace_mp, evlist->mmap_len, + auxtrace_pages, auxtrace_overwrite); + evlist__for_each(evlist, evsel) { if ((evsel->attr.read_format & PERF_FORMAT_ID) && evsel->sample_id == NULL && @@ -1027,6 +1095,37 @@ int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, return perf_evlist__mmap_per_cpu(evlist, &mp); } +int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, + bool overwrite) +{ + return perf_evlist__mmap_ex(evlist, pages, overwrite, 0, false); +} + +static int perf_evlist__propagate_maps(struct perf_evlist *evlist, + struct target *target) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + /* + * We already have cpus for evsel (via PMU sysfs) so + * keep it, if there's no target cpu list defined. + */ + if (evsel->cpus && target->cpu_list) + cpu_map__put(evsel->cpus); + + if (!evsel->cpus || target->cpu_list) + evsel->cpus = cpu_map__get(evlist->cpus); + + evsel->threads = thread_map__get(evlist->threads); + + if (!evsel->cpus || !evsel->threads) + return -ENOMEM; + } + + return 0; +} + int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) { evlist->threads = thread_map__new_str(target->pid, target->tid, @@ -1043,15 +1142,15 @@ int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target) if (evlist->cpus == NULL) goto out_delete_threads; - return 0; + return perf_evlist__propagate_maps(evlist, target); out_delete_threads: - thread_map__delete(evlist->threads); + thread_map__put(evlist->threads); evlist->threads = NULL; return -1; } -int perf_evlist__apply_filters(struct perf_evlist *evlist) +int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel) { struct perf_evsel *evsel; int err = 0; @@ -1063,8 +1162,10 @@ int perf_evlist__apply_filters(struct perf_evlist *evlist) continue; err = perf_evsel__set_filter(evsel, ncpus, nthreads, evsel->filter); - if (err) + if (err) { + *err_evsel = evsel; break; + } } return err; @@ -1086,6 +1187,38 @@ int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter) return err; } +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids) +{ + char *filter; + int ret = -1; + size_t i; + + for (i = 0; i < npids; ++i) { + if (i == 0) { + if (asprintf(&filter, "common_pid != %d", pids[i]) < 0) + return -1; + } else { + char *tmp; + + if (asprintf(&tmp, "%s && common_pid != %d", filter, pids[i]) < 0) + goto out_free; + + free(filter); + filter = tmp; + } + } + + ret = perf_evlist__set_filter(evlist, filter); +out_free: + free(filter); + return ret; +} + +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid) +{ + return perf_evlist__set_filter_pids(evlist, 1, &pid); +} + bool perf_evlist__valid_sample_type(struct perf_evlist *evlist) { struct perf_evsel *pos; @@ -1245,7 +1378,7 @@ static int perf_evlist__create_syswide_maps(struct perf_evlist *evlist) out: return err; out_free_cpus: - cpu_map__delete(evlist->cpus); + cpu_map__put(evlist->cpus); 
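
perf_evlist__apply_filters() now reports which evsel rejected its filter, and the new perf_evlist__set_filter_pid()/set_filter_pids() helpers build the "common_pid != N" expression for the caller. A usage sketch under the usual perf includes; excluding the tool's own pid via getpid() is just an example:

    struct perf_evsel *err_evsel = NULL;

    if (perf_evlist__set_filter_pid(evlist, getpid()) < 0)
            return -1;

    if (perf_evlist__apply_filters(evlist, &err_evsel)) {
            pr_err("failed to set filter on event %s\n",
                   perf_evsel__name(err_evsel));
            return -1;
    }
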
evlist->cpus = NULL; goto out; } @@ -1329,7 +1462,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar * writing exactly one byte, in workload.cork_fd, usually via * perf_evlist__start_workload(). * - * For cancelling the workload without actuallin running it, + * For cancelling the workload without actually running it, * the parent will just close workload.cork_fd, without writing * anything, i.e. read will return zero and we just exit() * here. @@ -1367,7 +1500,7 @@ int perf_evlist__prepare_workload(struct perf_evlist *evlist, struct target *tar __func__, __LINE__); goto out_close_pipes; } - evlist->threads->map[0] = evlist->workload.pid; + thread_map__set_pid(evlist->threads, 0, evlist->workload.pid); } close(child_ready_pipe[1]); diff --git a/tools/perf/util/evlist.h b/tools/perf/util/evlist.h index e99a67632831..037633c1da9d 100644 --- a/tools/perf/util/evlist.h +++ b/tools/perf/util/evlist.h @@ -1,6 +1,7 @@ #ifndef __PERF_EVLIST_H #define __PERF_EVLIST_H 1 +#include <linux/atomic.h> #include <linux/list.h> #include <api/fd/array.h> #include <stdio.h> @@ -8,6 +9,7 @@ #include "event.h" #include "evsel.h" #include "util.h" +#include "auxtrace.h" #include <unistd.h> struct pollfd; @@ -26,8 +28,9 @@ struct record_opts; struct perf_mmap { void *base; int mask; - int refcnt; - unsigned int prev; + atomic_t refcnt; + u64 prev; + struct auxtrace_mmap auxtrace_mmap; char event_copy[PERF_SAMPLE_MAX_SIZE] __attribute__((aligned(8))); }; @@ -37,6 +40,8 @@ struct perf_evlist { int nr_entries; int nr_groups; int nr_mmaps; + bool overwrite; + bool enabled; size_t mmap_len; int id_pos; int is_pos; @@ -45,12 +50,12 @@ struct perf_evlist { int cork_fd; pid_t pid; } workload; - bool overwrite; struct fdarray pollfd; struct perf_mmap *mmap; struct thread_map *threads; struct cpu_map *cpus; struct perf_evsel *selected; + struct events_stats stats; }; struct perf_evsel_str_handler { @@ -77,6 +82,8 @@ int perf_evlist__add_newtp(struct perf_evlist *evlist, const char *sys, const char *name, void *handler); int perf_evlist__set_filter(struct perf_evlist *evlist, const char *filter); +int perf_evlist__set_filter_pid(struct perf_evlist *evlist, pid_t pid); +int perf_evlist__set_filter_pids(struct perf_evlist *evlist, size_t npids, pid_t *pids); struct perf_evsel * perf_evlist__find_tracepoint_by_id(struct perf_evlist *evlist, int id); @@ -119,16 +126,21 @@ int perf_evlist__start_workload(struct perf_evlist *evlist); struct option; +int __perf_evlist__parse_mmap_pages(unsigned int *mmap_pages, const char *str); int perf_evlist__parse_mmap_pages(const struct option *opt, const char *str, int unset); +int perf_evlist__mmap_ex(struct perf_evlist *evlist, unsigned int pages, + bool overwrite, unsigned int auxtrace_pages, + bool auxtrace_overwrite); int perf_evlist__mmap(struct perf_evlist *evlist, unsigned int pages, bool overwrite); void perf_evlist__munmap(struct perf_evlist *evlist); void perf_evlist__disable(struct perf_evlist *evlist); void perf_evlist__enable(struct perf_evlist *evlist); +void perf_evlist__toggle_enable(struct perf_evlist *evlist); int perf_evlist__disable_event(struct perf_evlist *evlist, struct perf_evsel *evsel); @@ -149,7 +161,7 @@ static inline void perf_evlist__set_maps(struct perf_evlist *evlist, } int perf_evlist__create_maps(struct perf_evlist *evlist, struct target *target); -int perf_evlist__apply_filters(struct perf_evlist *evlist); +int perf_evlist__apply_filters(struct perf_evlist *evlist, struct perf_evsel **err_evsel); void 
__perf_evlist__set_leader(struct list_head *list); void perf_evlist__set_leader(struct perf_evlist *evlist); @@ -186,16 +198,15 @@ size_t perf_evlist__fprintf(struct perf_evlist *evlist, FILE *fp); int perf_evlist__strerror_open(struct perf_evlist *evlist, int err, char *buf, size_t size); int perf_evlist__strerror_mmap(struct perf_evlist *evlist, int err, char *buf, size_t size); -static inline unsigned int perf_mmap__read_head(struct perf_mmap *mm) +static inline u64 perf_mmap__read_head(struct perf_mmap *mm) { struct perf_event_mmap_page *pc = mm->base; - int head = ACCESS_ONCE(pc->data_head); + u64 head = ACCESS_ONCE(pc->data_head); rmb(); return head; } -static inline void perf_mmap__write_tail(struct perf_mmap *md, - unsigned long tail) +static inline void perf_mmap__write_tail(struct perf_mmap *md, u64 tail) { struct perf_event_mmap_page *pc = md->base; @@ -278,5 +289,4 @@ void perf_evlist__to_front(struct perf_evlist *evlist, void perf_evlist__set_tracking_event(struct perf_evlist *evlist, struct perf_evsel *tracking_evsel); - #endif /* __PERF_EVLIST_H */ diff --git a/tools/perf/util/evsel.c b/tools/perf/util/evsel.c index ea51a90e20a0..2936b3080722 100644 --- a/tools/perf/util/evsel.c +++ b/tools/perf/util/evsel.c @@ -26,14 +26,19 @@ #include "perf_regs.h" #include "debug.h" #include "trace-event.h" +#include "stat.h" static struct { bool sample_id_all; bool exclude_guest; bool mmap2; bool cloexec; + bool clockid; + bool clockid_wrong; } perf_missing_features; +static clockid_t clockid; + static int perf_evsel__no_extra_init(struct perf_evsel *evsel __maybe_unused) { return 0; @@ -537,13 +542,30 @@ int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size) } static void -perf_evsel__config_callgraph(struct perf_evsel *evsel) +perf_evsel__config_callgraph(struct perf_evsel *evsel, + struct record_opts *opts) { bool function = perf_evsel__is_function_event(evsel); struct perf_event_attr *attr = &evsel->attr; perf_evsel__set_sample_bit(evsel, CALLCHAIN); + if (callchain_param.record_mode == CALLCHAIN_LBR) { + if (!opts->branch_stack) { + if (attr->exclude_user) { + pr_warning("LBR callstack option is only available " + "to get user callchain information. " + "Falling back to framepointers.\n"); + } else { + perf_evsel__set_sample_bit(evsel, BRANCH_STACK); + attr->branch_sample_type = PERF_SAMPLE_BRANCH_USER | + PERF_SAMPLE_BRANCH_CALL_STACK; + } + } else + pr_warning("Cannot use LBR callstack with branch stack. 
" + "Falling back to framepointers.\n"); + } + if (callchain_param.record_mode == CALLCHAIN_DWARF) { if (!function) { perf_evsel__set_sample_bit(evsel, REGS_USER); @@ -667,7 +689,7 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) evsel->attr.exclude_callchain_user = 1; if (callchain_param.enabled && !evsel->no_aux_samples) - perf_evsel__config_callgraph(evsel); + perf_evsel__config_callgraph(evsel, opts); if (opts->sample_intr_regs) { attr->sample_regs_intr = PERF_REGS_MASK; @@ -717,6 +739,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) if (opts->sample_transaction) perf_evsel__set_sample_bit(evsel, TRANSACTION); + if (opts->running_time) { + evsel->attr.read_format |= + PERF_FORMAT_TOTAL_TIME_ENABLED | + PERF_FORMAT_TOTAL_TIME_RUNNING; + } + /* * XXX see the function comment above * @@ -738,6 +766,12 @@ void perf_evsel__config(struct perf_evsel *evsel, struct record_opts *opts) attr->disabled = 0; attr->enable_on_exec = 0; } + + clockid = opts->clockid; + if (opts->use_clockid) { + attr->use_clockid = 1; + attr->clockid = opts->clockid; + } } static int perf_evsel__alloc_fd(struct perf_evsel *evsel, int ncpus, int nthreads) @@ -818,19 +852,6 @@ int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads) return 0; } -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus) -{ - memset(evsel->counts, 0, (sizeof(*evsel->counts) + - (ncpus * sizeof(struct perf_counts_values)))); -} - -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus) -{ - evsel->counts = zalloc((sizeof(*evsel->counts) + - (ncpus * sizeof(struct perf_counts_values)))); - return evsel->counts != NULL ? 0 : -ENOMEM; -} - static void perf_evsel__free_fd(struct perf_evsel *evsel) { xyarray__delete(evsel->fd); @@ -858,17 +879,14 @@ void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads) } } -void perf_evsel__free_counts(struct perf_evsel *evsel) -{ - zfree(&evsel->counts); -} - void perf_evsel__exit(struct perf_evsel *evsel) { assert(list_empty(&evsel->node)); perf_evsel__free_fd(evsel); perf_evsel__free_id(evsel); close_cgroup(evsel->cgrp); + cpu_map__put(evsel->cpus); + thread_map__put(evsel->threads); zfree(&evsel->group_name); zfree(&evsel->name); perf_evsel__object.fini(evsel); @@ -880,7 +898,7 @@ void perf_evsel__delete(struct perf_evsel *evsel) free(evsel); } -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count) { struct perf_counts_values tmp; @@ -892,8 +910,8 @@ void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, tmp = evsel->prev_raw_counts->aggr; evsel->prev_raw_counts->aggr = *count; } else { - tmp = evsel->prev_raw_counts->cpu[cpu]; - evsel->prev_raw_counts->cpu[cpu] = *count; + tmp = *perf_counts(evsel->prev_raw_counts, cpu, thread); + *perf_counts(evsel->prev_raw_counts, cpu, thread) = *count; } count->val = count->val - tmp.val; @@ -921,20 +939,18 @@ void perf_counts_values__scale(struct perf_counts_values *count, *pscaled = scaled; } -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb) +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count) { - struct perf_counts_values count; - - memset(&count, 0, sizeof(count)); + memset(count, 0, sizeof(*count)); if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (readn(FD(evsel, cpu, thread), &count, sizeof(count)) < 
0) + if (readn(FD(evsel, cpu, thread), count, sizeof(*count)) < 0) return -errno; - return cb(evsel, cpu, thread, &count); + return 0; } int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, @@ -946,15 +962,15 @@ int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, if (FD(evsel, cpu, thread) < 0) return -EINVAL; - if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1) < 0) + if (evsel->counts == NULL && perf_evsel__alloc_counts(evsel, cpu + 1, thread + 1) < 0) return -ENOMEM; if (readn(FD(evsel, cpu, thread), &count, nv * sizeof(u64)) < 0) return -errno; - perf_evsel__compute_deltas(evsel, cpu, &count); + perf_evsel__compute_deltas(evsel, cpu, thread, &count); perf_counts_values__scale(&count, scale, NULL); - evsel->counts->cpu[cpu] = count; + *perf_counts(evsel->counts, cpu, thread) = count; return 0; } @@ -978,67 +994,127 @@ static int get_group_fd(struct perf_evsel *evsel, int cpu, int thread) return fd; } -#define __PRINT_ATTR(fmt, cast, field) \ - fprintf(fp, " %-19s "fmt"\n", #field, cast attr->field) - -#define PRINT_ATTR_U32(field) __PRINT_ATTR("%u" , , field) -#define PRINT_ATTR_X32(field) __PRINT_ATTR("%#x", , field) -#define PRINT_ATTR_U64(field) __PRINT_ATTR("%" PRIu64, (uint64_t), field) -#define PRINT_ATTR_X64(field) __PRINT_ATTR("%#"PRIx64, (uint64_t), field) - -#define PRINT_ATTR2N(name1, field1, name2, field2) \ - fprintf(fp, " %-19s %u %-19s %u\n", \ - name1, attr->field1, name2, attr->field2) - -#define PRINT_ATTR2(field1, field2) \ - PRINT_ATTR2N(#field1, field1, #field2, field2) - -static size_t perf_event_attr__fprintf(struct perf_event_attr *attr, FILE *fp) -{ - size_t ret = 0; - - ret += fprintf(fp, "%.60s\n", graph_dotted_line); - ret += fprintf(fp, "perf_event_attr:\n"); - - ret += PRINT_ATTR_U32(type); - ret += PRINT_ATTR_U32(size); - ret += PRINT_ATTR_X64(config); - ret += PRINT_ATTR_U64(sample_period); - ret += PRINT_ATTR_U64(sample_freq); - ret += PRINT_ATTR_X64(sample_type); - ret += PRINT_ATTR_X64(read_format); - - ret += PRINT_ATTR2(disabled, inherit); - ret += PRINT_ATTR2(pinned, exclusive); - ret += PRINT_ATTR2(exclude_user, exclude_kernel); - ret += PRINT_ATTR2(exclude_hv, exclude_idle); - ret += PRINT_ATTR2(mmap, comm); - ret += PRINT_ATTR2(mmap2, comm_exec); - ret += PRINT_ATTR2(freq, inherit_stat); - ret += PRINT_ATTR2(enable_on_exec, task); - ret += PRINT_ATTR2(watermark, precise_ip); - ret += PRINT_ATTR2(mmap_data, sample_id_all); - ret += PRINT_ATTR2(exclude_host, exclude_guest); - ret += PRINT_ATTR2N("excl.callchain_kern", exclude_callchain_kernel, - "excl.callchain_user", exclude_callchain_user); - - ret += PRINT_ATTR_U32(wakeup_events); - ret += PRINT_ATTR_U32(wakeup_watermark); - ret += PRINT_ATTR_X32(bp_type); - ret += PRINT_ATTR_X64(bp_addr); - ret += PRINT_ATTR_X64(config1); - ret += PRINT_ATTR_U64(bp_len); - ret += PRINT_ATTR_X64(config2); - ret += PRINT_ATTR_X64(branch_sample_type); - ret += PRINT_ATTR_X64(sample_regs_user); - ret += PRINT_ATTR_U32(sample_stack_user); - ret += PRINT_ATTR_X64(sample_regs_intr); - - ret += fprintf(fp, "%.60s\n", graph_dotted_line); +struct bit_names { + int bit; + const char *name; +}; + +static void __p_bits(char *buf, size_t size, u64 value, struct bit_names *bits) +{ + bool first_bit = true; + int i = 0; + + do { + if (value & bits[i].bit) { + buf += scnprintf(buf, size, "%s%s", first_bit ? 
"" : "|", bits[i].name); + first_bit = false; + } + } while (bits[++i].name != NULL); +} + +static void __p_sample_type(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_SAMPLE_##n, #n } + struct bit_names bits[] = { + bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), + bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), + bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), + bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), + bit_name(IDENTIFIER), bit_name(REGS_INTR), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +static void __p_read_format(char *buf, size_t size, u64 value) +{ +#define bit_name(n) { PERF_FORMAT_##n, #n } + struct bit_names bits[] = { + bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), + bit_name(ID), bit_name(GROUP), + { .name = NULL, } + }; +#undef bit_name + __p_bits(buf, size, value, bits); +} + +#define BUF_SIZE 1024 + +#define p_hex(val) snprintf(buf, BUF_SIZE, "%#"PRIx64, (uint64_t)(val)) +#define p_unsigned(val) snprintf(buf, BUF_SIZE, "%"PRIu64, (uint64_t)(val)) +#define p_signed(val) snprintf(buf, BUF_SIZE, "%"PRId64, (int64_t)(val)) +#define p_sample_type(val) __p_sample_type(buf, BUF_SIZE, val) +#define p_read_format(val) __p_read_format(buf, BUF_SIZE, val) + +#define PRINT_ATTRn(_n, _f, _p) \ +do { \ + if (attr->_f) { \ + _p(attr->_f); \ + ret += attr__fprintf(fp, _n, buf, priv);\ + } \ +} while (0) + +#define PRINT_ATTRf(_f, _p) PRINT_ATTRn(#_f, _f, _p) + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv) +{ + char buf[BUF_SIZE]; + int ret = 0; + + PRINT_ATTRf(type, p_unsigned); + PRINT_ATTRf(size, p_unsigned); + PRINT_ATTRf(config, p_hex); + PRINT_ATTRn("{ sample_period, sample_freq }", sample_period, p_unsigned); + PRINT_ATTRf(sample_type, p_sample_type); + PRINT_ATTRf(read_format, p_read_format); + + PRINT_ATTRf(disabled, p_unsigned); + PRINT_ATTRf(inherit, p_unsigned); + PRINT_ATTRf(pinned, p_unsigned); + PRINT_ATTRf(exclusive, p_unsigned); + PRINT_ATTRf(exclude_user, p_unsigned); + PRINT_ATTRf(exclude_kernel, p_unsigned); + PRINT_ATTRf(exclude_hv, p_unsigned); + PRINT_ATTRf(exclude_idle, p_unsigned); + PRINT_ATTRf(mmap, p_unsigned); + PRINT_ATTRf(comm, p_unsigned); + PRINT_ATTRf(freq, p_unsigned); + PRINT_ATTRf(inherit_stat, p_unsigned); + PRINT_ATTRf(enable_on_exec, p_unsigned); + PRINT_ATTRf(task, p_unsigned); + PRINT_ATTRf(watermark, p_unsigned); + PRINT_ATTRf(precise_ip, p_unsigned); + PRINT_ATTRf(mmap_data, p_unsigned); + PRINT_ATTRf(sample_id_all, p_unsigned); + PRINT_ATTRf(exclude_host, p_unsigned); + PRINT_ATTRf(exclude_guest, p_unsigned); + PRINT_ATTRf(exclude_callchain_kernel, p_unsigned); + PRINT_ATTRf(exclude_callchain_user, p_unsigned); + PRINT_ATTRf(mmap2, p_unsigned); + PRINT_ATTRf(comm_exec, p_unsigned); + PRINT_ATTRf(use_clockid, p_unsigned); + + PRINT_ATTRn("{ wakeup_events, wakeup_watermark }", wakeup_events, p_unsigned); + PRINT_ATTRf(bp_type, p_unsigned); + PRINT_ATTRn("{ bp_addr, config1 }", bp_addr, p_hex); + PRINT_ATTRn("{ bp_len, config2 }", bp_len, p_hex); + PRINT_ATTRf(sample_regs_user, p_hex); + PRINT_ATTRf(sample_stack_user, p_unsigned); + PRINT_ATTRf(clockid, p_signed); + PRINT_ATTRf(sample_regs_intr, p_hex); + PRINT_ATTRf(aux_watermark, p_unsigned); return ret; } +static int __open_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __attribute__((unused))) +{ + return fprintf(fp, " %-32s %s\n", name, val); +} + static int 
__perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, struct thread_map *threads) { @@ -1062,6 +1138,12 @@ static int __perf_evsel__open(struct perf_evsel *evsel, struct cpu_map *cpus, } fallback_missing_features: + if (perf_missing_features.clockid_wrong) + evsel->attr.clockid = CLOCK_MONOTONIC; /* should always work */ + if (perf_missing_features.clockid) { + evsel->attr.use_clockid = 0; + evsel->attr.clockid = 0; + } if (perf_missing_features.cloexec) flags &= ~(unsigned long)PERF_FLAG_FD_CLOEXEC; if (perf_missing_features.mmap2) @@ -1072,8 +1154,12 @@ retry_sample_id: if (perf_missing_features.sample_id_all) evsel->attr.sample_id_all = 0; - if (verbose >= 2) - perf_event_attr__fprintf(&evsel->attr, stderr); + if (verbose >= 2) { + fprintf(stderr, "%.60s\n", graph_dotted_line); + fprintf(stderr, "perf_event_attr:\n"); + perf_event_attr__fprintf(stderr, &evsel->attr, __open_attr__fprintf, NULL); + fprintf(stderr, "%.60s\n", graph_dotted_line); + } for (cpu = 0; cpu < cpus->nr; cpu++) { @@ -1081,7 +1167,7 @@ retry_sample_id: int group_fd; if (!evsel->cgrp && !evsel->system_wide) - pid = threads->map[thread]; + pid = thread_map__pid(threads, thread); group_fd = get_group_fd(evsel, cpu, thread); retry_open: @@ -1099,6 +1185,17 @@ retry_open: goto try_fallback; } set_rlimit = NO_CHANGE; + + /* + * If we succeeded but had to kill clockid, fail and + * have perf_evsel__open_strerror() print us a nice + * error. + */ + if (perf_missing_features.clockid || + perf_missing_features.clockid_wrong) { + err = -EINVAL; + goto out_close; + } } } @@ -1132,7 +1229,17 @@ try_fallback: if (err != -EINVAL || cpu > 0 || thread > 0) goto out_close; - if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { + /* + * Must probe features in the order they were added to the + * perf_event_attr interface. + */ + if (!perf_missing_features.clockid_wrong && evsel->attr.use_clockid) { + perf_missing_features.clockid_wrong = true; + goto fallback_missing_features; + } else if (!perf_missing_features.clockid && evsel->attr.use_clockid) { + perf_missing_features.clockid = true; + goto fallback_missing_features; + } else if (!perf_missing_features.cloexec && (flags & PERF_FLAG_FD_CLOEXEC)) { perf_missing_features.cloexec = true; goto fallback_missing_features; } else if (!perf_missing_features.mmap2 && evsel->attr.mmap2) { @@ -1892,7 +1999,7 @@ u64 perf_evsel__intval(struct perf_evsel *evsel, struct perf_sample *sample, value = *(u32 *)ptr; break; case 8: - value = *(u64 *)ptr; + memcpy(&value, ptr, sizeof(u64)); break; default: return 0; @@ -1933,62 +2040,9 @@ static int comma_fprintf(FILE *fp, bool *first, const char *fmt, ...) return ret; } -static int __if_fprintf(FILE *fp, bool *first, const char *field, u64 value) -{ - if (value == 0) - return 0; - - return comma_fprintf(fp, first, " %s: %" PRIu64, field, value); -} - -#define if_print(field) printed += __if_fprintf(fp, &first, #field, evsel->attr.field) - -struct bit_names { - int bit; - const char *name; -}; - -static int bits__fprintf(FILE *fp, const char *field, u64 value, - struct bit_names *bits, bool *first) -{ - int i = 0, printed = comma_fprintf(fp, first, " %s: ", field); - bool first_bit = true; - - do { - if (value & bits[i].bit) { - printed += fprintf(fp, "%s%s", first_bit ? 
"" : "|", bits[i].name); - first_bit = false; - } - } while (bits[++i].name != NULL); - - return printed; -} - -static int sample_type__fprintf(FILE *fp, bool *first, u64 value) +static int __print_attr__fprintf(FILE *fp, const char *name, const char *val, void *priv) { -#define bit_name(n) { PERF_SAMPLE_##n, #n } - struct bit_names bits[] = { - bit_name(IP), bit_name(TID), bit_name(TIME), bit_name(ADDR), - bit_name(READ), bit_name(CALLCHAIN), bit_name(ID), bit_name(CPU), - bit_name(PERIOD), bit_name(STREAM_ID), bit_name(RAW), - bit_name(BRANCH_STACK), bit_name(REGS_USER), bit_name(STACK_USER), - bit_name(IDENTIFIER), bit_name(REGS_INTR), - { .name = NULL, } - }; -#undef bit_name - return bits__fprintf(fp, "sample_type", value, bits, first); -} - -static int read_format__fprintf(FILE *fp, bool *first, u64 value) -{ -#define bit_name(n) { PERF_FORMAT_##n, #n } - struct bit_names bits[] = { - bit_name(TOTAL_TIME_ENABLED), bit_name(TOTAL_TIME_RUNNING), - bit_name(ID), bit_name(GROUP), - { .name = NULL, } - }; -#undef bit_name - return bits__fprintf(fp, "read_format", value, bits, first); + return comma_fprintf(fp, (bool *)priv, " %s: %s", name, val); } int perf_evsel__fprintf(struct perf_evsel *evsel, @@ -2017,47 +2071,13 @@ int perf_evsel__fprintf(struct perf_evsel *evsel, printed += fprintf(fp, "%s", perf_evsel__name(evsel)); - if (details->verbose || details->freq) { + if (details->verbose) { + printed += perf_event_attr__fprintf(fp, &evsel->attr, + __print_attr__fprintf, &first); + } else if (details->freq) { printed += comma_fprintf(fp, &first, " sample_freq=%" PRIu64, (u64)evsel->attr.sample_freq); } - - if (details->verbose) { - if_print(type); - if_print(config); - if_print(config1); - if_print(config2); - if_print(size); - printed += sample_type__fprintf(fp, &first, evsel->attr.sample_type); - if (evsel->attr.read_format) - printed += read_format__fprintf(fp, &first, evsel->attr.read_format); - if_print(disabled); - if_print(inherit); - if_print(pinned); - if_print(exclusive); - if_print(exclude_user); - if_print(exclude_kernel); - if_print(exclude_hv); - if_print(exclude_idle); - if_print(mmap); - if_print(mmap2); - if_print(comm); - if_print(comm_exec); - if_print(freq); - if_print(inherit_stat); - if_print(enable_on_exec); - if_print(task); - if_print(watermark); - if_print(precise_ip); - if_print(mmap_data); - if_print(sample_id_all); - if_print(exclude_host); - if_print(exclude_guest); - if_print(__reserved_1); - if_print(wakeup_events); - if_print(bp_type); - if_print(branch_sample_type); - } out: fputc('\n', fp); return ++printed; @@ -2112,7 +2132,9 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, case EMFILE: return scnprintf(msg, size, "%s", "Too many events are opened.\n" - "Try again after reducing the number of events."); + "Probably the maximum number of open file descriptors has been reached.\n" + "Hint: Try again after reducing the number of events.\n" + "Hint: Try increasing the limit with 'ulimit -n <limit>'"); case ENODEV: if (target->cpu_list) return scnprintf(msg, size, "%s", @@ -2135,6 +2157,12 @@ int perf_evsel__open_strerror(struct perf_evsel *evsel, struct target *target, "The PMU counters are busy/taken by another profiler.\n" "We found oprofile daemon running, please stop it and try again."); break; + case EINVAL: + if (perf_missing_features.clockid) + return scnprintf(msg, size, "clockid feature not supported."); + if (perf_missing_features.clockid_wrong) + return scnprintf(msg, size, "wrong clockid (%d).", clockid); + 
break; default: break; } diff --git a/tools/perf/util/evsel.h b/tools/perf/util/evsel.h index 38622747d130..4a7ed5656cf0 100644 --- a/tools/perf/util/evsel.h +++ b/tools/perf/util/evsel.h @@ -8,23 +8,8 @@ #include <linux/types.h> #include "xyarray.h" #include "symbol.h" - -struct perf_counts_values { - union { - struct { - u64 val; - u64 ena; - u64 run; - }; - u64 values[3]; - }; -}; - -struct perf_counts { - s8 scaled; - struct perf_counts_values aggr; - struct perf_counts_values cpu[]; -}; +#include "cpumap.h" +#include "stat.h" struct perf_evsel; @@ -73,7 +58,6 @@ struct perf_evsel { char *name; double scale; const char *unit; - bool snapshot; struct event_format *tp_format; union { void *priv; @@ -83,9 +67,11 @@ struct perf_evsel { struct cgroup_sel *cgrp; void *handler; struct cpu_map *cpus; + struct thread_map *threads; unsigned int sample_size; int id_pos; int is_pos; + bool snapshot; bool supported; bool needs_swap; bool no_aux_samples; @@ -93,11 +79,11 @@ struct perf_evsel { bool system_wide; bool tracking; bool per_pkg; - unsigned long *per_pkg_mask; /* parse modifier helper */ int exclude_GH; int nr_members; int sample_read; + unsigned long *per_pkg_mask; struct perf_evsel *leader; char *group_name; }; @@ -113,10 +99,20 @@ struct thread_map; struct perf_evlist; struct record_opts; +static inline struct cpu_map *perf_evsel__cpus(struct perf_evsel *evsel) +{ + return evsel->cpus; +} + +static inline int perf_evsel__nr_cpus(struct perf_evsel *evsel) +{ + return perf_evsel__cpus(evsel)->nr; +} + void perf_counts_values__scale(struct perf_counts_values *count, bool scale, s8 *pscaled); -void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, +void perf_evsel__compute_deltas(struct perf_evsel *evsel, int cpu, int thread, struct perf_counts_values *count); int perf_evsel__object_config(size_t object_size, @@ -170,9 +166,6 @@ const char *perf_evsel__group_name(struct perf_evsel *evsel); int perf_evsel__group_desc(struct perf_evsel *evsel, char *buf, size_t size); int perf_evsel__alloc_id(struct perf_evsel *evsel, int ncpus, int nthreads); -int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus); -void perf_evsel__reset_counts(struct perf_evsel *evsel, int ncpus); -void perf_evsel__free_counts(struct perf_evsel *evsel); void perf_evsel__close_fd(struct perf_evsel *evsel, int ncpus, int nthreads); void __perf_evsel__set_sample_bit(struct perf_evsel *evsel, @@ -236,12 +229,8 @@ static inline bool perf_evsel__match2(struct perf_evsel *e1, (a)->attr.type == (b)->attr.type && \ (a)->attr.config == (b)->attr.config) -typedef int (perf_evsel__read_cb_t)(struct perf_evsel *evsel, - int cpu, int thread, - struct perf_counts_values *count); - -int perf_evsel__read_cb(struct perf_evsel *evsel, int cpu, int thread, - perf_evsel__read_cb_t cb); +int perf_evsel__read(struct perf_evsel *evsel, int cpu, int thread, + struct perf_counts_values *count); int __perf_evsel__read_on_cpu(struct perf_evsel *evsel, int cpu, int thread, bool scale); @@ -335,6 +324,7 @@ struct perf_attr_details { bool freq; bool verbose; bool event_group; + bool force; }; int perf_evsel__fprintf(struct perf_evsel *evsel, @@ -355,4 +345,14 @@ for ((_evsel) = list_entry((_leader)->node.next, struct perf_evsel, node); \ (_evsel) && (_evsel)->leader == (_leader); \ (_evsel) = list_entry((_evsel)->node.next, struct perf_evsel, node)) +static inline bool has_branch_callstack(struct perf_evsel *evsel) +{ + return evsel->attr.branch_sample_type & PERF_SAMPLE_BRANCH_CALL_STACK; +} + +typedef int (*attr__fprintf_f)(FILE 
*, const char *, const char *, void *); + +int perf_event_attr__fprintf(FILE *fp, struct perf_event_attr *attr, + attr__fprintf_f attr__fprintf, void *priv); + #endif /* __PERF_EVSEL_H */ diff --git a/tools/perf/util/header.c b/tools/perf/util/header.c index 1f407f7352a7..03ace57a800c 100644 --- a/tools/perf/util/header.c +++ b/tools/perf/util/header.c @@ -869,6 +869,20 @@ static int write_branch_stack(int fd __maybe_unused, return 0; } +static int write_auxtrace(int fd, struct perf_header *h, + struct perf_evlist *evlist __maybe_unused) +{ + struct perf_session *session; + int err; + + session = container_of(h, struct perf_session, header); + + err = auxtrace_index__write(fd, &session->auxtrace_index); + if (err < 0) + pr_err("Failed to write auxtrace index\n"); + return err; +} + static void print_hostname(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1049,12 +1063,17 @@ out: free(buf); return events; error: - if (events) - free_event_desc(events); + free_event_desc(events); events = NULL; goto out; } +static int __desc_attr__fprintf(FILE *fp, const char *name, const char *val, + void *priv __attribute__((unused))) +{ + return fprintf(fp, ", %s = %s", name, val); +} + static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) { struct perf_evsel *evsel, *events = read_event_desc(ph, fd); @@ -1069,26 +1088,6 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) for (evsel = events; evsel->attr.size; evsel++) { fprintf(fp, "# event : name = %s, ", evsel->name); - fprintf(fp, "type = %d, config = 0x%"PRIx64 - ", config1 = 0x%"PRIx64", config2 = 0x%"PRIx64, - evsel->attr.type, - (u64)evsel->attr.config, - (u64)evsel->attr.config1, - (u64)evsel->attr.config2); - - fprintf(fp, ", excl_usr = %d, excl_kern = %d", - evsel->attr.exclude_user, - evsel->attr.exclude_kernel); - - fprintf(fp, ", excl_host = %d, excl_guest = %d", - evsel->attr.exclude_host, - evsel->attr.exclude_guest); - - fprintf(fp, ", precise_ip = %d", evsel->attr.precise_ip); - - fprintf(fp, ", attr_mmap2 = %d", evsel->attr.mmap2); - fprintf(fp, ", attr_mmap = %d", evsel->attr.mmap); - fprintf(fp, ", attr_mmap_data = %d", evsel->attr.mmap_data); if (evsel->ids) { fprintf(fp, ", id = {"); for (j = 0, id = evsel->id; j < evsel->ids; j++, id++) { @@ -1099,6 +1098,8 @@ static void print_event_desc(struct perf_header *ph, int fd, FILE *fp) fprintf(fp, " }"); } + perf_event_attr__fprintf(fp, &evsel->attr, __desc_attr__fprintf, NULL); + fputc('\n', fp); } @@ -1163,6 +1164,12 @@ static void print_branch_stack(struct perf_header *ph __maybe_unused, fprintf(fp, "# contains samples with branch stack\n"); } +static void print_auxtrace(struct perf_header *ph __maybe_unused, + int fd __maybe_unused, FILE *fp) +{ + fprintf(fp, "# contains AUX area data (e.g. 
instruction trace)\n"); +} + static void print_pmu_mappings(struct perf_header *ph, int fd __maybe_unused, FILE *fp) { @@ -1230,9 +1237,8 @@ static int __event_process_build_id(struct build_id_event *bev, struct perf_session *session) { int err = -1; - struct dsos *dsos; struct machine *machine; - u16 misc; + u16 cpumode; struct dso *dso; enum dso_kernel_type dso_type; @@ -1240,39 +1246,37 @@ static int __event_process_build_id(struct build_id_event *bev, if (!machine) goto out; - misc = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; + cpumode = bev->header.misc & PERF_RECORD_MISC_CPUMODE_MASK; - switch (misc) { + switch (cpumode) { case PERF_RECORD_MISC_KERNEL: dso_type = DSO_TYPE_KERNEL; - dsos = &machine->kernel_dsos; break; case PERF_RECORD_MISC_GUEST_KERNEL: dso_type = DSO_TYPE_GUEST_KERNEL; - dsos = &machine->kernel_dsos; break; case PERF_RECORD_MISC_USER: case PERF_RECORD_MISC_GUEST_USER: dso_type = DSO_TYPE_USER; - dsos = &machine->user_dsos; break; default: goto out; } - dso = __dsos__findnew(dsos, filename); + dso = machine__findnew_dso(machine, filename); if (dso != NULL) { char sbuild_id[BUILD_ID_SIZE * 2 + 1]; dso__set_build_id(dso, &bev->build_id); - if (!is_kernel_module(filename, NULL)) + if (!is_kernel_module(filename, cpumode)) dso->kernel = dso_type; build_id__sprintf(dso->build_id, sizeof(dso->build_id), sbuild_id); pr_debug("build id event received for %s: %s\n", dso->long_name, sbuild_id); + dso__put(dso); } err = 0; @@ -1833,6 +1837,22 @@ out_free: return ret; } +static int process_auxtrace(struct perf_file_section *section, + struct perf_header *ph, int fd, + void *data __maybe_unused) +{ + struct perf_session *session; + int err; + + session = container_of(ph, struct perf_session, header); + + err = auxtrace_index__process(fd, section->size, session, + ph->needs_swap); + if (err < 0) + pr_err("Failed to process auxtrace index\n"); + return err; +} + struct feature_ops { int (*write)(int fd, struct perf_header *h, struct perf_evlist *evlist); void (*print)(struct perf_header *h, int fd, FILE *fp); @@ -1873,6 +1893,7 @@ static const struct feature_ops feat_ops[HEADER_LAST_FEATURE] = { FEAT_OPA(HEADER_BRANCH_STACK, branch_stack), FEAT_OPP(HEADER_PMU_MAPPINGS, pmu_mappings), FEAT_OPP(HEADER_GROUP_DESC, group_desc), + FEAT_OPP(HEADER_AUXTRACE, auxtrace), }; struct header_print_data { @@ -2516,8 +2537,11 @@ int perf_session__read_header(struct perf_session *session) if (read_attr(fd, header, &f_attr) < 0) goto out_errno; - if (header->needs_swap) + if (header->needs_swap) { + f_attr.ids.size = bswap_64(f_attr.ids.size); + f_attr.ids.offset = bswap_64(f_attr.ids.offset); perf_event__attr_swap(&f_attr.attr); + } tmp = lseek(fd, 0, SEEK_CUR); evsel = perf_evsel__new(&f_attr.attr); diff --git a/tools/perf/util/header.h b/tools/perf/util/header.h index 3bb90ac172a1..d4d57962c591 100644 --- a/tools/perf/util/header.h +++ b/tools/perf/util/header.h @@ -30,6 +30,7 @@ enum { HEADER_BRANCH_STACK, HEADER_PMU_MAPPINGS, HEADER_GROUP_DESC, + HEADER_AUXTRACE, HEADER_LAST_FEATURE, HEADER_FEAT_BITS = 256, }; diff --git a/tools/perf/util/hist.c b/tools/perf/util/hist.c index 70b48a65064c..6f28d53d4e46 100644 --- a/tools/perf/util/hist.c +++ b/tools/perf/util/hist.c @@ -263,15 +263,9 @@ void hists__decay_entries(struct hists *hists, bool zap_user, bool zap_kernel) while (next) { n = rb_entry(next, struct hist_entry, rb_node); next = rb_next(&n->rb_node); - /* - * We may be annotating this, for instance, so keep it here in - * case some it gets new samples, we'll eventually free it when 
- * the user stops browsing and it agains gets fully decayed. - */ if (((zap_user && n->level == '.') || (zap_kernel && n->level != '.') || - hists__decay_entry(hists, n)) && - !n->used) { + hists__decay_entry(hists, n))) { hists__delete_entry(hists, n); } } @@ -319,8 +313,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, memset(&he->stat, 0, sizeof(he->stat)); } - if (he->ms.map) - he->ms.map->referenced = true; + map__get(he->ms.map); if (he->branch_info) { /* @@ -330,6 +323,7 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, */ he->branch_info = malloc(sizeof(*he->branch_info)); if (he->branch_info == NULL) { + map__zput(he->ms.map); free(he->stat_acc); free(he); return NULL; @@ -338,23 +332,20 @@ static struct hist_entry *hist_entry__new(struct hist_entry *template, memcpy(he->branch_info, template->branch_info, sizeof(*he->branch_info)); - if (he->branch_info->from.map) - he->branch_info->from.map->referenced = true; - if (he->branch_info->to.map) - he->branch_info->to.map->referenced = true; + map__get(he->branch_info->from.map); + map__get(he->branch_info->to.map); } if (he->mem_info) { - if (he->mem_info->iaddr.map) - he->mem_info->iaddr.map->referenced = true; - if (he->mem_info->daddr.map) - he->mem_info->daddr.map->referenced = true; + map__get(he->mem_info->iaddr.map); + map__get(he->mem_info->daddr.map); } if (symbol_conf.use_callchain) callchain_init(he->callchain); INIT_LIST_HEAD(&he->pairs.node); + thread__get(he->thread); } return he; @@ -367,10 +358,10 @@ static u8 symbol__parent_filter(const struct symbol *parent) return 0; } -static struct hist_entry *add_hist_entry(struct hists *hists, - struct hist_entry *entry, - struct addr_location *al, - bool sample_self) +static struct hist_entry *hists__findnew_entry(struct hists *hists, + struct hist_entry *entry, + struct addr_location *al, + bool sample_self) { struct rb_node **p; struct rb_node *parent = NULL; @@ -412,9 +403,8 @@ static struct hist_entry *add_hist_entry(struct hists *hists, * the history counter to increment. */ if (he->ms.map != entry->ms.map) { - he->ms.map = entry->ms.map; - if (he->ms.map) - he->ms.map->referenced = true; + map__put(he->ms.map); + he->ms.map = map__get(entry->ms.map); } goto out; } @@ -473,7 +463,7 @@ struct hist_entry *__hists__add_entry(struct hists *hists, .transaction = transaction, }; - return add_hist_entry(hists, &entry, al, sample_self); + return hists__findnew_entry(hists, &entry, al, sample_self); } static int @@ -553,9 +543,9 @@ iter_finish_mem_entry(struct hist_entry_iter *iter, out: /* - * We don't need to free iter->priv (mem_info) here since - * the mem info was either already freed in add_hist_entry() or - * passed to a new hist entry by hist_entry__new(). + * We don't need to free iter->priv (mem_info) here since the mem info + * was either already freed in hists__findnew_entry() or passed to a + * new hist entry by hist_entry__new(). 
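
The hist_entry hunks above replace the old map->referenced flag with real references: hist_entry__new()/hists__findnew_entry() take map and thread references, and hist_entry__delete() (next hunk) drops them. The ownership rule reduces to the usual get/zput pairing, roughly:

    /* hist_entry__new(): take references on what the entry points at */
    map__get(he->ms.map);
    thread__get(he->thread);

    /* hist_entry__delete(): drop them and clear the pointers */
    map__zput(he->ms.map);
    thread__zput(he->thread);
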
*/ iter->priv = NULL; @@ -856,19 +846,15 @@ const struct hist_iter_ops hist_iter_cumulative = { }; int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, - struct perf_evsel *evsel, struct perf_sample *sample, int max_stack_depth, void *arg) { int err, err2; - err = sample__resolve_callchain(sample, &iter->parent, evsel, al, - max_stack_depth); + err = sample__resolve_callchain(iter->sample, &iter->parent, + iter->evsel, al, max_stack_depth); if (err) return err; - iter->evsel = evsel; - iter->sample = sample; - err = iter->ops->prepare_entry(iter, al); if (err) goto out; @@ -941,8 +927,21 @@ hist_entry__collapse(struct hist_entry *left, struct hist_entry *right) void hist_entry__delete(struct hist_entry *he) { - zfree(&he->branch_info); - zfree(&he->mem_info); + thread__zput(he->thread); + map__zput(he->ms.map); + + if (he->branch_info) { + map__zput(he->branch_info->from.map); + map__zput(he->branch_info->to.map); + zfree(&he->branch_info); + } + + if (he->mem_info) { + map__zput(he->mem_info->iaddr.map); + map__zput(he->mem_info->daddr.map); + zfree(&he->mem_info); + } + zfree(&he->stat_acc); free_srcline(he->srcline); free_callchain(he->callchain); @@ -1167,8 +1166,9 @@ static void hists__remove_entry_filter(struct hists *hists, struct hist_entry *h return; /* force fold unfiltered entry for simplicity */ - h->ms.unfolded = false; + h->unfolded = false; h->row_offset = 0; + h->nr_rows = 0; hists->stats.nr_non_filtered_samples += h->stat.nr_events; diff --git a/tools/perf/util/hist.h b/tools/perf/util/hist.h index 2b690d028907..5ed8d9c22981 100644 --- a/tools/perf/util/hist.h +++ b/tools/perf/util/hist.h @@ -60,7 +60,7 @@ struct hists { struct rb_root entries_collapsed; u64 nr_entries; u64 nr_non_filtered_entries; - const struct thread *thread_filter; + struct thread *thread_filter; const struct dso *dso_filter; const char *uid_filter_str; const char *symbol_filter_str; @@ -111,7 +111,6 @@ struct hist_entry *__hists__add_entry(struct hists *hists, u64 weight, u64 transaction, bool sample_self); int hist_entry_iter__add(struct hist_entry_iter *iter, struct addr_location *al, - struct perf_evsel *evsel, struct perf_sample *sample, int max_stack_depth, void *arg); int64_t hist_entry__cmp(struct hist_entry *left, struct hist_entry *right); @@ -303,6 +302,9 @@ struct hist_browser_timer { #ifdef HAVE_SLANG_SUPPORT #include "../ui/keysyms.h" +int map_symbol__tui_annotate(struct map_symbol *ms, struct perf_evsel *evsel, + struct hist_browser_timer *hbt); + int hist_entry__tui_annotate(struct hist_entry *he, struct perf_evsel *evsel, struct hist_browser_timer *hbt); @@ -321,6 +323,12 @@ int perf_evlist__tui_browse_hists(struct perf_evlist *evlist __maybe_unused, { return 0; } +static inline int map_symbol__tui_annotate(struct map_symbol *ms __maybe_unused, + struct perf_evsel *evsel __maybe_unused, + struct hist_browser_timer *hbt __maybe_unused) +{ + return 0; +} static inline int hist_entry__tui_annotate(struct hist_entry *he __maybe_unused, struct perf_evsel *evsel __maybe_unused, diff --git a/tools/perf/util/include/asm/alternative-asm.h b/tools/perf/util/include/asm/alternative-asm.h index 6789d788d494..3a3a0f16456a 100644 --- a/tools/perf/util/include/asm/alternative-asm.h +++ b/tools/perf/util/include/asm/alternative-asm.h @@ -4,5 +4,6 @@ /* Just disable it so we can build arch/x86/lib/memcpy_64.S for perf bench: */ #define altinstruction_entry # +#define ALTERNATIVE_2 # #endif diff --git a/tools/perf/util/include/linux/poison.h 
b/tools/perf/util/include/linux/poison.h deleted file mode 100644 index fef6dbc9ce13..000000000000 --- a/tools/perf/util/include/linux/poison.h +++ /dev/null @@ -1 +0,0 @@ -#include "../../../../include/linux/poison.h" diff --git a/tools/perf/util/include/linux/rbtree.h b/tools/perf/util/include/linux/rbtree.h deleted file mode 100644 index 2a030c5af3aa..000000000000 --- a/tools/perf/util/include/linux/rbtree.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <stdbool.h> -#include "../../../../include/linux/rbtree.h" diff --git a/tools/perf/util/include/linux/rbtree_augmented.h b/tools/perf/util/include/linux/rbtree_augmented.h deleted file mode 100644 index 9d6fcdf1788b..000000000000 --- a/tools/perf/util/include/linux/rbtree_augmented.h +++ /dev/null @@ -1,2 +0,0 @@ -#include <stdbool.h> -#include "../../../../include/linux/rbtree_augmented.h" diff --git a/tools/perf/util/kvm-stat.h b/tools/perf/util/kvm-stat.h index cf1d7913783b..ae825d4ec110 100644 --- a/tools/perf/util/kvm-stat.h +++ b/tools/perf/util/kvm-stat.h @@ -99,6 +99,7 @@ struct perf_kvm_stat { int timerfd; unsigned int display_time; bool live; + bool force; }; struct kvm_reg_events_ops { diff --git a/tools/perf/util/lzma.c b/tools/perf/util/lzma.c new file mode 100644 index 000000000000..95a1acb61245 --- /dev/null +++ b/tools/perf/util/lzma.c @@ -0,0 +1,95 @@ +#include <lzma.h> +#include <stdio.h> +#include <linux/compiler.h> +#include "util.h" +#include "debug.h" + +#define BUFSIZE 8192 + +static const char *lzma_strerror(lzma_ret ret) +{ + switch ((int) ret) { + case LZMA_MEM_ERROR: + return "Memory allocation failed"; + case LZMA_OPTIONS_ERROR: + return "Unsupported decompressor flags"; + case LZMA_FORMAT_ERROR: + return "The input is not in the .xz format"; + case LZMA_DATA_ERROR: + return "Compressed file is corrupt"; + case LZMA_BUF_ERROR: + return "Compressed file is truncated or otherwise corrupt"; + default: + return "Unknown error, possibly a bug"; + } +} + +int lzma_decompress_to_file(const char *input, int output_fd) +{ + lzma_action action = LZMA_RUN; + lzma_stream strm = LZMA_STREAM_INIT; + lzma_ret ret; + + u8 buf_in[BUFSIZE]; + u8 buf_out[BUFSIZE]; + FILE *infile; + + infile = fopen(input, "rb"); + if (!infile) { + pr_err("lzma: fopen failed on %s: '%s'\n", + input, strerror(errno)); + return -1; + } + + ret = lzma_stream_decoder(&strm, UINT64_MAX, LZMA_CONCATENATED); + if (ret != LZMA_OK) { + pr_err("lzma: lzma_stream_decoder failed %s (%d)\n", + lzma_strerror(ret), ret); + return -1; + } + + strm.next_in = NULL; + strm.avail_in = 0; + strm.next_out = buf_out; + strm.avail_out = sizeof(buf_out); + + while (1) { + if (strm.avail_in == 0 && !feof(infile)) { + strm.next_in = buf_in; + strm.avail_in = fread(buf_in, 1, sizeof(buf_in), infile); + + if (ferror(infile)) { + pr_err("lzma: read error: %s\n", strerror(errno)); + return -1; + } + + if (feof(infile)) + action = LZMA_FINISH; + } + + ret = lzma_code(&strm, action); + + if (strm.avail_out == 0 || ret == LZMA_STREAM_END) { + ssize_t write_size = sizeof(buf_out) - strm.avail_out; + + if (writen(output_fd, buf_out, write_size) != write_size) { + pr_err("lzma: write error: %s\n", strerror(errno)); + return -1; + } + + strm.next_out = buf_out; + strm.avail_out = sizeof(buf_out); + } + + if (ret != LZMA_OK) { + if (ret == LZMA_STREAM_END) + return 0; + + pr_err("lzma: failed %s\n", lzma_strerror(ret)); + return -1; + } + } + + fclose(infile); + return 0; +} diff --git a/tools/perf/util/machine.c b/tools/perf/util/machine.c index 1bca3a9f2b16..7ff682770fdb 100644 --- 
a/tools/perf/util/machine.c +++ b/tools/perf/util/machine.c @@ -14,20 +14,23 @@ #include "unwind.h" #include "linux/hash.h" +static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock); + static void dsos__init(struct dsos *dsos) { INIT_LIST_HEAD(&dsos->head); dsos->root = RB_ROOT; + pthread_rwlock_init(&dsos->lock, NULL); } int machine__init(struct machine *machine, const char *root_dir, pid_t pid) { map_groups__init(&machine->kmaps, machine); RB_CLEAR_NODE(&machine->rb_node); - dsos__init(&machine->user_dsos); - dsos__init(&machine->kernel_dsos); + dsos__init(&machine->dsos); machine->threads = RB_ROOT; + pthread_rwlock_init(&machine->threads_lock, NULL); INIT_LIST_HEAD(&machine->dead_threads); machine->last_match = NULL; @@ -54,6 +57,7 @@ int machine__init(struct machine *machine, const char *root_dir, pid_t pid) snprintf(comm, sizeof(comm), "[guest/%d]", pid); thread__set_comm(thread, comm, 0); + thread__put(thread); } machine->current_tid = NULL; @@ -78,48 +82,50 @@ out_delete: return NULL; } -static void dsos__delete(struct dsos *dsos) +static void dsos__purge(struct dsos *dsos) { struct dso *pos, *n; + pthread_rwlock_wrlock(&dsos->lock); + list_for_each_entry_safe(pos, n, &dsos->head, node) { RB_CLEAR_NODE(&pos->rb_node); - list_del(&pos->node); - dso__delete(pos); + list_del_init(&pos->node); + dso__put(pos); } + + pthread_rwlock_unlock(&dsos->lock); } -void machine__delete_dead_threads(struct machine *machine) +static void dsos__exit(struct dsos *dsos) { - struct thread *n, *t; - - list_for_each_entry_safe(t, n, &machine->dead_threads, node) { - list_del(&t->node); - thread__delete(t); - } + dsos__purge(dsos); + pthread_rwlock_destroy(&dsos->lock); } void machine__delete_threads(struct machine *machine) { - struct rb_node *nd = rb_first(&machine->threads); + struct rb_node *nd; + pthread_rwlock_wrlock(&machine->threads_lock); + nd = rb_first(&machine->threads); while (nd) { struct thread *t = rb_entry(nd, struct thread, rb_node); - rb_erase(&t->rb_node, &machine->threads); nd = rb_next(nd); - thread__delete(t); + __machine__remove_thread(machine, t, false); } + pthread_rwlock_unlock(&machine->threads_lock); } void machine__exit(struct machine *machine) { map_groups__exit(&machine->kmaps); - dsos__delete(&machine->user_dsos); - dsos__delete(&machine->kernel_dsos); - vdso__exit(machine); + dsos__exit(&machine->dsos); + machine__exit_vdso(machine); zfree(&machine->root_dir); zfree(&machine->current_tid); + pthread_rwlock_destroy(&machine->threads_lock); } void machine__delete(struct machine *machine) @@ -314,7 +320,7 @@ static void machine__update_thread_pid(struct machine *machine, if (th->pid_ == th->tid) return; - leader = machine__findnew_thread(machine, th->pid_, th->pid_); + leader = __machine__findnew_thread(machine, th->pid_, th->pid_); if (!leader) goto out_err; @@ -336,7 +342,7 @@ static void machine__update_thread_pid(struct machine *machine, if (!map_groups__empty(th->mg)) pr_err("Discarding thread maps for %d:%d\n", th->pid_, th->tid); - map_groups__delete(th->mg); + map_groups__put(th->mg); } th->mg = map_groups__get(leader->mg); @@ -347,9 +353,9 @@ out_err: pr_err("Failed to join map groups for %d:%d\n", th->pid_, th->tid); } -static struct thread *__machine__findnew_thread(struct machine *machine, - pid_t pid, pid_t tid, - bool create) +static struct thread *____machine__findnew_thread(struct machine *machine, + pid_t pid, pid_t tid, + bool create) { struct rb_node **p = &machine->threads.rb_node; struct rb_node *parent = NULL; @@ 
-361,9 +367,13 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * the full rbtree: */ th = machine->last_match; - if (th && th->tid == tid) { - machine__update_thread_pid(machine, th, pid); - return th; + if (th != NULL) { + if (th->tid == tid) { + machine__update_thread_pid(machine, th, pid); + return th; + } + + machine->last_match = NULL; } while (*p != NULL) { @@ -399,27 +409,45 @@ static struct thread *__machine__findnew_thread(struct machine *machine, * leader and that would screwed the rb tree. */ if (thread__init_map_groups(th, machine)) { - rb_erase(&th->rb_node, &machine->threads); + rb_erase_init(&th->rb_node, &machine->threads); + RB_CLEAR_NODE(&th->rb_node); thread__delete(th); return NULL; } - + /* + * It is now in the rbtree, get a ref + */ + thread__get(th); machine->last_match = th; } return th; } +struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) +{ + return ____machine__findnew_thread(machine, pid, tid, true); +} + struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid) { - return __machine__findnew_thread(machine, pid, tid, true); + struct thread *th; + + pthread_rwlock_wrlock(&machine->threads_lock); + th = thread__get(__machine__findnew_thread(machine, pid, tid)); + pthread_rwlock_unlock(&machine->threads_lock); + return th; } struct thread *machine__find_thread(struct machine *machine, pid_t pid, pid_t tid) { - return __machine__findnew_thread(machine, pid, tid, false); + struct thread *th; + pthread_rwlock_rdlock(&machine->threads_lock); + th = thread__get(____machine__findnew_thread(machine, pid, tid, false)); + pthread_rwlock_unlock(&machine->threads_lock); + return th; } struct comm *machine__thread_exec_comm(struct machine *machine, @@ -438,6 +466,7 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event event->comm.pid, event->comm.tid); bool exec = event->header.misc & PERF_RECORD_MISC_COMM_EXEC; + int err = 0; if (exec) machine->comm_exec = true; @@ -448,10 +477,12 @@ int machine__process_comm_event(struct machine *machine, union perf_event *event if (thread == NULL || __thread__set_comm(thread, event->comm.comm, sample->time, exec)) { dump_printf("problem processing PERF_RECORD_COMM, skipping event.\n"); - return -1; + err = -1; } - return 0; + thread__put(thread); + + return err; } int machine__process_lost_event(struct machine *machine __maybe_unused, @@ -462,43 +493,101 @@ int machine__process_lost_event(struct machine *machine __maybe_unused, return 0; } -struct map *machine__new_module(struct machine *machine, u64 start, - const char *filename) +int machine__process_lost_samples_event(struct machine *machine __maybe_unused, + union perf_event *event, struct perf_sample *sample) { - struct map *map; - struct dso *dso = __dsos__findnew(&machine->kernel_dsos, filename); - bool compressed; + dump_printf(": id:%" PRIu64 ": lost samples :%" PRIu64 "\n", + sample->id, event->lost_samples.lost); + return 0; +} - if (dso == NULL) - return NULL; +static struct dso *machine__findnew_module_dso(struct machine *machine, + struct kmod_path *m, + const char *filename) +{ + struct dso *dso; - map = map__new2(start, dso, MAP__FUNCTION); - if (map == NULL) + pthread_rwlock_wrlock(&machine->dsos.lock); + + dso = __dsos__find(&machine->dsos, m->name, true); + if (!dso) { + dso = __dsos__addnew(&machine->dsos, m->name); + if (dso == NULL) + goto out_unlock; + + if (machine__is_host(machine)) + dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; + 
else + dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + + /* _KMODULE_COMP should be next to _KMODULE */ + if (m->kmod && m->comp) + dso->symtab_type++; + + dso__set_short_name(dso, strdup(m->name), true); + dso__set_long_name(dso, strdup(filename), true); + } + + dso__get(dso); +out_unlock: + pthread_rwlock_unlock(&machine->dsos.lock); + return dso; +} + +int machine__process_aux_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_aux(event, stdout); + return 0; +} + +int machine__process_itrace_start_event(struct machine *machine __maybe_unused, + union perf_event *event) +{ + if (dump_trace) + perf_event__fprintf_itrace_start(event, stdout); + return 0; +} + +struct map *machine__findnew_module_map(struct machine *machine, u64 start, + const char *filename) +{ + struct map *map = NULL; + struct dso *dso; + struct kmod_path m; + + if (kmod_path__parse_name(&m, filename)) return NULL; - if (machine__is_host(machine)) - dso->symtab_type = DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE; - else - dso->symtab_type = DSO_BINARY_TYPE__GUEST_KMODULE; + map = map_groups__find_by_name(&machine->kmaps, MAP__FUNCTION, + m.name); + if (map) + goto out; + + dso = machine__findnew_module_dso(machine, &m, filename); + if (dso == NULL) + goto out; - /* _KMODULE_COMP should be next to _KMODULE */ - if (is_kernel_module(filename, &compressed) && compressed) - dso->symtab_type++; + map = map__new2(start, dso, MAP__FUNCTION); + if (map == NULL) + goto out; map_groups__insert(&machine->kmaps, map); + +out: + free(m.name); return map; } size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) { struct rb_node *nd; - size_t ret = __dsos__fprintf(&machines->host.kernel_dsos.head, fp) + - __dsos__fprintf(&machines->host.user_dsos.head, fp); + size_t ret = __dsos__fprintf(&machines->host.dsos.head, fp); for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { struct machine *pos = rb_entry(nd, struct machine, rb_node); - ret += __dsos__fprintf(&pos->kernel_dsos.head, fp); - ret += __dsos__fprintf(&pos->user_dsos.head, fp); + ret += __dsos__fprintf(&pos->dsos.head, fp); } return ret; @@ -507,8 +596,7 @@ size_t machines__fprintf_dsos(struct machines *machines, FILE *fp) size_t machine__fprintf_dsos_buildid(struct machine *m, FILE *fp, bool (skip)(struct dso *dso, int parm), int parm) { - return __dsos__fprintf_buildid(&m->kernel_dsos.head, fp, skip, parm) + - __dsos__fprintf_buildid(&m->user_dsos.head, fp, skip, parm); + return __dsos__fprintf_buildid(&m->dsos.head, fp, skip, parm); } size_t machines__fprintf_dsos_buildid(struct machines *machines, FILE *fp, @@ -548,12 +636,16 @@ size_t machine__fprintf(struct machine *machine, FILE *fp) size_t ret = 0; struct rb_node *nd; + pthread_rwlock_rdlock(&machine->threads_lock); + for (nd = rb_first(&machine->threads); nd; nd = rb_next(nd)) { struct thread *pos = rb_entry(nd, struct thread, rb_node); ret += thread__fprintf(pos, fp); } + pthread_rwlock_unlock(&machine->threads_lock); + return ret; } @@ -567,9 +659,8 @@ static struct dso *machine__get_kernel(struct machine *machine) if (!vmlinux_name) vmlinux_name = "[kernel.kallsyms]"; - kernel = dso__kernel_findnew(machine, vmlinux_name, - "[kernel]", - DSO_TYPE_KERNEL); + kernel = machine__findnew_kernel(machine, vmlinux_name, + "[kernel]", DSO_TYPE_KERNEL); } else { char bf[PATH_MAX]; @@ -579,9 +670,9 @@ static struct dso *machine__get_kernel(struct machine *machine) vmlinux_name = machine__mmap_name(machine, bf, sizeof(bf)); - kernel = 
dso__kernel_findnew(machine, vmlinux_name, - "[guest.kernel]", - DSO_TYPE_GUEST_KERNEL); + kernel = machine__findnew_kernel(machine, vmlinux_name, + "[guest.kernel]", + DSO_TYPE_GUEST_KERNEL); } if (kernel != NULL && (!kernel->has_build_id)) @@ -650,6 +741,9 @@ int __machine__create_kernel_maps(struct machine *machine, struct dso *kernel) machine->vmlinux_maps[type]->unmap_ip = identity__map_ip; kmap = map__kmap(machine->vmlinux_maps[type]); + if (!kmap) + return -1; + kmap->kmaps = &machine->kmaps; map_groups__insert(&machine->kmaps, machine->vmlinux_maps[type]); @@ -671,7 +765,7 @@ void machine__destroy_kernel_maps(struct machine *machine) kmap = map__kmap(machine->vmlinux_maps[type]); map_groups__remove(&machine->kmaps, machine->vmlinux_maps[type]); - if (kmap->ref_reloc_sym) { + if (kmap && kmap->ref_reloc_sym) { /* * ref_reloc_sym is shared among all maps, so free just * on one of them. @@ -683,7 +777,6 @@ void machine__destroy_kernel_maps(struct machine *machine) kmap->ref_reloc_sym = NULL; } - map__delete(machine->vmlinux_maps[type]); machine->vmlinux_maps[type] = NULL; } } @@ -827,6 +920,39 @@ static char *get_kernel_version(const char *root_dir) return strdup(name); } +static bool is_kmod_dso(struct dso *dso) +{ + return dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || + dso->symtab_type == DSO_BINARY_TYPE__GUEST_KMODULE; +} + +static int map_groups__set_module_path(struct map_groups *mg, const char *path, + struct kmod_path *m) +{ + struct map *map; + char *long_name; + + map = map_groups__find_by_name(mg, MAP__FUNCTION, m->name); + if (map == NULL) + return 0; + + long_name = strdup(path); + if (long_name == NULL) + return -ENOMEM; + + dso__set_long_name(map->dso, long_name, true); + dso__kernel_module_get_build_id(map->dso, ""); + + /* + * Full name could reveal us kmod compression, so + * we need to update the symtab_type if needed. 
+ */ + if (m->comp && is_kmod_dso(map->dso)) + map->dso->symtab_type++; + + return 0; +} + static int map_groups__set_modules_path_dir(struct map_groups *mg, const char *dir_name, int depth) { @@ -865,35 +991,19 @@ static int map_groups__set_modules_path_dir(struct map_groups *mg, if (ret < 0) goto out; } else { - char *dot = strrchr(dent->d_name, '.'), - dso_name[PATH_MAX]; - struct map *map; - char *long_name; + struct kmod_path m; - if (dot == NULL) - continue; - - /* On some system, modules are compressed like .ko.gz */ - if (is_supported_compression(dot + 1) && - is_kmodule_extension(dot - 2)) - dot -= 3; + ret = kmod_path__parse_name(&m, dent->d_name); + if (ret) + goto out; - snprintf(dso_name, sizeof(dso_name), "[%.*s]", - (int)(dot - dent->d_name), dent->d_name); + if (m.kmod) + ret = map_groups__set_module_path(mg, path, &m); - strxfrchar(dso_name, '-', '_'); - map = map_groups__find_by_name(mg, MAP__FUNCTION, - dso_name); - if (map == NULL) - continue; + free(m.name); - long_name = strdup(path); - if (long_name == NULL) { - ret = -1; + if (ret) goto out; - } - dso__set_long_name(map->dso, long_name, true); - dso__kernel_module_get_build_id(map->dso, ""); } } @@ -923,7 +1033,7 @@ static int machine__create_module(void *arg, const char *name, u64 start) struct machine *machine = arg; struct map *map; - map = machine__new_module(machine, start, name); + map = machine__findnew_module_map(machine, start, name); if (map == NULL) return -1; @@ -1015,7 +1125,7 @@ static bool machine__uses_kcore(struct machine *machine) { struct dso *dso; - list_for_each_entry(dso, &machine->kernel_dsos.head, node) { + list_for_each_entry(dso, &machine->dsos.head, node) { if (dso__is_kcore(dso)) return true; } @@ -1046,40 +1156,11 @@ static int machine__process_kernel_mmap_event(struct machine *machine, strlen(kmmap_prefix) - 1) == 0; if (event->mmap.filename[0] == '/' || (!is_kernel_mmap && event->mmap.filename[0] == '[')) { - - char short_module_name[1024]; - char *name, *dot; - - if (event->mmap.filename[0] == '/') { - name = strrchr(event->mmap.filename, '/'); - if (name == NULL) - goto out_problem; - - ++name; /* skip / */ - dot = strrchr(name, '.'); - if (dot == NULL) - goto out_problem; - /* On some system, modules are compressed like .ko.gz */ - if (is_supported_compression(dot + 1)) - dot -= 3; - if (!is_kmodule_extension(dot + 1)) - goto out_problem; - snprintf(short_module_name, sizeof(short_module_name), - "[%.*s]", (int)(dot - name), name); - strxfrchar(short_module_name, '-', '_'); - } else - strcpy(short_module_name, event->mmap.filename); - - map = machine__new_module(machine, event->mmap.start, - event->mmap.filename); + map = machine__findnew_module_map(machine, event->mmap.start, + event->mmap.filename); if (map == NULL) goto out_problem; - name = strdup(short_module_name); - if (name == NULL) - goto out_problem; - - dso__set_short_name(map->dso, name, true); map->end = map->start + event->mmap.len; } else if (is_kernel_mmap) { const char *symbol_name = (event->mmap.filename + @@ -1091,23 +1172,48 @@ static int machine__process_kernel_mmap_event(struct machine *machine, struct dso *kernel = NULL; struct dso *dso; - list_for_each_entry(dso, &machine->kernel_dsos.head, node) { - if (is_kernel_module(dso->long_name, NULL)) + pthread_rwlock_rdlock(&machine->dsos.lock); + + list_for_each_entry(dso, &machine->dsos.head, node) { + + /* + * The cpumode passed to is_kernel_module is not the + * cpumode of *this* event. 
If we insist on passing + * correct cpumode to is_kernel_module, we should + * record the cpumode when we adding this dso to the + * linked list. + * + * However we don't really need passing correct + * cpumode. We know the correct cpumode must be kernel + * mode (if not, we should not link it onto kernel_dsos + * list). + * + * Therefore, we pass PERF_RECORD_MISC_CPUMODE_UNKNOWN. + * is_kernel_module() treats it as a kernel cpumode. + */ + + if (!dso->kernel || + is_kernel_module(dso->long_name, + PERF_RECORD_MISC_CPUMODE_UNKNOWN)) continue; + kernel = dso; break; } + pthread_rwlock_unlock(&machine->dsos.lock); + if (kernel == NULL) - kernel = __dsos__findnew(&machine->kernel_dsos, - kmmap_prefix); + kernel = machine__findnew_dso(machine, kmmap_prefix); if (kernel == NULL) goto out_problem; kernel->kernel = kernel_type; - if (__machine__create_kernel_maps(machine, kernel) < 0) + if (__machine__create_kernel_maps(machine, kernel) < 0) { + dso__put(kernel); goto out_problem; + } if (strstr(kernel->long_name, "vmlinux")) dso__set_short_name(kernel, "[kernel.vmlinux]", false); @@ -1179,11 +1285,15 @@ int machine__process_mmap2_event(struct machine *machine, event->mmap2.filename, type, thread); if (map == NULL) - goto out_problem; + goto out_problem_map; thread__insert_map(thread, map); + thread__put(thread); + map__put(map); return 0; +out_problem_map: + thread__put(thread); out_problem: dump_printf("problem processing PERF_RECORD_MMAP2, skipping event.\n"); return 0; @@ -1226,25 +1336,44 @@ int machine__process_mmap_event(struct machine *machine, union perf_event *event type, thread); if (map == NULL) - goto out_problem; + goto out_problem_map; thread__insert_map(thread, map); + thread__put(thread); + map__put(map); return 0; +out_problem_map: + thread__put(thread); out_problem: dump_printf("problem processing PERF_RECORD_MMAP, skipping event.\n"); return 0; } -static void machine__remove_thread(struct machine *machine, struct thread *th) +static void __machine__remove_thread(struct machine *machine, struct thread *th, bool lock) { - machine->last_match = NULL; - rb_erase(&th->rb_node, &machine->threads); + if (machine->last_match == th) + machine->last_match = NULL; + + BUG_ON(atomic_read(&th->refcnt) == 0); + if (lock) + pthread_rwlock_wrlock(&machine->threads_lock); + rb_erase_init(&th->rb_node, &machine->threads); + RB_CLEAR_NODE(&th->rb_node); /* - * We may have references to this thread, for instance in some hist_entry - * instances, so just move them to a separate list. + * Move it first to the dead_threads list, then drop the reference, + * if this is the last reference, then the thread__delete destructor + * will be called and we will remove it from the dead_threads list. 
*/ list_add_tail(&th->node, &machine->dead_threads); + if (lock) + pthread_rwlock_unlock(&machine->threads_lock); + thread__put(th); +} + +void machine__remove_thread(struct machine *machine, struct thread *th) +{ + return __machine__remove_thread(machine, th, true); } int machine__process_fork_event(struct machine *machine, union perf_event *event, @@ -1256,10 +1385,13 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct thread *parent = machine__findnew_thread(machine, event->fork.ppid, event->fork.ptid); + int err = 0; /* if a thread currently exists for the thread id remove it */ - if (thread != NULL) + if (thread != NULL) { machine__remove_thread(machine, thread); + thread__put(thread); + } thread = machine__findnew_thread(machine, event->fork.pid, event->fork.tid); @@ -1269,10 +1401,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event if (thread == NULL || parent == NULL || thread__fork(thread, parent, sample->time) < 0) { dump_printf("problem processing PERF_RECORD_FORK, skipping event.\n"); - return -1; + err = -1; } + thread__put(thread); + thread__put(parent); - return 0; + return err; } int machine__process_exit_event(struct machine *machine, union perf_event *event, @@ -1285,8 +1419,10 @@ int machine__process_exit_event(struct machine *machine, union perf_event *event if (dump_trace) perf_event__fprintf_task(event, stdout); - if (thread != NULL) + if (thread != NULL) { thread__exited(thread); + thread__put(thread); + } return 0; } @@ -1309,6 +1445,12 @@ int machine__process_event(struct machine *machine, union perf_event *event, ret = machine__process_exit_event(machine, event, sample); break; case PERF_RECORD_LOST: ret = machine__process_lost_event(machine, event, sample); break; + case PERF_RECORD_AUX: + ret = machine__process_aux_event(machine, event); break; + case PERF_RECORD_ITRACE_START: + ret = machine__process_itrace_start_event(machine, event); break; + case PERF_RECORD_LOST_SAMPLES: + ret = machine__process_lost_samples_event(machine, event, sample); break; default: ret = -1; break; @@ -1387,29 +1529,27 @@ struct mem_info *sample__resolve_mem(struct perf_sample *sample, static int add_callchain_ip(struct thread *thread, struct symbol **parent, struct addr_location *root_al, - bool branch_history, + u8 *cpumode, u64 ip) { struct addr_location al; al.filtered = 0; al.sym = NULL; - if (branch_history) + if (!cpumode) { thread__find_cpumode_addr_location(thread, MAP__FUNCTION, ip, &al); - else { - u8 cpumode = PERF_RECORD_MISC_USER; - + } else { if (ip >= PERF_CONTEXT_MAX) { switch (ip) { case PERF_CONTEXT_HV: - cpumode = PERF_RECORD_MISC_HYPERVISOR; + *cpumode = PERF_RECORD_MISC_HYPERVISOR; break; case PERF_CONTEXT_KERNEL: - cpumode = PERF_RECORD_MISC_KERNEL; + *cpumode = PERF_RECORD_MISC_KERNEL; break; case PERF_CONTEXT_USER: - cpumode = PERF_RECORD_MISC_USER; + *cpumode = PERF_RECORD_MISC_USER; break; default: pr_debug("invalid callchain context: " @@ -1423,8 +1563,8 @@ static int add_callchain_ip(struct thread *thread, } return 0; } - thread__find_addr_location(thread, cpumode, MAP__FUNCTION, - ip, &al); + thread__find_addr_location(thread, *cpumode, MAP__FUNCTION, + ip, &al); } if (al.sym != NULL) { @@ -1502,18 +1642,102 @@ static int remove_loops(struct branch_entry *l, int nr) return nr; } -static int thread__resolve_callchain_sample(struct thread *thread, - struct ip_callchain *chain, - struct branch_stack *branch, - struct symbol **parent, - struct addr_location *root_al, - int max_stack) 
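/*
 * Editorial sketch, not part of the patch above: it only illustrates the
 * reference-counting discipline these machine.c hunks introduce.
 * machine__findnew_thread() now takes machine->threads_lock and hands back
 * a thread with its refcount already bumped, so every caller is expected to
 * drop that reference with thread__put() when done, exactly as the reworked
 * machine__process_comm_event()/fork/exit handlers do. The function name
 * below is hypothetical, error handling is trimmed for brevity, and the
 * usual machine.h/thread.h includes are assumed.
 */
static int example__touch_thread(struct machine *machine, pid_t pid, pid_t tid)
{
	struct thread *thread = machine__findnew_thread(machine, pid, tid);

	if (thread == NULL)
		return -1;

	/* ... use the thread here: set its comm, insert maps, etc. ... */

	thread__put(thread);	/* drop the reference findnew handed back */
	return 0;
}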
+/* + * Recolve LBR callstack chain sample + * Return: + * 1 on success get LBR callchain information + * 0 no available LBR callchain information, should try fp + * negative error code on other errors. + */ +static int resolve_lbr_callchain_sample(struct thread *thread, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) { + struct ip_callchain *chain = sample->callchain; + int chain_nr = min(max_stack, (int)chain->nr); + u8 cpumode = PERF_RECORD_MISC_USER; + int i, j, err; + u64 ip; + + for (i = 0; i < chain_nr; i++) { + if (chain->ips[i] == PERF_CONTEXT_USER) + break; + } + + /* LBR only affects the user callchain */ + if (i != chain_nr) { + struct branch_stack *lbr_stack = sample->branch_stack; + int lbr_nr = lbr_stack->nr; + /* + * LBR callstack can only get user call chain. + * The mix_chain_nr is kernel call chain + * number plus LBR user call chain number. + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER, + * lbr_nr + 1 is the user call chain number. + * For details, please refer to the comments + * in callchain__printf + */ + int mix_chain_nr = i + 1 + lbr_nr + 1; + + if (mix_chain_nr > PERF_MAX_STACK_DEPTH + PERF_MAX_BRANCH_DEPTH) { + pr_warning("corrupted callchain. skipping...\n"); + return 0; + } + + for (j = 0; j < mix_chain_nr; j++) { + if (callchain_param.order == ORDER_CALLEE) { + if (j < i + 1) + ip = chain->ips[j]; + else if (j > i + 1) + ip = lbr_stack->entries[j - i - 2].from; + else + ip = lbr_stack->entries[0].to; + } else { + if (j < lbr_nr) + ip = lbr_stack->entries[lbr_nr - j - 1].from; + else if (j > lbr_nr) + ip = chain->ips[i + 1 - (j - lbr_nr)]; + else + ip = lbr_stack->entries[0].to; + } + + err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); + if (err) + return (err < 0) ? err : 0; + } + return 1; + } + + return 0; +} + +static int thread__resolve_callchain_sample(struct thread *thread, + struct perf_evsel *evsel, + struct perf_sample *sample, + struct symbol **parent, + struct addr_location *root_al, + int max_stack) +{ + struct branch_stack *branch = sample->branch_stack; + struct ip_callchain *chain = sample->callchain; int chain_nr = min(max_stack, (int)chain->nr); + u8 cpumode = PERF_RECORD_MISC_USER; int i, j, err; int skip_idx = -1; int first_call = 0; + callchain_cursor_reset(&callchain_cursor); + + if (has_branch_callstack(evsel)) { + err = resolve_lbr_callchain_sample(thread, sample, parent, + root_al, max_stack); + if (err) + return (err < 0) ? err : 0; + } + /* * Based on DWARF debug information, some architectures skip * a callchain entry saved by the kernel. @@ -1521,8 +1745,6 @@ static int thread__resolve_callchain_sample(struct thread *thread, if (chain->nr < PERF_MAX_STACK_DEPTH) skip_idx = arch_skip_callchain_idx(thread, chain); - callchain_cursor_reset(&callchain_cursor); - /* * Add branches to call stack for easier browsing. This gives * more context for a sample than just the callers. @@ -1568,10 +1790,10 @@ static int thread__resolve_callchain_sample(struct thread *thread, for (i = 0; i < nr; i++) { err = add_callchain_ip(thread, parent, root_al, - true, be[i].to); + NULL, be[i].to); if (!err) err = add_callchain_ip(thread, parent, root_al, - true, be[i].from); + NULL, be[i].from); if (err == -EINVAL) break; if (err) @@ -1600,7 +1822,7 @@ check_calls: #endif ip = chain->ips[j]; - err = add_callchain_ip(thread, parent, root_al, false, ip); + err = add_callchain_ip(thread, parent, root_al, &cpumode, ip); if (err) return (err < 0) ? 
err : 0; @@ -1623,9 +1845,9 @@ int thread__resolve_callchain(struct thread *thread, struct addr_location *root_al, int max_stack) { - int ret = thread__resolve_callchain_sample(thread, sample->callchain, - sample->branch_stack, - parent, root_al, max_stack); + int ret = thread__resolve_callchain_sample(thread, evsel, + sample, parent, + root_al, max_stack); if (ret) return ret; @@ -1667,14 +1889,36 @@ int machine__for_each_thread(struct machine *machine, return rc; } +int machines__for_each_thread(struct machines *machines, + int (*fn)(struct thread *thread, void *p), + void *priv) +{ + struct rb_node *nd; + int rc = 0; + + rc = machine__for_each_thread(&machines->host, fn, priv); + if (rc != 0) + return rc; + + for (nd = rb_first(&machines->guests); nd; nd = rb_next(nd)) { + struct machine *machine = rb_entry(nd, struct machine, rb_node); + + rc = machine__for_each_thread(machine, fn, priv); + if (rc != 0) + return rc; + } + return rc; +} + int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, - perf_event__handler_t process, bool data_mmap) + perf_event__handler_t process, bool data_mmap, + unsigned int proc_map_timeout) { if (target__has_task(target)) - return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap); + return perf_event__synthesize_thread_map(tool, threads, process, machine, data_mmap, proc_map_timeout); else if (target__has_cpu(target)) - return perf_event__synthesize_threads(tool, process, machine, data_mmap); + return perf_event__synthesize_threads(tool, process, machine, data_mmap, proc_map_timeout); /* command specified */ return 0; } @@ -1718,6 +1962,7 @@ int machine__set_current_tid(struct machine *machine, int cpu, pid_t pid, return -ENOMEM; thread->cpu = cpu; + thread__put(thread); return 0; } @@ -1743,3 +1988,8 @@ int machine__get_kernel_start(struct machine *machine) } return err; } + +struct dso *machine__findnew_dso(struct machine *machine, const char *filename) +{ + return dsos__findnew(&machine->dsos, filename); +} diff --git a/tools/perf/util/machine.h b/tools/perf/util/machine.h index e8b7779a0a3f..887798e511e9 100644 --- a/tools/perf/util/machine.h +++ b/tools/perf/util/machine.h @@ -30,11 +30,11 @@ struct machine { bool comm_exec; char *root_dir; struct rb_root threads; + pthread_rwlock_t threads_lock; struct list_head dead_threads; struct thread *last_match; struct vdso_info *vdso_info; - struct dsos user_dsos; - struct dsos kernel_dsos; + struct dsos dsos; struct map_groups kmaps; struct map *vmlinux_maps[MAP__NR_TYPES]; u64 kernel_start; @@ -81,6 +81,12 @@ int machine__process_fork_event(struct machine *machine, union perf_event *event struct perf_sample *sample); int machine__process_lost_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); +int machine__process_lost_samples_event(struct machine *machine, union perf_event *event, + struct perf_sample *sample); +int machine__process_aux_event(struct machine *machine, + union perf_event *event); +int machine__process_itrace_start_event(struct machine *machine, + union perf_event *event); int machine__process_mmap_event(struct machine *machine, union perf_event *event, struct perf_sample *sample); int machine__process_mmap2_event(struct machine *machine, union perf_event *event, @@ -118,9 +124,9 @@ void machines__set_comm_exec(struct machines *machines, bool comm_exec); struct machine *machine__new_host(void); int machine__init(struct machine *machine, const char 
*root_dir, pid_t pid); void machine__exit(struct machine *machine); -void machine__delete_dead_threads(struct machine *machine); void machine__delete_threads(struct machine *machine); void machine__delete(struct machine *machine); +void machine__remove_thread(struct machine *machine, struct thread *th); struct branch_info *sample__resolve_bstack(struct perf_sample *sample, struct addr_location *al); @@ -147,8 +153,10 @@ static inline bool machine__is_host(struct machine *machine) return machine ? machine->pid == HOST_KERNEL_ID : false; } -struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, - pid_t tid); +struct thread *__machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); +struct thread *machine__findnew_thread(struct machine *machine, pid_t pid, pid_t tid); + +struct dso *machine__findnew_dso(struct machine *machine, const char *filename); size_t machine__fprintf(struct machine *machine, FILE *fp); @@ -181,8 +189,8 @@ struct symbol *machine__find_kernel_function_by_name(struct machine *machine, filter); } -struct map *machine__new_module(struct machine *machine, u64 start, - const char *filename); +struct map *machine__findnew_module_map(struct machine *machine, u64 start, + const char *filename); int machine__load_kallsyms(struct machine *machine, const char *filename, enum map_type type, symbol_filter_t filter); @@ -208,16 +216,22 @@ size_t machine__fprintf_vmlinux_path(struct machine *machine, FILE *fp); int machine__for_each_thread(struct machine *machine, int (*fn)(struct thread *thread, void *p), void *priv); +int machines__for_each_thread(struct machines *machines, + int (*fn)(struct thread *thread, void *p), + void *priv); int __machine__synthesize_threads(struct machine *machine, struct perf_tool *tool, struct target *target, struct thread_map *threads, - perf_event__handler_t process, bool data_mmap); + perf_event__handler_t process, bool data_mmap, + unsigned int proc_map_timeout); static inline int machine__synthesize_threads(struct machine *machine, struct target *target, - struct thread_map *threads, bool data_mmap) + struct thread_map *threads, bool data_mmap, + unsigned int proc_map_timeout) { return __machine__synthesize_threads(machine, NULL, target, threads, - perf_event__process, data_mmap); + perf_event__process, data_mmap, + proc_map_timeout); } pid_t machine__get_current_tid(struct machine *machine, int cpu); diff --git a/tools/perf/util/map.c b/tools/perf/util/map.c index 62ca9f2607d5..b5a5e9c02437 100644 --- a/tools/perf/util/map.c +++ b/tools/perf/util/map.c @@ -16,6 +16,8 @@ #include "machine.h" #include <linux/string.h> +static void __maps__insert(struct maps *maps, struct map *map); + const char *map_type__name[MAP__NR_TYPES] = { [MAP__FUNCTION] = "Functions", [MAP__VARIABLE] = "Variables", @@ -130,13 +132,13 @@ void map__init(struct map *map, enum map_type type, map->end = end; map->pgoff = pgoff; map->reloc = 0; - map->dso = dso; + map->dso = dso__get(dso); map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; RB_CLEAR_NODE(&map->rb_node); map->groups = NULL; - map->referenced = false; map->erange_warned = false; + atomic_set(&map->refcnt, 1); } struct map *map__new(struct machine *machine, u64 start, u64 len, @@ -175,9 +177,9 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (vdso) { pgoff = 0; - dso = vdso__dso_findnew(machine, thread); + dso = machine__findnew_vdso(machine, thread); } else - dso = __dsos__findnew(&machine->user_dsos, filename); + dso = machine__findnew_dso(machine, 
filename); if (dso == NULL) goto out_delete; @@ -195,6 +197,7 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, if (type != MAP__FUNCTION) dso__set_loaded(dso, map->type); } + dso__put(dso); } return map; out_delete: @@ -221,11 +224,24 @@ struct map *map__new2(u64 start, struct dso *dso, enum map_type type) return map; } +static void map__exit(struct map *map) +{ + BUG_ON(!RB_EMPTY_NODE(&map->rb_node)); + dso__zput(map->dso); +} + void map__delete(struct map *map) { + map__exit(map); free(map); } +void map__put(struct map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + map__delete(map); +} + void map__fixup_start(struct map *map) { struct rb_root *symbols = &map->dso->symbols[map->type]; @@ -292,6 +308,11 @@ int map__load(struct map *map, symbol_filter_t filter) return 0; } +int __weak arch__compare_symbol_names(const char *namea, const char *nameb) +{ + return strcmp(namea, nameb); +} + struct symbol *map__find_symbol(struct map *map, u64 addr, symbol_filter_t filter) { @@ -413,48 +434,49 @@ u64 map__objdump_2mem(struct map *map, u64 ip) return ip + map->reloc; } +static void maps__init(struct maps *maps) +{ + maps->entries = RB_ROOT; + pthread_rwlock_init(&maps->lock, NULL); +} + void map_groups__init(struct map_groups *mg, struct machine *machine) { int i; for (i = 0; i < MAP__NR_TYPES; ++i) { - mg->maps[i] = RB_ROOT; - INIT_LIST_HEAD(&mg->removed_maps[i]); + maps__init(&mg->maps[i]); } mg->machine = machine; - mg->refcnt = 1; + atomic_set(&mg->refcnt, 1); } -static void maps__delete(struct rb_root *maps) +static void __maps__purge(struct maps *maps) { - struct rb_node *next = rb_first(maps); + struct rb_root *root = &maps->entries; + struct rb_node *next = rb_first(root); while (next) { struct map *pos = rb_entry(next, struct map, rb_node); next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, maps); - map__delete(pos); + rb_erase_init(&pos->rb_node, root); + map__put(pos); } } -static void maps__delete_removed(struct list_head *maps) +static void maps__exit(struct maps *maps) { - struct map *pos, *n; - - list_for_each_entry_safe(pos, n, maps, node) { - list_del(&pos->node); - map__delete(pos); - } + pthread_rwlock_wrlock(&maps->lock); + __maps__purge(maps); + pthread_rwlock_unlock(&maps->lock); } void map_groups__exit(struct map_groups *mg) { int i; - for (i = 0; i < MAP__NR_TYPES; ++i) { - maps__delete(&mg->maps[i]); - maps__delete_removed(&mg->removed_maps[i]); - } + for (i = 0; i < MAP__NR_TYPES; ++i) + maps__exit(&mg->maps[i]); } bool map_groups__empty(struct map_groups *mg) @@ -464,8 +486,6 @@ bool map_groups__empty(struct map_groups *mg) for (i = 0; i < MAP__NR_TYPES; ++i) { if (maps__first(&mg->maps[i])) return false; - if (!list_empty(&mg->removed_maps[i])) - return false; } return true; @@ -489,32 +509,10 @@ void map_groups__delete(struct map_groups *mg) void map_groups__put(struct map_groups *mg) { - if (--mg->refcnt == 0) + if (mg && atomic_dec_and_test(&mg->refcnt)) map_groups__delete(mg); } -void map_groups__flush(struct map_groups *mg) -{ - int type; - - for (type = 0; type < MAP__NR_TYPES; type++) { - struct rb_root *root = &mg->maps[type]; - struct rb_node *next = rb_first(root); - - while (next) { - struct map *pos = rb_entry(next, struct map, rb_node); - next = rb_next(&pos->rb_node); - rb_erase(&pos->rb_node, root); - /* - * We may have references to this map, for - * instance in some hist_entry instances, so - * just move them to a separate list. 
- */ - list_add_tail(&pos->node, &mg->removed_maps[pos->type]); - } - } -} - struct symbol *map_groups__find_symbol(struct map_groups *mg, enum map_type type, u64 addr, struct map **mapp, @@ -538,20 +536,28 @@ struct symbol *map_groups__find_symbol_by_name(struct map_groups *mg, struct map **mapp, symbol_filter_t filter) { + struct maps *maps = &mg->maps[type]; + struct symbol *sym; struct rb_node *nd; - for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { + pthread_rwlock_rdlock(&maps->lock); + + for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); - struct symbol *sym = map__find_symbol_by_name(pos, name, filter); + + sym = map__find_symbol_by_name(pos, name, filter); if (sym == NULL) continue; if (mapp != NULL) *mapp = pos; - return sym; + goto out; } - return NULL; + sym = NULL; +out: + pthread_rwlock_unlock(&maps->lock); + return sym; } int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) @@ -571,73 +577,54 @@ int map_groups__find_ams(struct addr_map_symbol *ams, symbol_filter_t filter) return ams->sym ? 0 : -1; } -size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, - FILE *fp) +static size_t maps__fprintf(struct maps *maps, FILE *fp) { - size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); + size_t printed = 0; struct rb_node *nd; - for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { + pthread_rwlock_rdlock(&maps->lock); + + for (nd = rb_first(&maps->entries); nd; nd = rb_next(nd)) { struct map *pos = rb_entry(nd, struct map, rb_node); printed += fprintf(fp, "Map:"); printed += map__fprintf(pos, fp); if (verbose > 2) { - printed += dso__fprintf(pos->dso, type, fp); + printed += dso__fprintf(pos->dso, pos->type, fp); printed += fprintf(fp, "--\n"); } } - return printed; -} + pthread_rwlock_unlock(&maps->lock); -static size_t map_groups__fprintf_maps(struct map_groups *mg, FILE *fp) -{ - size_t printed = 0, i; - for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __map_groups__fprintf_maps(mg, i, fp); return printed; } -static size_t __map_groups__fprintf_removed_maps(struct map_groups *mg, - enum map_type type, FILE *fp) +size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, + FILE *fp) { - struct map *pos; - size_t printed = 0; - - list_for_each_entry(pos, &mg->removed_maps[type], node) { - printed += fprintf(fp, "Map:"); - printed += map__fprintf(pos, fp); - if (verbose > 1) { - printed += dso__fprintf(pos->dso, type, fp); - printed += fprintf(fp, "--\n"); - } - } - return printed; + size_t printed = fprintf(fp, "%s:\n", map_type__name[type]); + return printed += maps__fprintf(&mg->maps[type], fp); } -static size_t map_groups__fprintf_removed_maps(struct map_groups *mg, - FILE *fp) +size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) { size_t printed = 0, i; for (i = 0; i < MAP__NR_TYPES; ++i) - printed += __map_groups__fprintf_removed_maps(mg, i, fp); + printed += __map_groups__fprintf_maps(mg, i, fp); return printed; } -size_t map_groups__fprintf(struct map_groups *mg, FILE *fp) +static int maps__fixup_overlappings(struct maps *maps, struct map *map, FILE *fp) { - size_t printed = map_groups__fprintf_maps(mg, fp); - printed += fprintf(fp, "Removed maps:\n"); - return printed + map_groups__fprintf_removed_maps(mg, fp); -} - -int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, - FILE *fp) -{ - struct rb_root *root = &mg->maps[map->type]; - struct rb_node *next = rb_first(root); + struct rb_root 
*root; + struct rb_node *next; int err = 0; + pthread_rwlock_wrlock(&maps->lock); + + root = &maps->entries; + next = rb_first(root); + while (next) { struct map *pos = rb_entry(next, struct map, rb_node); next = rb_next(&pos->rb_node); @@ -651,7 +638,7 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, map__fprintf(pos, fp); } - rb_erase(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, root); /* * Now check if we need to create new maps for areas not * overlapped by the new map: @@ -661,11 +648,11 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, if (before == NULL) { err = -ENOMEM; - goto move_map; + goto put_map; } before->end = map->start; - map_groups__insert(mg, before); + __maps__insert(maps, before); if (verbose >= 2) map__fprintf(before, fp); } @@ -675,28 +662,31 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, if (after == NULL) { err = -ENOMEM; - goto move_map; + goto put_map; } after->start = map->end; - map_groups__insert(mg, after); + __maps__insert(maps, after); if (verbose >= 2) map__fprintf(after, fp); } -move_map: - /* - * If we have references, just move them to a separate list. - */ - if (pos->referenced) - list_add_tail(&pos->node, &mg->removed_maps[map->type]); - else - map__delete(pos); +put_map: + map__put(pos); if (err) - return err; + goto out; } - return 0; + err = 0; +out: + pthread_rwlock_unlock(&maps->lock); + return err; +} + +int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, + FILE *fp) +{ + return maps__fixup_overlappings(&mg->maps[map->type], map, fp); } /* @@ -705,20 +695,28 @@ move_map: int map_groups__clone(struct map_groups *mg, struct map_groups *parent, enum map_type type) { - struct rb_node *nd; - for (nd = rb_first(&parent->maps[type]); nd; nd = rb_next(nd)) { - struct map *map = rb_entry(nd, struct map, rb_node); + int err = -ENOMEM; + struct map *map; + struct maps *maps = &parent->maps[type]; + + pthread_rwlock_rdlock(&maps->lock); + + for (map = maps__first(maps); map; map = map__next(map)) { struct map *new = map__clone(map); if (new == NULL) - return -ENOMEM; + goto out_unlock; map_groups__insert(mg, new); } - return 0; + + err = 0; +out_unlock: + pthread_rwlock_unlock(&maps->lock); + return err; } -void maps__insert(struct rb_root *maps, struct map *map) +static void __maps__insert(struct maps *maps, struct map *map) { - struct rb_node **p = &maps->rb_node; + struct rb_node **p = &maps->entries.rb_node; struct rb_node *parent = NULL; const u64 ip = map->start; struct map *m; @@ -733,20 +731,38 @@ void maps__insert(struct rb_root *maps, struct map *map) } rb_link_node(&map->rb_node, parent, p); - rb_insert_color(&map->rb_node, maps); + rb_insert_color(&map->rb_node, &maps->entries); + map__get(map); } -void maps__remove(struct rb_root *maps, struct map *map) +void maps__insert(struct maps *maps, struct map *map) { - rb_erase(&map->rb_node, maps); + pthread_rwlock_wrlock(&maps->lock); + __maps__insert(maps, map); + pthread_rwlock_unlock(&maps->lock); } -struct map *maps__find(struct rb_root *maps, u64 ip) +static void __maps__remove(struct maps *maps, struct map *map) { - struct rb_node **p = &maps->rb_node; - struct rb_node *parent = NULL; + rb_erase_init(&map->rb_node, &maps->entries); + map__put(map); +} + +void maps__remove(struct maps *maps, struct map *map) +{ + pthread_rwlock_wrlock(&maps->lock); + __maps__remove(maps, map); + pthread_rwlock_unlock(&maps->lock); +} + +struct map *maps__find(struct maps *maps, u64 ip) +{ + 
struct rb_node **p, *parent = NULL; struct map *m; + pthread_rwlock_rdlock(&maps->lock); + + p = &maps->entries.rb_node; while (*p != NULL) { parent = *p; m = rb_entry(parent, struct map, rb_node); @@ -755,22 +771,25 @@ struct map *maps__find(struct rb_root *maps, u64 ip) else if (ip >= m->end) p = &(*p)->rb_right; else - return m; + goto out; } - return NULL; + m = NULL; +out: + pthread_rwlock_unlock(&maps->lock); + return m; } -struct map *maps__first(struct rb_root *maps) +struct map *maps__first(struct maps *maps) { - struct rb_node *first = rb_first(maps); + struct rb_node *first = rb_first(&maps->entries); if (first) return rb_entry(first, struct map, rb_node); return NULL; } -struct map *maps__next(struct map *map) +struct map *map__next(struct map *map) { struct rb_node *next = rb_next(&map->rb_node); @@ -778,3 +797,23 @@ struct map *maps__next(struct map *map) return rb_entry(next, struct map, rb_node); return NULL; } + +struct kmap *map__kmap(struct map *map) +{ + if (!map->dso || !map->dso->kernel) { + pr_err("Internal error: map__kmap with a non-kernel map\n"); + return NULL; + } + return (struct kmap *)(map + 1); +} + +struct map_groups *map__kmaps(struct map *map) +{ + struct kmap *kmap = map__kmap(map); + + if (!kmap || !kmap->kmaps) { + pr_err("Internal error: map__kmaps with a non-kernel map\n"); + return NULL; + } + return kmap->kmaps; +} diff --git a/tools/perf/util/map.h b/tools/perf/util/map.h index 0e42438b1e59..d73e687b224e 100644 --- a/tools/perf/util/map.h +++ b/tools/perf/util/map.h @@ -1,9 +1,11 @@ #ifndef __PERF_MAP_H #define __PERF_MAP_H +#include <linux/atomic.h> #include <linux/compiler.h> #include <linux/list.h> #include <linux/rbtree.h> +#include <pthread.h> #include <stdio.h> #include <stdbool.h> #include <linux/types.h> @@ -32,7 +34,6 @@ struct map { u64 start; u64 end; u8 /* enum map_type */ type; - bool referenced; bool erange_warned; u32 priv; u32 prot; @@ -50,6 +51,7 @@ struct map { struct dso *dso; struct map_groups *groups; + atomic_t refcnt; }; struct kmap { @@ -57,11 +59,15 @@ struct kmap { struct map_groups *kmaps; }; +struct maps { + struct rb_root entries; + pthread_rwlock_t lock; +}; + struct map_groups { - struct rb_root maps[MAP__NR_TYPES]; - struct list_head removed_maps[MAP__NR_TYPES]; + struct maps maps[MAP__NR_TYPES]; struct machine *machine; - int refcnt; + atomic_t refcnt; }; struct map_groups *map_groups__new(struct machine *machine); @@ -70,16 +76,15 @@ bool map_groups__empty(struct map_groups *mg); static inline struct map_groups *map_groups__get(struct map_groups *mg) { - ++mg->refcnt; + if (mg) + atomic_inc(&mg->refcnt); return mg; } void map_groups__put(struct map_groups *mg); -static inline struct kmap *map__kmap(struct map *map) -{ - return (struct kmap *)(map + 1); -} +struct kmap *map__kmap(struct map *map); +struct map_groups *map__kmaps(struct map *map); static inline u64 map__map_ip(struct map *map, u64 ip) { @@ -126,7 +131,7 @@ struct thread; */ #define __map__for_each_symbol_by_name(map, sym_name, pos, filter) \ for (pos = map__find_symbol_by_name(map, sym_name, filter); \ - pos && strcmp(pos->name, sym_name) == 0; \ + pos && arch__compare_symbol_names(pos->name, sym_name) == 0; \ pos = symbol__next_by_name(pos)) #define map__for_each_symbol_by_name(map, sym_name, pos) \ @@ -134,6 +139,7 @@ struct thread; typedef int (*symbol_filter_t)(struct map *map, struct symbol *sym); +int arch__compare_symbol_names(const char *namea, const char *nameb); void map__init(struct map *map, enum map_type type, u64 start, u64 end, u64 
pgoff, struct dso *dso); struct map *map__new(struct machine *machine, u64 start, u64 len, @@ -143,6 +149,24 @@ struct map *map__new(struct machine *machine, u64 start, u64 len, struct map *map__new2(u64 start, struct dso *dso, enum map_type type); void map__delete(struct map *map); struct map *map__clone(struct map *map); + +static inline struct map *map__get(struct map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void map__put(struct map *map); + +static inline void __map__zput(struct map **map) +{ + map__put(*map); + *map = NULL; +} + +#define map__zput(map) __map__zput(&map) + int map__overlap(struct map *l, struct map *r); size_t map__fprintf(struct map *map, FILE *fp); size_t map__fprintf_dsoname(struct map *map, FILE *fp); @@ -161,11 +185,11 @@ void map__reloc_vmlinux(struct map *map); size_t __map_groups__fprintf_maps(struct map_groups *mg, enum map_type type, FILE *fp); -void maps__insert(struct rb_root *maps, struct map *map); -void maps__remove(struct rb_root *maps, struct map *map); -struct map *maps__find(struct rb_root *maps, u64 addr); -struct map *maps__first(struct rb_root *maps); -struct map *maps__next(struct map *map); +void maps__insert(struct maps *maps, struct map *map); +void maps__remove(struct maps *maps, struct map *map); +struct map *maps__find(struct maps *maps, u64 addr); +struct map *maps__first(struct maps *maps); +struct map *map__next(struct map *map); void map_groups__init(struct map_groups *mg, struct machine *machine); void map_groups__exit(struct map_groups *mg); int map_groups__clone(struct map_groups *mg, @@ -200,7 +224,7 @@ static inline struct map *map_groups__first(struct map_groups *mg, static inline struct map *map_groups__next(struct map *map) { - return maps__next(map); + return map__next(map); } struct symbol *map_groups__find_symbol(struct map_groups *mg, @@ -232,6 +256,4 @@ int map_groups__fixup_overlappings(struct map_groups *mg, struct map *map, struct map *map_groups__find_by_name(struct map_groups *mg, enum map_type type, const char *name); -void map_groups__flush(struct map_groups *mg); - #endif /* __PERF_MAP_H */ diff --git a/tools/perf/util/ordered-events.c b/tools/perf/util/ordered-events.c index fd4be94125fb..52be201b9b25 100644 --- a/tools/perf/util/ordered-events.c +++ b/tools/perf/util/ordered-events.c @@ -2,7 +2,6 @@ #include <linux/compiler.h> #include <linux/string.h> #include "ordered-events.h" -#include "evlist.h" #include "session.h" #include "asm/bug.h" #include "debug.h" @@ -131,8 +130,8 @@ static struct ordered_event *alloc_event(struct ordered_events *oe, return new; } -struct ordered_event * -ordered_events__new(struct ordered_events *oe, u64 timestamp, +static struct ordered_event * +ordered_events__new_event(struct ordered_events *oe, u64 timestamp, union perf_event *event) { struct ordered_event *new; @@ -153,20 +152,47 @@ void ordered_events__delete(struct ordered_events *oe, struct ordered_event *eve free_dup_event(oe, event->event); } -static int __ordered_events__flush(struct perf_session *s, - struct perf_tool *tool) +int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset) +{ + u64 timestamp = sample->time; + struct ordered_event *oevent; + + if (!timestamp || timestamp == ~0ULL) + return -ETIME; + + if (timestamp < oe->last_flush) { + pr_oe_time(timestamp, "out of order event\n"); + pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", + oe->last_flush_type); + + oe->nr_unordered_events++; + } + + oevent = 
ordered_events__new_event(oe, timestamp, event); + if (!oevent) { + ordered_events__flush(oe, OE_FLUSH__HALF); + oevent = ordered_events__new_event(oe, timestamp, event); + } + + if (!oevent) + return -ENOMEM; + + oevent->file_offset = file_offset; + return 0; +} + +static int __ordered_events__flush(struct ordered_events *oe) { - struct ordered_events *oe = &s->ordered_events; struct list_head *head = &oe->events; struct ordered_event *tmp, *iter; - struct perf_sample sample; u64 limit = oe->next_flush; u64 last_ts = oe->last ? oe->last->timestamp : 0ULL; bool show_progress = limit == ULLONG_MAX; struct ui_progress prog; int ret; - if (!tool->ordered_events || !limit) + if (!limit) return 0; if (show_progress) @@ -178,16 +204,9 @@ static int __ordered_events__flush(struct perf_session *s, if (iter->timestamp > limit) break; - - ret = perf_evlist__parse_sample(s->evlist, iter->event, &sample); + ret = oe->deliver(oe, iter); if (ret) - pr_err("Can't parse sample, err = %d\n", ret); - else { - ret = perf_session__deliver_event(s, iter->event, &sample, tool, - iter->file_offset); - if (ret) - return ret; - } + return ret; ordered_events__delete(oe, iter); oe->last_flush = iter->timestamp; @@ -204,10 +223,8 @@ static int __ordered_events__flush(struct perf_session *s, return 0; } -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, - enum oe_flush how) +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how) { - struct ordered_events *oe = &s->ordered_events; static const char * const str[] = { "NONE", "FINAL", @@ -216,6 +233,9 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, }; int err; + if (oe->nr_events == 0) + return 0; + switch (how) { case OE_FLUSH__FINAL: oe->next_flush = ULLONG_MAX; @@ -248,7 +268,7 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, str[how], oe->nr_events); pr_oe_time(oe->max_timestamp, "max_timestamp\n"); - err = __ordered_events__flush(s, tool); + err = __ordered_events__flush(oe); if (!err) { if (how == OE_FLUSH__ROUND) @@ -264,13 +284,14 @@ int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, return err; } -void ordered_events__init(struct ordered_events *oe) +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver) { INIT_LIST_HEAD(&oe->events); INIT_LIST_HEAD(&oe->cache); INIT_LIST_HEAD(&oe->to_free); oe->max_alloc_size = (u64) -1; oe->cur_alloc_size = 0; + oe->deliver = deliver; } void ordered_events__free(struct ordered_events *oe) diff --git a/tools/perf/util/ordered-events.h b/tools/perf/util/ordered-events.h index 7b8f9b011f38..f403991e3bfd 100644 --- a/tools/perf/util/ordered-events.h +++ b/tools/perf/util/ordered-events.h @@ -2,9 +2,8 @@ #define __ORDERED_EVENTS_H #include <linux/types.h> -#include "tool.h" -struct perf_session; +struct perf_sample; struct ordered_event { u64 timestamp; @@ -20,6 +19,11 @@ enum oe_flush { OE_FLUSH__HALF, }; +struct ordered_events; + +typedef int (*ordered_events__deliver_t)(struct ordered_events *oe, + struct ordered_event *event); + struct ordered_events { u64 last_flush; u64 next_flush; @@ -31,18 +35,19 @@ struct ordered_events { struct list_head to_free; struct ordered_event *buffer; struct ordered_event *last; + ordered_events__deliver_t deliver; int buffer_idx; unsigned int nr_events; enum oe_flush last_flush_type; + u32 nr_unordered_events; bool copy_on_queue; }; -struct ordered_event *ordered_events__new(struct ordered_events *oe, u64 timestamp, - union perf_event *event); 
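/*
 * Editorial sketch, not part of the patch: shows how a tool would be wired
 * to the reworked ordered-events API above, where ordered_events__new() is
 * replaced by ordered_events__queue() and delivery happens through the
 * callback passed to ordered_events__init(). Only the ordered_events__*
 * calls come from the patch; the example__* names are hypothetical and the
 * actual event processing is elided.
 */
static int example__deliver(struct ordered_events *oe __maybe_unused,
			    struct ordered_event *event __maybe_unused)
{
	/* parse event->event and process it; event->file_offset is available */
	return 0;
}

static int example__queue(struct ordered_events *oe, union perf_event *event,
			  struct perf_sample *sample, u64 file_offset)
{
	int err = ordered_events__queue(oe, event, sample, file_offset);

	if (err == -ETIME)	/* sample has no usable timestamp */
		err = 0;	/* a real tool would deliver it directly */

	return err;
}

static void example__setup(struct ordered_events *oe)
{
	ordered_events__init(oe, example__deliver);
}

static void example__finish(struct ordered_events *oe)
{
	ordered_events__flush(oe, OE_FLUSH__FINAL);
	ordered_events__free(oe);
}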
+int ordered_events__queue(struct ordered_events *oe, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void ordered_events__delete(struct ordered_events *oe, struct ordered_event *event); -int ordered_events__flush(struct perf_session *s, struct perf_tool *tool, - enum oe_flush how); -void ordered_events__init(struct ordered_events *oe); +int ordered_events__flush(struct ordered_events *oe, enum oe_flush how); +void ordered_events__init(struct ordered_events *oe, ordered_events__deliver_t deliver); void ordered_events__free(struct ordered_events *oe); static inline diff --git a/tools/perf/util/pager.c b/tools/perf/util/pager.c index 31ee02d4e988..53ef006a951c 100644 --- a/tools/perf/util/pager.c +++ b/tools/perf/util/pager.c @@ -50,11 +50,6 @@ void setup_pager(void) if (!isatty(1)) return; - if (!pager) { - if (!pager_program) - perf_config(perf_default_config, NULL); - pager = pager_program; - } if (!pager) pager = getenv("PAGER"); if (!(pager || access("/usr/bin/pager", X_OK))) diff --git a/tools/perf/util/parse-branch-options.c b/tools/perf/util/parse-branch-options.c new file mode 100644 index 000000000000..a3b1e13a05c0 --- /dev/null +++ b/tools/perf/util/parse-branch-options.c @@ -0,0 +1,94 @@ +#include "perf.h" +#include "util/util.h" +#include "util/debug.h" +#include "util/parse-options.h" +#include "util/parse-branch-options.h" + +#define BRANCH_OPT(n, m) \ + { .name = n, .mode = (m) } + +#define BRANCH_END { .name = NULL } + +struct branch_mode { + const char *name; + int mode; +}; + +static const struct branch_mode branch_modes[] = { + BRANCH_OPT("u", PERF_SAMPLE_BRANCH_USER), + BRANCH_OPT("k", PERF_SAMPLE_BRANCH_KERNEL), + BRANCH_OPT("hv", PERF_SAMPLE_BRANCH_HV), + BRANCH_OPT("any", PERF_SAMPLE_BRANCH_ANY), + BRANCH_OPT("any_call", PERF_SAMPLE_BRANCH_ANY_CALL), + BRANCH_OPT("any_ret", PERF_SAMPLE_BRANCH_ANY_RETURN), + BRANCH_OPT("ind_call", PERF_SAMPLE_BRANCH_IND_CALL), + BRANCH_OPT("abort_tx", PERF_SAMPLE_BRANCH_ABORT_TX), + BRANCH_OPT("in_tx", PERF_SAMPLE_BRANCH_IN_TX), + BRANCH_OPT("no_tx", PERF_SAMPLE_BRANCH_NO_TX), + BRANCH_OPT("cond", PERF_SAMPLE_BRANCH_COND), + BRANCH_OPT("ind_jmp", PERF_SAMPLE_BRANCH_IND_JUMP), + BRANCH_END +}; + +int +parse_branch_stack(const struct option *opt, const char *str, int unset) +{ +#define ONLY_PLM \ + (PERF_SAMPLE_BRANCH_USER |\ + PERF_SAMPLE_BRANCH_KERNEL |\ + PERF_SAMPLE_BRANCH_HV) + + uint64_t *mode = (uint64_t *)opt->value; + const struct branch_mode *br; + char *s, *os = NULL, *p; + int ret = -1; + + if (unset) + return 0; + + /* + * cannot set it twice, -b + --branch-filter for instance + */ + if (*mode) + return -1; + + /* str may be NULL in case no arg is passed to -b */ + if (str) { + /* because str is read-only */ + s = os = strdup(str); + if (!s) + return -1; + + for (;;) { + p = strchr(s, ','); + if (p) + *p = '\0'; + + for (br = branch_modes; br->name; br++) { + if (!strcasecmp(s, br->name)) + break; + } + if (!br->name) { + ui__warning("unknown branch filter %s," + " check man page\n", s); + goto error; + } + + *mode |= br->mode; + + if (!p) + break; + + s = p + 1; + } + } + ret = 0; + + /* default to any branch */ + if ((*mode & ~ONLY_PLM) == 0) { + *mode = PERF_SAMPLE_BRANCH_ANY; + } +error: + free(os); + return ret; +} diff --git a/tools/perf/util/parse-branch-options.h b/tools/perf/util/parse-branch-options.h new file mode 100644 index 000000000000..b9d9470c2e82 --- /dev/null +++ b/tools/perf/util/parse-branch-options.h @@ -0,0 +1,5 @@ +#ifndef _PERF_PARSE_BRANCH_OPTIONS_H +#define 
_PERF_PARSE_BRANCH_OPTIONS_H 1 +struct option; +int parse_branch_stack(const struct option *opt, const char *str, int unset); +#endif /* _PERF_PARSE_BRANCH_OPTIONS_H */ diff --git a/tools/perf/util/parse-events.c b/tools/perf/util/parse-events.c index 7f8ec6ce2823..09f8d2357108 100644 --- a/tools/perf/util/parse-events.c +++ b/tools/perf/util/parse-events.c @@ -17,14 +17,11 @@ #include "parse-events-flex.h" #include "pmu.h" #include "thread_map.h" +#include "cpumap.h" +#include "asm/bug.h" #define MAX_NAME_LEN 100 -struct event_symbol { - const char *symbol; - const char *alias; -}; - #ifdef PARSER_DEBUG extern int parse_events_debug; #endif @@ -39,7 +36,7 @@ static struct perf_pmu_event_symbol *perf_pmu_events_list; */ static int perf_pmu_events_list_num; -static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { +struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { [PERF_COUNT_HW_CPU_CYCLES] = { .symbol = "cpu-cycles", .alias = "cycles", @@ -82,7 +79,7 @@ static struct event_symbol event_symbols_hw[PERF_COUNT_HW_MAX] = { }, }; -static struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { +struct event_symbol event_symbols_sw[PERF_COUNT_SW_MAX] = { [PERF_COUNT_SW_CPU_CLOCK] = { .symbol = "cpu-clock", .alias = "", @@ -175,9 +172,6 @@ struct tracepoint_path *tracepoint_id_to_path(u64 config) char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return NULL; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return NULL; @@ -292,7 +286,9 @@ __add_event(struct list_head *list, int *idx, if (!evsel) return NULL; - evsel->cpus = cpus; + if (cpus) + evsel->cpus = cpu_map__get(cpus); + if (name) evsel->name = strdup(name); list_add_tail(&evsel->node, list); @@ -473,12 +469,6 @@ static int add_tracepoint_multi_sys(struct list_head *list, int *idx, int parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event) { - int ret; - - ret = debugfs_valid_mountpoint(tracing_events_path); - if (ret) - return ret; - if (strpbrk(sys, "*?")) return add_tracepoint_multi_sys(list, idx, sys, event); else @@ -552,16 +542,40 @@ int parse_events_add_breakpoint(struct list_head *list, int *idx, return add_event(list, idx, &attr, NULL); } +static int check_type_val(struct parse_events_term *term, + struct parse_events_error *err, + int type) +{ + if (type == term->type_val) + return 0; + + if (err) { + err->idx = term->err_val; + if (type == PARSE_EVENTS__TERM_TYPE_NUM) + err->str = strdup("expected numeric value"); + else + err->str = strdup("expected string value"); + } + return -EINVAL; +} + static int config_term(struct perf_event_attr *attr, - struct parse_events_term *term) + struct parse_events_term *term, + struct parse_events_error *err) { -#define CHECK_TYPE_VAL(type) \ -do { \ - if (PARSE_EVENTS__TERM_TYPE_ ## type != term->type_val) \ - return -EINVAL; \ +#define CHECK_TYPE_VAL(type) \ +do { \ + if (check_type_val(term, err, PARSE_EVENTS__TERM_TYPE_ ## type)) \ + return -EINVAL; \ } while (0) switch (term->type_term) { + case PARSE_EVENTS__TERM_TYPE_USER: + /* + * Always succeed for sysfs terms, as we dont know + * at this point what type they need to have. 
+ */ + return 0; case PARSE_EVENTS__TERM_TYPE_CONFIG: CHECK_TYPE_VAL(NUM); attr->config = term->val.num; @@ -596,18 +610,20 @@ do { \ } static int config_attr(struct perf_event_attr *attr, - struct list_head *head, int fail) + struct list_head *head, + struct parse_events_error *err) { struct parse_events_term *term; list_for_each_entry(term, head, list) - if (config_term(attr, term) && fail) + if (config_term(attr, term, err)) return -EINVAL; return 0; } -int parse_events_add_numeric(struct list_head *list, int *idx, +int parse_events_add_numeric(struct parse_events_evlist *data, + struct list_head *list, u32 type, u64 config, struct list_head *head_config) { @@ -618,10 +634,10 @@ int parse_events_add_numeric(struct list_head *list, int *idx, attr.config = config; if (head_config && - config_attr(&attr, head_config, 1)) + config_attr(&attr, head_config, data->error)) return -EINVAL; - return add_event(list, idx, &attr, NULL); + return add_event(list, &data->idx, &attr, NULL); } static int parse_events__is_name_term(struct parse_events_term *term) @@ -640,8 +656,9 @@ static char *pmu_event_name(struct list_head *head_terms) return NULL; } -int parse_events_add_pmu(struct list_head *list, int *idx, - char *name, struct list_head *head_config) +int parse_events_add_pmu(struct parse_events_evlist *data, + struct list_head *list, char *name, + struct list_head *head_config) { struct perf_event_attr attr; struct perf_pmu_info info; @@ -661,7 +678,7 @@ int parse_events_add_pmu(struct list_head *list, int *idx, if (!head_config) { attr.type = pmu->type; - evsel = __add_event(list, idx, &attr, NULL, pmu->cpus); + evsel = __add_event(list, &data->idx, &attr, NULL, pmu->cpus); return evsel ? 0 : -ENOMEM; } @@ -672,13 +689,14 @@ int parse_events_add_pmu(struct list_head *list, int *idx, * Configure hardcoded terms first, no need to check * return value when called with fail == 0 ;) */ - config_attr(&attr, head_config, 0); + if (config_attr(&attr, head_config, data->error)) + return -EINVAL; - if (perf_pmu__config(pmu, &attr, head_config)) + if (perf_pmu__config(pmu, &attr, head_config, data->error)) return -EINVAL; - evsel = __add_event(list, idx, &attr, pmu_event_name(head_config), - pmu->cpus); + evsel = __add_event(list, &data->idx, &attr, + pmu_event_name(head_config), pmu->cpus); if (evsel) { evsel->unit = info.unit; evsel->scale = info.scale; @@ -723,6 +741,7 @@ struct event_modifier { int eh; int eH; int eG; + int eI; int precise; int exclude_GH; int sample_read; @@ -737,6 +756,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, int eh = evsel ? evsel->attr.exclude_hv : 0; int eH = evsel ? evsel->attr.exclude_host : 0; int eG = evsel ? evsel->attr.exclude_guest : 0; + int eI = evsel ? evsel->attr.exclude_idle : 0; int precise = evsel ? evsel->attr.precise_ip : 0; int sample_read = 0; int pinned = evsel ? evsel->attr.pinned : 0; @@ -767,6 +787,8 @@ static int get_event_modifier(struct event_modifier *mod, char *str, if (!exclude_GH) exclude_GH = eG = eH = 1; eH = 0; + } else if (*str == 'I') { + eI = 1; } else if (*str == 'p') { precise++; /* use of precise requires exclude_guest */ @@ -800,6 +822,7 @@ static int get_event_modifier(struct event_modifier *mod, char *str, mod->eh = eh; mod->eH = eH; mod->eG = eG; + mod->eI = eI; mod->precise = precise; mod->exclude_GH = exclude_GH; mod->sample_read = sample_read; @@ -817,7 +840,7 @@ static int check_modifier(char *str) char *p = str; /* The sizeof includes 0 byte as well. 
*/ - if (strlen(str) > (sizeof("ukhGHpppSD") - 1)) + if (strlen(str) > (sizeof("ukhGHpppSDI") - 1)) return -1; while (*p) { @@ -853,6 +876,7 @@ int parse_events__modifier_event(struct list_head *list, char *str, bool add) evsel->attr.precise_ip = mod.precise; evsel->attr.exclude_host = mod.eH; evsel->attr.exclude_guest = mod.eG; + evsel->attr.exclude_idle = mod.eI; evsel->exclude_GH = mod.exclude_GH; evsel->sample_read = mod.sample_read; @@ -1027,11 +1051,13 @@ int parse_events_terms(struct list_head *terms, const char *str) return ret; } -int parse_events(struct perf_evlist *evlist, const char *str) +int parse_events(struct perf_evlist *evlist, const char *str, + struct parse_events_error *err) { struct parse_events_evlist data = { - .list = LIST_HEAD_INIT(data.list), - .idx = evlist->nr_entries, + .list = LIST_HEAD_INIT(data.list), + .idx = evlist->nr_entries, + .error = err, }; int ret; @@ -1052,16 +1078,87 @@ int parse_events(struct perf_evlist *evlist, const char *str) return ret; } +#define MAX_WIDTH 1000 +static int get_term_width(void) +{ + struct winsize ws; + + get_term_dimensions(&ws); + return ws.ws_col > MAX_WIDTH ? MAX_WIDTH : ws.ws_col; +} + +static void parse_events_print_error(struct parse_events_error *err, + const char *event) +{ + const char *str = "invalid or unsupported event: "; + char _buf[MAX_WIDTH]; + char *buf = (char *) event; + int idx = 0; + + if (err->str) { + /* -2 for extra '' in the final fprintf */ + int width = get_term_width() - 2; + int len_event = strlen(event); + int len_str, max_len, cut = 0; + + /* + * Maximum error index indent, we will cut + * the event string if it's bigger. + */ + int max_err_idx = 10; + + /* + * Let's be specific with the message when + * we have the precise error. + */ + str = "event syntax error: "; + len_str = strlen(str); + max_len = width - len_str; + + buf = _buf; + + /* We're cutting from the beggining. */ + if (err->idx > max_err_idx) + cut = err->idx - max_err_idx; + + strncpy(buf, event + cut, max_len); + + /* Mark cut parts with '..' on both sides. 
*/ + if (cut) + buf[0] = buf[1] = '.'; + + if ((len_event - cut) > max_len) { + buf[max_len - 1] = buf[max_len - 2] = '.'; + buf[max_len] = 0; + } + + idx = len_str + err->idx - cut; + } + + fprintf(stderr, "%s'%s'\n", str, buf); + if (idx) { + fprintf(stderr, "%*s\\___ %s\n", idx + 1, "", err->str); + if (err->help) + fprintf(stderr, "\n%s\n", err->help); + free(err->str); + free(err->help); + } + + fprintf(stderr, "Run 'perf list' for a list of valid events\n"); +} + +#undef MAX_WIDTH + int parse_events_option(const struct option *opt, const char *str, int unset __maybe_unused) { struct perf_evlist *evlist = *(struct perf_evlist **)opt->value; - int ret = parse_events(evlist, str); + struct parse_events_error err = { .idx = 0, }; + int ret = parse_events(evlist, str, &err); + + if (ret) + parse_events_print_error(&err, str); - if (ret) { - fprintf(stderr, "invalid or unsupported event: '%s'\n", str); - fprintf(stderr, "Run 'perf list' for a list of valid events\n"); - } return ret; } @@ -1098,6 +1195,14 @@ static const char * const event_type_descriptors[] = { "Hardware breakpoint", }; +static int cmp_string(const void *a, const void *b) +{ + const char * const *as = a; + const char * const *bs = b; + + return strcmp(*as, *bs); +} + /* * Print the events from <debugfs_mount_point>/tracing/events */ @@ -1109,18 +1214,21 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, struct dirent *sys_next, *evt_next, sys_dirent, evt_dirent; char evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - char sbuf[STRERR_BUFSIZE]; - - if (debugfs_valid_mountpoint(tracing_events_path)) { - printf(" [ Tracepoints not available: %s ]\n", - strerror_r(errno, sbuf, sizeof(sbuf))); - return; - } + char **evt_list = NULL; + unsigned int evt_i = 0, evt_num = 0; + bool evt_num_known = false; +restart: sys_dir = opendir(tracing_events_path); if (!sys_dir) return; + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_close_sys_dir; + } + for_each_subsystem(sys_dir, sys_dirent, sys_next) { if (subsys_glob != NULL && !strglobmatch(sys_dirent.d_name, subsys_glob)) @@ -1137,19 +1245,56 @@ void print_tracepoint_events(const char *subsys_glob, const char *event_glob, !strglobmatch(evt_dirent.d_name, event_glob)) continue; - if (name_only) { - printf("%s:%s ", sys_dirent.d_name, evt_dirent.d_name); + if (!evt_num_known) { + evt_num++; continue; } snprintf(evt_path, MAXPATHLEN, "%s:%s", sys_dirent.d_name, evt_dirent.d_name); - printf(" %-50s [%s]\n", evt_path, - event_type_descriptors[PERF_TYPE_TRACEPOINT]); + + evt_list[evt_i] = strdup(evt_path); + if (evt_list[evt_i] == NULL) + goto out_close_evt_dir; + evt_i++; } closedir(evt_dir); } closedir(sys_dir); + + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + } + if (evt_num) + printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_close_evt_dir: + closedir(evt_dir); +out_close_sys_dir: + closedir(sys_dir); + + printf("FATAL: not enough memory to print %s\n", + event_type_descriptors[PERF_TYPE_TRACEPOINT]); + if (evt_list) + goto out_free; } /* @@ -1163,9 +1308,6 @@ int is_valid_tracepoint(const char *event_string) char 
evt_path[MAXPATHLEN]; char dir_path[MAXPATHLEN]; - if (debugfs_valid_mountpoint(tracing_events_path)) - return 0; - sys_dir = opendir(tracing_events_path); if (!sys_dir) return 0; @@ -1233,38 +1375,19 @@ static bool is_event_supported(u8 type, unsigned config) return ret; } -static void __print_events_type(u8 type, struct event_symbol *syms, - unsigned max) -{ - char name[64]; - unsigned i; - - for (i = 0; i < max ; i++, syms++) { - if (!is_event_supported(type, i)) - continue; - - if (strlen(syms->alias)) - snprintf(name, sizeof(name), "%s OR %s", - syms->symbol, syms->alias); - else - snprintf(name, sizeof(name), "%s", syms->symbol); - - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - } -} - -void print_events_type(u8 type) -{ - if (type == PERF_TYPE_SOFTWARE) - __print_events_type(type, event_symbols_sw, PERF_COUNT_SW_MAX); - else - __print_events_type(type, event_symbols_hw, PERF_COUNT_HW_MAX); -} - int print_hwcache_events(const char *event_glob, bool name_only) { - unsigned int type, op, i, printed = 0; + unsigned int type, op, i, evt_i = 0, evt_num = 0; char name[64]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + } for (type = 0; type < PERF_COUNT_HW_CACHE_MAX; type++) { for (op = 0; op < PERF_COUNT_HW_CACHE_OP_MAX; op++) { @@ -1282,27 +1405,66 @@ int print_hwcache_events(const char *event_glob, bool name_only) type | (op << 8) | (i << 16))) continue; - if (name_only) - printf("%s ", name); - else - printf(" %-50s [%s]\n", name, - event_type_descriptors[PERF_TYPE_HW_CACHE]); - ++printed; + if (!evt_num_known) { + evt_num++; + continue; + } + + evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } } } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], + event_type_descriptors[PERF_TYPE_HW_CACHE]); + } + if (evt_num) printf("\n"); - return printed; + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return evt_num; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[PERF_TYPE_HW_CACHE]); + if (evt_list) + goto out_free; + return evt_num; } -static void print_symbol_events(const char *event_glob, unsigned type, +void print_symbol_events(const char *event_glob, unsigned type, struct event_symbol *syms, unsigned max, bool name_only) { - unsigned i, printed = 0; + unsigned int i, evt_i = 0, evt_num = 0; char name[MAX_NAME_LEN]; + char **evt_list = NULL; + bool evt_num_known = false; + +restart: + if (evt_num_known) { + evt_list = zalloc(sizeof(char *) * evt_num); + if (!evt_list) + goto out_enomem; + syms -= max; + } for (i = 0; i < max; i++, syms++) { @@ -1314,23 +1476,49 @@ static void print_symbol_events(const char *event_glob, unsigned type, if (!is_event_supported(type, i)) continue; - if (name_only) { - printf("%s ", syms->symbol); + if (!evt_num_known) { + evt_num++; continue; } - if (strlen(syms->alias)) + if (!name_only && strlen(syms->alias)) snprintf(name, MAX_NAME_LEN, "%s OR %s", syms->symbol, syms->alias); else strncpy(name, syms->symbol, MAX_NAME_LEN); - printf(" %-50s [%s]\n", name, event_type_descriptors[type]); - - printed++; + 
evt_list[evt_i] = strdup(name); + if (evt_list[evt_i] == NULL) + goto out_enomem; + evt_i++; } - if (printed) + if (!evt_num_known) { + evt_num_known = true; + goto restart; + } + qsort(evt_list, evt_num, sizeof(char *), cmp_string); + evt_i = 0; + while (evt_i < evt_num) { + if (name_only) { + printf("%s ", evt_list[evt_i++]); + continue; + } + printf(" %-50s [%s]\n", evt_list[evt_i++], event_type_descriptors[type]); + } + if (evt_num) printf("\n"); + +out_free: + evt_num = evt_i; + for (evt_i = 0; evt_i < evt_num; evt_i++) + zfree(&evt_list[evt_i]); + zfree(&evt_list); + return; + +out_enomem: + printf("FATAL: not enough memory to print %s\n", event_type_descriptors[type]); + if (evt_list) + goto out_free; } /* @@ -1338,11 +1526,6 @@ static void print_symbol_events(const char *event_glob, unsigned type, */ void print_events(const char *event_glob, bool name_only) { - if (!name_only) { - printf("\n"); - printf("List of pre-defined events (to be used in -e):\n"); - } - print_symbol_events(event_glob, PERF_TYPE_HARDWARE, event_symbols_hw, PERF_COUNT_HW_MAX, name_only); @@ -1382,7 +1565,7 @@ int parse_events__is_hardcoded_term(struct parse_events_term *term) static int new_term(struct parse_events_term **_term, int type_val, int type_term, char *config, - char *str, u64 num) + char *str, u64 num, int err_term, int err_val) { struct parse_events_term *term; @@ -1394,6 +1577,8 @@ static int new_term(struct parse_events_term **_term, int type_val, term->type_val = type_val; term->type_term = type_term; term->config = config; + term->err_term = err_term; + term->err_val = err_val; switch (type_val) { case PARSE_EVENTS__TERM_TYPE_NUM: @@ -1412,17 +1597,29 @@ static int new_term(struct parse_events_term **_term, int type_val, } int parse_events_term__num(struct parse_events_term **term, - int type_term, char *config, u64 num) + int type_term, char *config, u64 num, + void *loc_term_, void *loc_val_) { + YYLTYPE *loc_term = loc_term_; + YYLTYPE *loc_val = loc_val_; + return new_term(term, PARSE_EVENTS__TERM_TYPE_NUM, type_term, - config, NULL, num); + config, NULL, num, + loc_term ? loc_term->first_column : 0, + loc_val ? loc_val->first_column : 0); } int parse_events_term__str(struct parse_events_term **term, - int type_term, char *config, char *str) + int type_term, char *config, char *str, + void *loc_term_, void *loc_val_) { + YYLTYPE *loc_term = loc_term_; + YYLTYPE *loc_val = loc_val_; + return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, type_term, - config, str, 0); + config, str, 0, + loc_term ? loc_term->first_column : 0, + loc_val ? 
loc_val->first_column : 0); } int parse_events_term__sym_hw(struct parse_events_term **term, @@ -1436,18 +1633,20 @@ int parse_events_term__sym_hw(struct parse_events_term **term, if (config) return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, PARSE_EVENTS__TERM_TYPE_USER, config, - (char *) sym->symbol, 0); + (char *) sym->symbol, 0, 0, 0); else return new_term(term, PARSE_EVENTS__TERM_TYPE_STR, PARSE_EVENTS__TERM_TYPE_USER, - (char *) "event", (char *) sym->symbol, 0); + (char *) "event", (char *) sym->symbol, + 0, 0, 0); } int parse_events_term__clone(struct parse_events_term **new, struct parse_events_term *term) { return new_term(new, term->type_val, term->type_term, term->config, - term->val.str, term->val.num); + term->val.str, term->val.num, + term->err_term, term->err_val); } void parse_events__free_terms(struct list_head *terms) @@ -1457,3 +1656,15 @@ void parse_events__free_terms(struct list_head *terms) list_for_each_entry_safe(term, h, terms, list) free(term); } + +void parse_events_evlist_error(struct parse_events_evlist *data, + int idx, const char *str) +{ + struct parse_events_error *err = data->error; + + if (!err) + return; + err->idx = idx; + err->str = strdup(str); + WARN_ONCE(!err->str, "WARNING: failed to allocate error string"); +} diff --git a/tools/perf/util/parse-events.h b/tools/perf/util/parse-events.h index ff6e1fa4111e..131f29b2f132 100644 --- a/tools/perf/util/parse-events.h +++ b/tools/perf/util/parse-events.h @@ -12,6 +12,7 @@ struct list_head; struct perf_evsel; struct perf_evlist; +struct parse_events_error; struct option; @@ -29,7 +30,8 @@ const char *event_type(int type); extern int parse_events_option(const struct option *opt, const char *str, int unset); -extern int parse_events(struct perf_evlist *evlist, const char *str); +extern int parse_events(struct perf_evlist *evlist, const char *str, + struct parse_events_error *error); extern int parse_events_terms(struct list_head *terms, const char *str); extern int parse_filter(const struct option *opt, const char *str, int unset); @@ -72,12 +74,23 @@ struct parse_events_term { int type_term; struct list_head list; bool used; + + /* error string indexes for within parsed string */ + int err_term; + int err_val; +}; + +struct parse_events_error { + int idx; /* index in the parsed string */ + char *str; /* string to display at the index */ + char *help; /* optional help string */ }; struct parse_events_evlist { - struct list_head list; - int idx; - int nr_groups; + struct list_head list; + int idx; + int nr_groups; + struct parse_events_error *error; }; struct parse_events_terms { @@ -85,10 +98,12 @@ struct parse_events_terms { }; int parse_events__is_hardcoded_term(struct parse_events_term *term); -int parse_events_term__num(struct parse_events_term **_term, - int type_term, char *config, u64 num); -int parse_events_term__str(struct parse_events_term **_term, - int type_term, char *config, char *str); +int parse_events_term__num(struct parse_events_term **term, + int type_term, char *config, u64 num, + void *loc_term, void *loc_val); +int parse_events_term__str(struct parse_events_term **term, + int type_term, char *config, char *str, + void *loc_term, void *loc_val); int parse_events_term__sym_hw(struct parse_events_term **term, char *config, unsigned idx); int parse_events_term__clone(struct parse_events_term **new, @@ -99,29 +114,41 @@ int parse_events__modifier_group(struct list_head *list, char *event_mod); int parse_events_name(struct list_head *list, char *name); int 
parse_events_add_tracepoint(struct list_head *list, int *idx, char *sys, char *event); -int parse_events_add_numeric(struct list_head *list, int *idx, +int parse_events_add_numeric(struct parse_events_evlist *data, + struct list_head *list, u32 type, u64 config, struct list_head *head_config); int parse_events_add_cache(struct list_head *list, int *idx, char *type, char *op_result1, char *op_result2); int parse_events_add_breakpoint(struct list_head *list, int *idx, void *ptr, char *type, u64 len); -int parse_events_add_pmu(struct list_head *list, int *idx, - char *pmu , struct list_head *head_config); +int parse_events_add_pmu(struct parse_events_evlist *data, + struct list_head *list, char *name, + struct list_head *head_config); enum perf_pmu_event_symbol_type perf_pmu__parse_check(const char *name); void parse_events__set_leader(char *name, struct list_head *list); void parse_events_update_lists(struct list_head *list_event, struct list_head *list_all); -void parse_events_error(void *data, void *scanner, char const *msg); +void parse_events_evlist_error(struct parse_events_evlist *data, + int idx, const char *str); void print_events(const char *event_glob, bool name_only); -void print_events_type(u8 type); + +struct event_symbol { + const char *symbol; + const char *alias; +}; +extern struct event_symbol event_symbols_hw[]; +extern struct event_symbol event_symbols_sw[]; +void print_symbol_events(const char *event_glob, unsigned type, + struct event_symbol *syms, unsigned max, + bool name_only); void print_tracepoint_events(const char *subsys_glob, const char *event_glob, bool name_only); int print_hwcache_events(const char *event_glob, bool name_only); extern int is_valid_tracepoint(const char *event_string); -extern int valid_debugfs_mount(const char *debugfs); +int valid_event_mount(const char *eventfs); #endif /* __PERF_PARSE_EVENTS_H */ diff --git a/tools/perf/util/parse-events.l b/tools/perf/util/parse-events.l index 94eacb6c1ef7..13cef3c65565 100644 --- a/tools/perf/util/parse-events.l +++ b/tools/perf/util/parse-events.l @@ -3,6 +3,8 @@ %option bison-bridge %option prefix="parse_events_" %option stack +%option bison-locations +%option yylineno %{ #include <errno.h> @@ -51,6 +53,18 @@ static int str(yyscan_t scanner, int token) return token; } +#define REWIND(__alloc) \ +do { \ + YYSTYPE *__yylval = parse_events_get_lval(yyscanner); \ + char *text = parse_events_get_text(yyscanner); \ + \ + if (__alloc) \ + __yylval->str = strdup(text); \ + \ + yycolumn -= strlen(text); \ + yyless(0); \ +} while (0) + static int pmu_str_check(yyscan_t scanner) { YYSTYPE *yylval = parse_events_get_lval(scanner); @@ -85,6 +99,13 @@ static int term(yyscan_t scanner, int type) return PE_TERM; } +#define YY_USER_ACTION \ +do { \ + yylloc->last_column = yylloc->first_column; \ + yylloc->first_column = yycolumn; \ + yycolumn += yyleng; \ +} while (0); + %} %x mem @@ -98,10 +119,10 @@ event [^,{}/]+ num_dec [0-9]+ num_hex 0x[a-fA-F0-9]+ num_raw_hex [a-fA-F0-9]+ -name [a-zA-Z_*?][a-zA-Z0-9_*?]* -name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?]* +name [a-zA-Z_*?][a-zA-Z0-9_*?.]* +name_minus [a-zA-Z_*?][a-zA-Z0-9\-_*?.]* /* If you add a modifier you need to update check_modifier() */ -modifier_event [ukhpGHSD]+ +modifier_event [ukhpGHSDI]+ modifier_bp [rwx]{1,3} %% @@ -119,6 +140,12 @@ modifier_bp [rwx]{1,3} if (start_token) { parse_events_set_extra(NULL, yyscanner); + /* + * The flex parser does not init locations variable + * via the scan_string interface, so we need do the + * init in here. 
+ */ + yycolumn = 0; return start_token; } } @@ -127,24 +154,29 @@ modifier_bp [rwx]{1,3} <event>{ {group} { - BEGIN(INITIAL); yyless(0); + BEGIN(INITIAL); + REWIND(0); } {event_pmu} | {event} { - str(yyscanner, PE_EVENT_NAME); - BEGIN(INITIAL); yyless(0); + BEGIN(INITIAL); + REWIND(1); return PE_EVENT_NAME; } -. | <<EOF>> { - BEGIN(INITIAL); yyless(0); + BEGIN(INITIAL); + REWIND(0); } } <config>{ + /* + * Please update formats_error_string any time + * new static term is added. + */ config { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG); } config1 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG1); } config2 { return term(yyscanner, PARSE_EVENTS__TERM_TYPE_CONFIG2); } diff --git a/tools/perf/util/parse-events.y b/tools/perf/util/parse-events.y index 72def077dbbf..591905a02b92 100644 --- a/tools/perf/util/parse-events.y +++ b/tools/perf/util/parse-events.y @@ -2,6 +2,7 @@ %parse-param {void *_data} %parse-param {void *scanner} %lex-param {void* scanner} +%locations %{ @@ -14,8 +15,6 @@ #include "parse-events.h" #include "parse-events-bison.h" -extern int parse_events_lex (YYSTYPE* lvalp, void* scanner); - #define ABORT_ON(val) \ do { \ if (val) \ @@ -208,7 +207,7 @@ PE_NAME '/' event_config '/' struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, $3)); + ABORT_ON(parse_events_add_pmu(data, list, $1, $3)); parse_events__free_terms($3); $$ = list; } @@ -219,7 +218,7 @@ PE_NAME '/' '/' struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, $1, NULL)); + ABORT_ON(parse_events_add_pmu(data, list, $1, NULL)); $$ = list; } | @@ -232,11 +231,11 @@ PE_KERNEL_PMU_EVENT sep_dc ALLOC_LIST(head); ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1)); + $1, 1, &@1, NULL)); list_add_tail(&term->list, head); ALLOC_LIST(list); - ABORT_ON(parse_events_add_pmu(list, &data->idx, "cpu", head)); + ABORT_ON(parse_events_add_pmu(data, list, "cpu", head)); parse_events__free_terms(head); $$ = list; } @@ -252,7 +251,7 @@ PE_PMU_EVENT_PRE '-' PE_PMU_EVENT_SUF sep_dc ALLOC_LIST(head); ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - &pmu_name, 1)); + &pmu_name, 1, &@1, NULL)); list_add_tail(&term->list, head); ALLOC_LIST(list); @@ -275,8 +274,7 @@ value_sym '/' event_config '/' int config = $1 & 255; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(list, &data->idx, - type, config, $3)); + ABORT_ON(parse_events_add_numeric(data, list, type, config, $3)); parse_events__free_terms($3); $$ = list; } @@ -289,8 +287,7 @@ value_sym sep_slash_dc int config = $1 & 255; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(list, &data->idx, - type, config, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, type, config, NULL)); $$ = list; } @@ -389,7 +386,15 @@ PE_NAME ':' PE_NAME struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_tracepoint(list, &data->idx, $1, $3)); + if (parse_events_add_tracepoint(list, &data->idx, $1, $3)) { + struct parse_events_error *error = data->error; + + if (error) { + error->idx = @1.first_column; + error->str = strdup("unknown tracepoint"); + } + return -1; + } $$ = list; } @@ -400,7 +405,7 @@ PE_VALUE ':' PE_VALUE struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(list, &data->idx, (u32)$1, $3, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, (u32)$1, $3, NULL)); $$ = list; } @@ -411,8 +416,7 @@ PE_RAW struct list_head *list; ALLOC_LIST(list); - ABORT_ON(parse_events_add_numeric(list, 
&data->idx, - PERF_TYPE_RAW, $1, NULL)); + ABORT_ON(parse_events_add_numeric(data, list, PERF_TYPE_RAW, $1, NULL)); $$ = list; } @@ -450,7 +454,7 @@ PE_NAME '=' PE_NAME struct parse_events_term *term; ABORT_ON(parse_events_term__str(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3)); + $1, $3, &@1, &@3)); $$ = term; } | @@ -459,7 +463,7 @@ PE_NAME '=' PE_VALUE struct parse_events_term *term; ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, $3)); + $1, $3, &@1, &@3)); $$ = term; } | @@ -477,7 +481,7 @@ PE_NAME struct parse_events_term *term; ABORT_ON(parse_events_term__num(&term, PARSE_EVENTS__TERM_TYPE_USER, - $1, 1)); + $1, 1, &@1, NULL)); $$ = term; } | @@ -494,7 +498,7 @@ PE_TERM '=' PE_NAME { struct parse_events_term *term; - ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3)); + ABORT_ON(parse_events_term__str(&term, (int)$1, NULL, $3, &@1, &@3)); $$ = term; } | @@ -502,7 +506,7 @@ PE_TERM '=' PE_VALUE { struct parse_events_term *term; - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3)); + ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, $3, &@1, &@3)); $$ = term; } | @@ -510,7 +514,7 @@ PE_TERM { struct parse_events_term *term; - ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1)); + ABORT_ON(parse_events_term__num(&term, (int)$1, NULL, 1, &@1, NULL)); $$ = term; } @@ -520,7 +524,9 @@ sep_slash_dc: '/' | ':' | %% -void parse_events_error(void *data __maybe_unused, void *scanner __maybe_unused, +void parse_events_error(YYLTYPE *loc, void *data, + void *scanner __maybe_unused, char const *msg __maybe_unused) { + parse_events_evlist_error(data, loc->last_column, "parser error"); } diff --git a/tools/perf/util/parse-options.c b/tools/perf/util/parse-options.c index 4a015f77e2b5..01626be2a8eb 100644 --- a/tools/perf/util/parse-options.c +++ b/tools/perf/util/parse-options.c @@ -37,6 +37,7 @@ static int get_value(struct parse_opt_ctx_t *p, { const char *s, *arg = NULL; const int unset = flags & OPT_UNSET; + int err; if (unset && p->opt) return opterror(opt, "takes no value", flags); @@ -114,13 +115,29 @@ static int get_value(struct parse_opt_ctx_t *p, return 0; case OPTION_STRING: + err = 0; if (unset) *(const char **)opt->value = NULL; else if (opt->flags & PARSE_OPT_OPTARG && !p->opt) *(const char **)opt->value = (const char *)opt->defval; else - return get_arg(p, opt, flags, (const char **)opt->value); - return 0; + err = get_arg(p, opt, flags, (const char **)opt->value); + + /* PARSE_OPT_NOEMPTY: Allow NULL but disallow empty string. */ + if (opt->flags & PARSE_OPT_NOEMPTY) { + const char *val = *(const char **)opt->value; + + if (!val) + return err; + + /* Similar to unset if we are given an empty string. 
*/ + if (val[0] == '\0') { + *(const char **)opt->value = NULL; + return 0; + } + } + + return err; case OPTION_CALLBACK: if (unset) @@ -505,13 +522,18 @@ int parse_options_subcommand(int argc, const char **argv, const struct option *o break; case PARSE_OPT_LIST_OPTS: while (options->type != OPTION_END) { - printf("--%s ", options->long_name); + if (options->long_name) + printf("--%s ", options->long_name); options++; } + putchar('\n'); exit(130); case PARSE_OPT_LIST_SUBCMDS: - for (int i = 0; subcommands[i]; i++) - printf("%s ", subcommands[i]); + if (subcommands) { + for (int i = 0; subcommands[i]; i++) + printf("%s ", subcommands[i]); + } + putchar('\n'); exit(130); default: /* PARSE_OPT_UNKNOWN */ if (ctx.argv[0][1] == '-') { diff --git a/tools/perf/util/parse-options.h b/tools/perf/util/parse-options.h index 97b153fb4999..367d8b816cc7 100644 --- a/tools/perf/util/parse-options.h +++ b/tools/perf/util/parse-options.h @@ -40,6 +40,7 @@ enum parse_opt_option_flags { PARSE_OPT_LASTARG_DEFAULT = 16, PARSE_OPT_DISABLED = 32, PARSE_OPT_EXCLUSIVE = 64, + PARSE_OPT_NOEMPTY = 128, }; struct option; @@ -122,6 +123,11 @@ struct option { #define OPT_LONG(s, l, v, h) { .type = OPTION_LONG, .short_name = (s), .long_name = (l), .value = check_vtype(v, long *), .help = (h) } #define OPT_U64(s, l, v, h) { .type = OPTION_U64, .short_name = (s), .long_name = (l), .value = check_vtype(v, u64 *), .help = (h) } #define OPT_STRING(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h) } +#define OPT_STRING_OPTARG(s, l, v, a, h, d) \ + { .type = OPTION_STRING, .short_name = (s), .long_name = (l), \ + .value = check_vtype(v, const char **), (a), .help = (h), \ + .flags = PARSE_OPT_OPTARG, .defval = (intptr_t)(d) } +#define OPT_STRING_NOEMPTY(s, l, v, a, h) { .type = OPTION_STRING, .short_name = (s), .long_name = (l), .value = check_vtype(v, const char **), (a), .help = (h), .flags = PARSE_OPT_NOEMPTY} #define OPT_DATE(s, l, v, h) \ { .type = OPTION_CALLBACK, .short_name = (s), .long_name = (l), .value = (v), .argh = "time", .help = (h), .callback = parse_opt_approxidate_cb } #define OPT_CALLBACK(s, l, v, a, h, f) \ diff --git a/tools/perf/util/pmu.c b/tools/perf/util/pmu.c index 48411674da0f..7bcb8c315615 100644 --- a/tools/perf/util/pmu.c +++ b/tools/perf/util/pmu.c @@ -1,4 +1,5 @@ #include <linux/list.h> +#include <linux/compiler.h> #include <sys/types.h> #include <unistd.h> #include <stdio.h> @@ -112,7 +113,11 @@ static int perf_pmu__parse_scale(struct perf_pmu_alias *alias, char *dir, char * if (sret < 0) goto error; - scale[sret] = '\0'; + if (scale[sret - 1] == '\n') + scale[sret - 1] = '\0'; + else + scale[sret] = '\0'; + /* * save current locale */ @@ -154,7 +159,10 @@ static int perf_pmu__parse_unit(struct perf_pmu_alias *alias, char *dir, char *n close(fd); - alias->unit[sret] = '\0'; + if (alias->unit[sret - 1] == '\n') + alias->unit[sret - 1] = '\0'; + else + alias->unit[sret] = '\0'; return 0; error: @@ -198,17 +206,12 @@ static int perf_pmu__parse_snapshot(struct perf_pmu_alias *alias, return 0; } -static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +static int __perf_pmu__new_alias(struct list_head *list, char *dir, char *name, + char *desc __maybe_unused, char *val) { struct perf_pmu_alias *alias; - char buf[256]; int ret; - ret = fread(buf, 1, sizeof(buf), file); - if (ret == 0) - return -EINVAL; - buf[ret] = 0; - alias = malloc(sizeof(*alias)); if (!alias) return -ENOMEM; @@ 
-218,26 +221,43 @@ static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FI alias->unit[0] = '\0'; alias->per_pkg = false; - ret = parse_events_terms(&alias->terms, buf); + ret = parse_events_terms(&alias->terms, val); if (ret) { + pr_err("Cannot parse alias %s: %d\n", val, ret); free(alias); return ret; } alias->name = strdup(name); - /* - * load unit name and scale if available - */ - perf_pmu__parse_unit(alias, dir, name); - perf_pmu__parse_scale(alias, dir, name); - perf_pmu__parse_per_pkg(alias, dir, name); - perf_pmu__parse_snapshot(alias, dir, name); + if (dir) { + /* + * load unit name and scale if available + */ + perf_pmu__parse_unit(alias, dir, name); + perf_pmu__parse_scale(alias, dir, name); + perf_pmu__parse_per_pkg(alias, dir, name); + perf_pmu__parse_snapshot(alias, dir, name); + } list_add_tail(&alias->list, list); return 0; } +static int perf_pmu__new_alias(struct list_head *list, char *dir, char *name, FILE *file) +{ + char buf[256]; + int ret; + + ret = fread(buf, 1, sizeof(buf), file); + if (ret == 0) + return -EINVAL; + + buf[ret] = 0; + + return __perf_pmu__new_alias(list, dir, name, NULL, buf); +} + static inline bool pmu_alias_info_file(char *name) { size_t len; @@ -429,7 +449,7 @@ static struct cpu_map *pmu_cpumask(const char *name) return cpus; } -struct perf_event_attr *__attribute__((weak)) +struct perf_event_attr * __weak perf_pmu__get_default_config(struct perf_pmu *pmu __maybe_unused) { return NULL; @@ -442,6 +462,10 @@ static struct perf_pmu *pmu_lookup(const char *name) LIST_HEAD(aliases); __u32 type; + /* No support for intel_bts or intel_pt so disallow them */ + if (!strcmp(name, "intel_bts") || !strcmp(name, "intel_pt")) + return NULL; + /* * The pmu data we store & need consists of the pmu * type value and format definitions. Load both right @@ -579,6 +603,38 @@ static int pmu_resolve_param_term(struct parse_events_term *term, return -1; } +static char *formats_error_string(struct list_head *formats) +{ + struct perf_pmu_format *format; + char *err, *str; + static const char *static_terms = "config,config1,config2,name,period,branch_type\n"; + unsigned i = 0; + + if (!asprintf(&str, "valid terms:")) + return NULL; + + /* sysfs exported terms */ + list_for_each_entry(format, formats, list) { + char c = i++ ? ',' : ' '; + + err = str; + if (!asprintf(&str, "%s%c%s", err, c, format->name)) + goto fail; + free(err); + } + + /* static terms */ + err = str; + if (!asprintf(&str, "%s,%s", err, static_terms)) + goto fail; + + free(err); + return str; +fail: + free(err); + return NULL; +} + /* * Setup one of config[12] attr members based on the * user input data - term parameter. 
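/*
 * [Editorial sketch, not part of the commit] How a caller is expected to
 * consume the parse_events_error plumbing that this series threads
 * through parse_events() and perf_pmu__config(): pass an error
 * descriptor down and, on failure, use err.idx/err.str/err.help (filled
 * for example by pmu_config_term() via formats_error_string() below) to
 * point at the offending column.  The reporting here is a loose
 * approximation of the static parse_events_print_error() helper added in
 * parse-events.c; example_parse and the include paths are illustrative
 * assumptions for a build inside tools/perf.
 */
#include <errno.h>
#include <stdio.h>
#include <stdlib.h>
#include "evlist.h"
#include "parse-events.h"

static int example_parse(const char *str)
{
	struct parse_events_error err = { .idx = 0, .str = NULL, .help = NULL };
	struct perf_evlist *evlist = perf_evlist__new();
	int ret;

	if (evlist == NULL)
		return -ENOMEM;

	ret = parse_events(evlist, str, &err);
	if (ret) {
		/* Point at the column the parser or the PMU code blamed. */
		fprintf(stderr, "event syntax error: '%s'\n", str);
		fprintf(stderr, "%*s\\___ %s\n", err.idx + 1, "",
			err.str ? err.str : "unknown error");
		if (err.help)
			fprintf(stderr, "\n%s\n", err.help);
		free(err.str);
		free(err.help);
	}

	perf_evlist__delete(evlist);
	return ret;
}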
@@ -587,7 +643,7 @@ static int pmu_config_term(struct list_head *formats, struct perf_event_attr *attr, struct parse_events_term *term, struct list_head *head_terms, - bool zero) + bool zero, struct parse_events_error *err) { struct perf_pmu_format *format; __u64 *vp; @@ -611,6 +667,11 @@ static int pmu_config_term(struct list_head *formats, if (!format) { if (verbose) printf("Invalid event/parameter '%s'\n", term->config); + if (err) { + err->idx = term->err_term; + err->str = strdup("unknown term"); + err->help = formats_error_string(formats); + } return -EINVAL; } @@ -636,9 +697,14 @@ static int pmu_config_term(struct list_head *formats, val = term->val.num; else if (term->type_val == PARSE_EVENTS__TERM_TYPE_STR) { if (strcmp(term->val.str, "?")) { - if (verbose) + if (verbose) { pr_info("Invalid sysfs entry %s=%s\n", term->config, term->val.str); + } + if (err) { + err->idx = term->err_val; + err->str = strdup("expected numeric value"); + } return -EINVAL; } @@ -654,12 +720,13 @@ static int pmu_config_term(struct list_head *formats, int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, - bool zero) + bool zero, struct parse_events_error *err) { struct parse_events_term *term; list_for_each_entry(term, head_terms, list) { - if (pmu_config_term(formats, attr, term, head_terms, zero)) + if (pmu_config_term(formats, attr, term, head_terms, + zero, err)) return -EINVAL; } @@ -672,12 +739,14 @@ int perf_pmu__config_terms(struct list_head *formats, * 2) pmu format definitions - specified by pmu parameter */ int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, - struct list_head *head_terms) + struct list_head *head_terms, + struct parse_events_error *err) { bool zero = !!pmu->default_config; attr->type = pmu->type; - return perf_pmu__config_terms(&pmu->format, attr, head_terms, zero); + return perf_pmu__config_terms(&pmu->format, attr, head_terms, + zero, err); } static struct perf_pmu_alias *pmu_find_alias(struct perf_pmu *pmu, diff --git a/tools/perf/util/pmu.h b/tools/perf/util/pmu.h index 6b1249fbdb5f..7b9c8cf8ae3e 100644 --- a/tools/perf/util/pmu.h +++ b/tools/perf/util/pmu.h @@ -4,6 +4,7 @@ #include <linux/bitmap.h> #include <linux/perf_event.h> #include <stdbool.h> +#include "parse-events.h" enum { PERF_PMU_FORMAT_VALUE_CONFIG, @@ -47,11 +48,12 @@ struct perf_pmu_alias { struct perf_pmu *perf_pmu__find(const char *name); int perf_pmu__config(struct perf_pmu *pmu, struct perf_event_attr *attr, - struct list_head *head_terms); + struct list_head *head_terms, + struct parse_events_error *error); int perf_pmu__config_terms(struct list_head *formats, struct perf_event_attr *attr, struct list_head *head_terms, - bool zero); + bool zero, struct parse_events_error *error); int perf_pmu__check_alias(struct perf_pmu *pmu, struct list_head *head_terms, struct perf_pmu_info *info); struct list_head *perf_pmu__alias(struct perf_pmu *pmu, diff --git a/tools/perf/util/probe-event.c b/tools/perf/util/probe-event.c index 919937eb0be2..381f23a443c7 100644 --- a/tools/perf/util/probe-event.c +++ b/tools/perf/util/probe-event.c @@ -41,6 +41,7 @@ #include "symbol.h" #include "thread.h" #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include "trace-event.h" /* For __maybe_unused */ #include "probe-event.h" #include "probe-finder.h" @@ -50,6 +51,7 @@ #define PERFPROBE_GROUP "probe" bool probe_event_dry_run; /* Dry run flag */ +struct probe_conf probe_conf; #define semantic_error(msg ...) 
pr_err("Semantic error :" msg) @@ -79,6 +81,7 @@ static int init_symbol_maps(bool user_only) int ret; symbol_conf.sort_by_name = true; + symbol_conf.allow_aliases = true; ret = symbol__init(NULL); if (ret < 0) { pr_debug("Failed to init symbol map.\n"); @@ -133,6 +136,8 @@ static struct ref_reloc_sym *kernel_get_ref_reloc_sym(void) return NULL; kmap = map__kmap(host_machine->vmlinux_maps[MAP__FUNCTION]); + if (!kmap) + return NULL; return kmap->ref_reloc_sym; } @@ -150,25 +155,25 @@ static u64 kernel_get_symbol_address_by_name(const char *name, bool reloc) sym = __find_kernel_function_by_name(name, &map); if (sym) return map->unmap_ip(map, sym->start) - - (reloc) ? 0 : map->reloc; + ((reloc) ? 0 : map->reloc); } return 0; } static struct map *kernel_get_module_map(const char *module) { - struct rb_node *nd; struct map_groups *grp = &host_machine->kmaps; + struct maps *maps = &grp->maps[MAP__FUNCTION]; + struct map *pos; /* A file path -- this is an offline module */ if (module && strchr(module, '/')) - return machine__new_module(host_machine, 0, module); + return machine__findnew_module_map(host_machine, 0, module); if (!module) module = "kernel"; - for (nd = rb_first(&grp->maps[MAP__FUNCTION]); nd; nd = rb_next(nd)) { - struct map *pos = rb_entry(nd, struct map, rb_node); + for (pos = maps__first(maps); pos; pos = map__next(pos)) { if (strncmp(pos->dso->short_name + 1, module, pos->dso->short_name_len - 2) == 0) { return pos; @@ -177,46 +182,24 @@ static struct map *kernel_get_module_map(const char *module) return NULL; } -static struct dso *kernel_get_module_dso(const char *module) +static struct map *get_target_map(const char *target, bool user) { - struct dso *dso; - struct map *map; - const char *vmlinux_name; - - if (module) { - list_for_each_entry(dso, &host_machine->kernel_dsos.head, - node) { - if (strncmp(dso->short_name + 1, module, - dso->short_name_len - 2) == 0) - goto found; - } - pr_debug("Failed to find module %s.\n", module); - return NULL; - } - - map = host_machine->vmlinux_maps[MAP__FUNCTION]; - dso = map->dso; - - vmlinux_name = symbol_conf.vmlinux_name; - if (vmlinux_name) { - if (dso__load_vmlinux(dso, map, vmlinux_name, false, NULL) <= 0) - return NULL; - } else { - if (dso__load_vmlinux_path(dso, map, NULL) <= 0) { - pr_debug("Failed to load kernel map.\n"); - return NULL; - } - } -found: - return dso; + /* Init maps of given executable or kernel */ + if (user) + return dso__new_map(target); + else + return kernel_get_module_map(target); } -const char *kernel_get_module_path(const char *module) +static void put_target_map(struct map *map, bool user) { - struct dso *dso = kernel_get_module_dso(module); - return (dso) ? 
dso->long_name : NULL; + if (map && user) { + /* Only the user map needs to be released */ + map__put(map); + } } + static int convert_exec_to_group(const char *exec, char **result) { char *ptr1, *ptr2, *exec_copy; @@ -248,6 +231,13 @@ out: return ret; } +static void clear_perf_probe_point(struct perf_probe_point *pp) +{ + free(pp->file); + free(pp->function); + free(pp->lazy_line); +} + static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) { int i; @@ -256,22 +246,180 @@ static void clear_probe_trace_events(struct probe_trace_event *tevs, int ntevs) clear_probe_trace_event(tevs + i); } +static bool kprobe_blacklist__listed(unsigned long address); +static bool kprobe_warn_out_range(const char *symbol, unsigned long address) +{ + u64 etext_addr; + + /* Get the address of _etext for checking non-probable text symbol */ + etext_addr = kernel_get_symbol_address_by_name("_etext", false); + + if (etext_addr != 0 && etext_addr < address) + pr_warning("%s is out of .text, skip it.\n", symbol); + else if (kprobe_blacklist__listed(address)) + pr_warning("%s is blacklisted function, skip it.\n", symbol); + else + return false; + + return true; +} + #ifdef HAVE_DWARF_SUPPORT +static int kernel_get_module_dso(const char *module, struct dso **pdso) +{ + struct dso *dso; + struct map *map; + const char *vmlinux_name; + int ret = 0; + + if (module) { + list_for_each_entry(dso, &host_machine->dsos.head, node) { + if (!dso->kernel) + continue; + if (strncmp(dso->short_name + 1, module, + dso->short_name_len - 2) == 0) + goto found; + } + pr_debug("Failed to find module %s.\n", module); + return -ENOENT; + } + + map = host_machine->vmlinux_maps[MAP__FUNCTION]; + dso = map->dso; + + vmlinux_name = symbol_conf.vmlinux_name; + dso->load_errno = 0; + if (vmlinux_name) + ret = dso__load_vmlinux(dso, map, vmlinux_name, false, NULL); + else + ret = dso__load_vmlinux_path(dso, map, NULL); +found: + *pdso = dso; + return ret; +} + +/* + * Some binaries like glibc have special symbols which are on the symbol + * table, but not in the debuginfo. If we can find the address of the + * symbol from map, we can translate the address back to the probe point. + */ +static int find_alternative_probe_point(struct debuginfo *dinfo, + struct perf_probe_point *pp, + struct perf_probe_point *result, + const char *target, bool uprobes) +{ + struct map *map = NULL; + struct symbol *sym; + u64 address = 0; + int ret = -ENOENT; + + /* This can work only for function-name based one */ + if (!pp->function || pp->file) + return -ENOTSUP; + + map = get_target_map(target, uprobes); + if (!map) + return -EINVAL; + + /* Find the address of given function */ + map__for_each_symbol_by_name(map, pp->function, sym) { + if (uprobes) + address = sym->start; + else + address = map->unmap_ip(map, sym->start); + break; + } + if (!address) { + ret = -ENOENT; + goto out; + } + pr_debug("Symbol %s address found : %" PRIx64 "\n", + pp->function, address); + + ret = debuginfo__find_probe_point(dinfo, (unsigned long)address, + result); + if (ret <= 0) + ret = (!ret) ? 
-ENOENT : ret; + else { + result->offset += pp->offset; + result->line += pp->line; + result->retprobe = pp->retprobe; + ret = 0; + } + +out: + put_target_map(map, uprobes); + return ret; + +} + +static int get_alternative_probe_event(struct debuginfo *dinfo, + struct perf_probe_event *pev, + struct perf_probe_point *tmp) +{ + int ret; + + memcpy(tmp, &pev->point, sizeof(*tmp)); + memset(&pev->point, 0, sizeof(pev->point)); + ret = find_alternative_probe_point(dinfo, tmp, &pev->point, + pev->target, pev->uprobes); + if (ret < 0) + memcpy(&pev->point, tmp, sizeof(*tmp)); + + return ret; +} + +static int get_alternative_line_range(struct debuginfo *dinfo, + struct line_range *lr, + const char *target, bool user) +{ + struct perf_probe_point pp = { .function = lr->function, + .file = lr->file, + .line = lr->start }; + struct perf_probe_point result; + int ret, len = 0; + + memset(&result, 0, sizeof(result)); + + if (lr->end != INT_MAX) + len = lr->end - lr->start; + ret = find_alternative_probe_point(dinfo, &pp, &result, + target, user); + if (!ret) { + lr->function = result.function; + lr->file = result.file; + lr->start = result.line; + if (lr->end != INT_MAX) + lr->end = lr->start + len; + clear_perf_probe_point(&pp); + } + return ret; +} + /* Open new debuginfo of given module */ static struct debuginfo *open_debuginfo(const char *module, bool silent) { const char *path = module; - struct debuginfo *ret; + char reason[STRERR_BUFSIZE]; + struct debuginfo *ret = NULL; + struct dso *dso = NULL; + int err; if (!module || !strchr(module, '/')) { - path = kernel_get_module_path(module); - if (!path) { + err = kernel_get_module_dso(module, &dso); + if (err < 0) { + if (!dso || dso->load_errno == 0) { + if (!strerror_r(-err, reason, STRERR_BUFSIZE)) + strcpy(reason, "(unknown)"); + } else + dso__strerror_load(dso, reason, STRERR_BUFSIZE); if (!silent) - pr_err("Failed to find path of %s module.\n", - module ?: "kernel"); + pr_err("Failed to find the path for %s: %s\n", + module ?: "kernel", reason); return NULL; } + path = dso->long_name; } ret = debuginfo__new(path); if (!ret && !silent) { @@ -285,6 +433,41 @@ static struct debuginfo *open_debuginfo(const char *module, bool silent) return ret; } +/* For caching the last debuginfo */ +static struct debuginfo *debuginfo_cache; +static char *debuginfo_cache_path; + +static struct debuginfo *debuginfo_cache__open(const char *module, bool silent) +{ + if ((debuginfo_cache_path && !strcmp(debuginfo_cache_path, module)) || + (!debuginfo_cache_path && !module && debuginfo_cache)) + goto out; + + /* Copy module path */ + free(debuginfo_cache_path); + if (module) { + debuginfo_cache_path = strdup(module); + if (!debuginfo_cache_path) { + debuginfo__delete(debuginfo_cache); + debuginfo_cache = NULL; + goto out; + } + } + + debuginfo_cache = open_debuginfo(module, silent); + if (!debuginfo_cache) + zfree(&debuginfo_cache_path); +out: + return debuginfo_cache; +} + +static void debuginfo_cache__exit(void) +{ + debuginfo__delete(debuginfo_cache); + debuginfo_cache = NULL; + zfree(&debuginfo_cache_path); +} + static int get_text_start_address(const char *exec, unsigned long *address) { @@ -346,12 +529,11 @@ static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp, pr_debug("try to find information at %" PRIx64 " in %s\n", addr, tp->module ? 
: "kernel"); - dinfo = open_debuginfo(tp->module, verbose == 0); - if (dinfo) { + dinfo = debuginfo_cache__open(tp->module, verbose == 0); + if (dinfo) ret = debuginfo__find_probe_point(dinfo, (unsigned long)addr, pp); - debuginfo__delete(dinfo); - } else + else ret = -ENOENT; if (ret > 0) { @@ -430,7 +612,7 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, { struct ref_reloc_sym *reloc_sym; char *tmp; - int i; + int i, skipped = 0; if (uprobe) return add_exec_to_probe_trace_events(tevs, ntevs, module); @@ -446,30 +628,40 @@ static int post_process_probe_trace_events(struct probe_trace_event *tevs, } for (i = 0; i < ntevs; i++) { - if (tevs[i].point.address && !tevs[i].point.retprobe) { + if (!tevs[i].point.address || tevs[i].point.retprobe) + continue; + /* If we found a wrong one, mark it by NULL symbol */ + if (kprobe_warn_out_range(tevs[i].point.symbol, + tevs[i].point.address)) { + tmp = NULL; + skipped++; + } else { tmp = strdup(reloc_sym->name); if (!tmp) return -ENOMEM; - free(tevs[i].point.symbol); - tevs[i].point.symbol = tmp; - tevs[i].point.offset = tevs[i].point.address - - reloc_sym->unrelocated_addr; } + /* If we have no realname, use symbol for it */ + if (!tevs[i].point.realname) + tevs[i].point.realname = tevs[i].point.symbol; + else + free(tevs[i].point.symbol); + tevs[i].point.symbol = tmp; + tevs[i].point.offset = tevs[i].point.address - + reloc_sym->unrelocated_addr; } - return 0; + return skipped; } /* Try to find perf_probe_event with debuginfo */ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event **tevs, - int max_tevs, const char *target) + struct probe_trace_event **tevs) { bool need_dwarf = perf_probe_event_need_dwarf(pev); + struct perf_probe_point tmp; struct debuginfo *dinfo; int ntevs, ret = 0; - dinfo = open_debuginfo(target, !need_dwarf); - + dinfo = open_debuginfo(pev->target, !need_dwarf); if (!dinfo) { if (need_dwarf) return -ENOENT; @@ -479,27 +671,41 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, pr_debug("Try to find probe point from debuginfo.\n"); /* Searching trace events corresponding to a probe event */ - ntevs = debuginfo__find_trace_events(dinfo, pev, tevs, max_tevs); + ntevs = debuginfo__find_trace_events(dinfo, pev, tevs); + + if (ntevs == 0) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp); + if (!ret) { + ntevs = debuginfo__find_trace_events(dinfo, pev, tevs); + /* + * Write back to the original probe_event for + * setting appropriate (user given) event name + */ + clear_perf_probe_point(&pev->point); + memcpy(&pev->point, &tmp, sizeof(tmp)); + } + } debuginfo__delete(dinfo); if (ntevs > 0) { /* Succeeded to find trace events */ pr_debug("Found %d probe_trace_events.\n", ntevs); ret = post_process_probe_trace_events(*tevs, ntevs, - target, pev->uprobes); - if (ret < 0) { + pev->target, pev->uprobes); + if (ret < 0 || ret == ntevs) { clear_probe_trace_events(*tevs, ntevs); zfree(tevs); } - return ret < 0 ? ret : ntevs; + if (ret != ntevs) + return ret < 0 ? ret : ntevs; + ntevs = 0; + /* Fall through */ } if (ntevs == 0) { /* No error but failed to find probe point. 
*/ - pr_warning("Probe point '%s' not found in debuginfo.\n", + pr_warning("Probe point '%s' not found.\n", synthesize_perf_probe_point(&pev->point)); - if (need_dwarf) - return -ENOENT; - return 0; + return -ENOENT; } /* Error path : ntevs < 0 */ pr_debug("An error occurred in debuginfo analysis (%d).\n", ntevs); @@ -514,63 +720,6 @@ static int try_to_find_probe_trace_events(struct perf_probe_event *pev, return ntevs; } -/* - * Find a src file from a DWARF tag path. Prepend optional source path prefix - * and chop off leading directories that do not exist. Result is passed back as - * a newly allocated path on success. - * Return 0 if file was found and readable, -errno otherwise. - */ -static int get_real_path(const char *raw_path, const char *comp_dir, - char **new_path) -{ - const char *prefix = symbol_conf.source_prefix; - - if (!prefix) { - if (raw_path[0] != '/' && comp_dir) - /* If not an absolute path, try to use comp_dir */ - prefix = comp_dir; - else { - if (access(raw_path, R_OK) == 0) { - *new_path = strdup(raw_path); - return 0; - } else - return -errno; - } - } - - *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); - if (!*new_path) - return -ENOMEM; - - for (;;) { - sprintf(*new_path, "%s/%s", prefix, raw_path); - - if (access(*new_path, R_OK) == 0) - return 0; - - if (!symbol_conf.source_prefix) - /* In case of searching comp_dir, don't retry */ - return -errno; - - switch (errno) { - case ENAMETOOLONG: - case ENOENT: - case EROFS: - case EFAULT: - raw_path = strchr(++raw_path, '/'); - if (!raw_path) { - zfree(new_path); - return -ENOENT; - } - continue; - - default: - zfree(new_path); - return -errno; - } - } -} - #define LINEBUF_SIZE 256 #define NR_ADDITIONAL_LINES 2 @@ -622,7 +771,8 @@ static int _show_one_line(FILE *fp, int l, bool skip, bool show_num) * Show line-range always requires debuginfo to find source file and * line number. 
*/ -static int __show_line_range(struct line_range *lr, const char *module) +static int __show_line_range(struct line_range *lr, const char *module, + bool user) { int l = 1; struct int_node *ln; @@ -638,6 +788,11 @@ static int __show_line_range(struct line_range *lr, const char *module) return -ENOENT; ret = debuginfo__find_line_range(dinfo, lr); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_line_range(dinfo, lr, module, user); + if (!ret) + ret = debuginfo__find_line_range(dinfo, lr); + } debuginfo__delete(dinfo); if (ret == 0 || ret == -ENOENT) { pr_warning("Specified source line is not found.\n"); @@ -650,7 +805,11 @@ static int __show_line_range(struct line_range *lr, const char *module) /* Convert source file path */ tmp = lr->path; ret = get_real_path(tmp, lr->comp_dir, &lr->path); - free(tmp); /* Free old path */ + + /* Free old path when new path is assigned */ + if (tmp != lr->path) + free(tmp); + if (ret < 0) { pr_warning("Failed to find source file path.\n"); return ret; @@ -707,7 +866,7 @@ int show_line_range(struct line_range *lr, const char *module, bool user) ret = init_symbol_maps(user); if (ret < 0) return ret; - ret = __show_line_range(lr, module); + ret = __show_line_range(lr, module, user); exit_symbol_maps(); return ret; @@ -715,13 +874,13 @@ int show_line_range(struct line_range *lr, const char *module, bool user) static int show_available_vars_at(struct debuginfo *dinfo, struct perf_probe_event *pev, - int max_vls, struct strfilter *_filter, - bool externs) + struct strfilter *_filter) { char *buf; int ret, i, nvars; struct str_node *node; struct variable_list *vls = NULL, *vl; + struct perf_probe_point tmp; const char *var; buf = synthesize_perf_probe_point(&pev->point); @@ -729,8 +888,16 @@ static int show_available_vars_at(struct debuginfo *dinfo, return -EINVAL; pr_debug("Searching variables at %s\n", buf); - ret = debuginfo__find_available_vars_at(dinfo, pev, &vls, - max_vls, externs); + ret = debuginfo__find_available_vars_at(dinfo, pev, &vls); + if (!ret) { /* Not found, retry with an alternative */ + ret = get_alternative_probe_event(dinfo, pev, &tmp); + if (!ret) { + ret = debuginfo__find_available_vars_at(dinfo, pev, + &vls); + /* Release the old probe_point */ + clear_perf_probe_point(&tmp); + } + } if (ret <= 0) { if (ret == 0 || ret == -ENOENT) { pr_err("Failed to find the address of %s\n", buf); @@ -773,8 +940,7 @@ end: /* Show available variables on given probe point */ int show_available_vars(struct perf_probe_event *pevs, int npevs, - int max_vls, const char *module, - struct strfilter *_filter, bool externs) + struct strfilter *_filter) { int i, ret = 0; struct debuginfo *dinfo; @@ -783,7 +949,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, if (ret < 0) return ret; - dinfo = open_debuginfo(module, false); + dinfo = open_debuginfo(pevs->target, false); if (!dinfo) { ret = -ENOENT; goto out; @@ -792,8 +958,7 @@ int show_available_vars(struct perf_probe_event *pevs, int npevs, setup_pager(); for (i = 0; i < npevs && ret >= 0; i++) - ret = show_available_vars_at(dinfo, &pevs[i], max_vls, _filter, - externs); + ret = show_available_vars_at(dinfo, &pevs[i], _filter); debuginfo__delete(dinfo); out: @@ -803,6 +968,10 @@ out: #else /* !HAVE_DWARF_SUPPORT */ +static void debuginfo_cache__exit(void) +{ +} + static int find_perf_probe_point_from_dwarf(struct probe_trace_point *tp __maybe_unused, struct perf_probe_point *pp __maybe_unused, @@ -812,9 +981,7 @@ find_perf_probe_point_from_dwarf(struct 
probe_trace_point *tp __maybe_unused, } static int try_to_find_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event **tevs __maybe_unused, - int max_tevs __maybe_unused, - const char *target __maybe_unused) + struct probe_trace_event **tevs __maybe_unused) { if (perf_probe_event_need_dwarf(pev)) { pr_warning("Debuginfo-analysis is not supported.\n"); @@ -833,10 +1000,8 @@ int show_line_range(struct line_range *lr __maybe_unused, } int show_available_vars(struct perf_probe_event *pevs __maybe_unused, - int npevs __maybe_unused, int max_vls __maybe_unused, - const char *module __maybe_unused, - struct strfilter *filter __maybe_unused, - bool externs __maybe_unused) + int npevs __maybe_unused, + struct strfilter *filter __maybe_unused) { pr_warning("Debuginfo-analysis is not supported.\n"); return -ENOSYS; @@ -876,6 +1041,18 @@ static int parse_line_num(char **ptr, int *val, const char *what) return 0; } +/* Check the name is good for event, group or function */ +static bool is_c_func_name(const char *name) +{ + if (!isalpha(*name) && *name != '_') + return false; + while (*++name != '\0') { + if (!isalpha(*name) && !isdigit(*name) && *name != '_') + return false; + } + return true; +} + /* * Stuff 'lr' according to the line range described by 'arg'. * The line range syntax is described by: @@ -944,10 +1121,15 @@ int parse_line_range_desc(const char *arg, struct line_range *lr) goto err; } lr->function = name; - } else if (strchr(name, '.')) + } else if (strchr(name, '/') || strchr(name, '.')) lr->file = name; - else + else if (is_c_func_name(name))/* We reuse it for checking funcname */ lr->function = name; + else { /* Invalid name */ + semantic_error("'%s' is not a valid function name.\n", name); + err = -EINVAL; + goto err; + } return 0; err: @@ -955,24 +1137,13 @@ err: return err; } -/* Check the name is good for event/group */ -static bool check_event_name(const char *name) -{ - if (!isalpha(*name) && *name != '_') - return false; - while (*++name != '\0') { - if (!isalpha(*name) && !isdigit(*name) && *name != '_') - return false; - } - return true; -} - /* Parse probepoint definition. */ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) { struct perf_probe_point *pp = &pev->point; char *ptr, *tmp; char c, nc = 0; + bool file_spec = false; /* * <Syntax> * perf probe [EVENT=]SRC[:LN|;PTN] @@ -980,6 +1151,8 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) * * TODO:Group name support */ + if (!arg) + return -EINVAL; ptr = strpbrk(arg, ";=@+%"); if (ptr && *ptr == '=') { /* Event name */ @@ -989,7 +1162,7 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) semantic_error("Group name is not supported yet.\n"); return -ENOTSUP; } - if (!check_event_name(arg)) { + if (!is_c_func_name(arg)) { semantic_error("%s is bad for event name -it must " "follow C symbol-naming rule.\n", arg); return -EINVAL; @@ -1001,6 +1174,23 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) arg = tmp; } + /* + * Check arg is function or file name and copy it. + * + * We consider arg to be a file spec if and only if it satisfies + * all of the below criteria:: + * - it does not include any of "+@%", + * - it includes one of ":;", and + * - it has a period '.' in the name. + * + * Otherwise, we consider arg to be a function specification. + */ + if (!strpbrk(arg, "+@%") && (ptr = strpbrk(arg, ";:")) != NULL) { + /* This is a file spec if it includes a '.' 
before ; or : */ + if (memchr(arg, '.', ptr - arg)) + file_spec = true; + } + ptr = strpbrk(arg, ";:+@%"); if (ptr) { nc = *ptr; @@ -1011,10 +1201,9 @@ static int parse_perf_probe_point(char *arg, struct perf_probe_event *pev) if (tmp == NULL) return -ENOMEM; - /* Check arg is function or file and copy it */ - if (strchr(tmp, '.')) /* File */ + if (file_spec) pp->file = tmp; - else /* Function */ + else pp->function = tmp; /* Parse other options */ @@ -1656,8 +1845,7 @@ static int find_perf_probe_point_from_map(struct probe_trace_point *tp, out: if (map && !is_kprobe) { - dso__delete(map->dso); - map__delete(map); + map__put(map); } return ret; @@ -1739,15 +1927,13 @@ static int convert_to_perf_probe_event(struct probe_trace_event *tev, void clear_perf_probe_event(struct perf_probe_event *pev) { - struct perf_probe_point *pp = &pev->point; struct perf_probe_arg_field *field, *next; int i; free(pev->event); free(pev->group); - free(pp->file); - free(pp->function); - free(pp->lazy_line); + free(pev->target); + clear_perf_probe_point(&pev->point); for (i = 0; i < pev->nargs; i++) { free(pev->args[i].name); @@ -1773,6 +1959,7 @@ static void clear_probe_trace_event(struct probe_trace_event *tev) free(tev->event); free(tev->group); free(tev->point.symbol); + free(tev->point.realname); free(tev->point.module); for (i = 0; i < tev->nargs; i++) { free(tev->args[i].name); @@ -1805,7 +1992,7 @@ static void print_open_warning(int err, bool is_kprobe) " - please rebuild kernel with %s.\n", is_kprobe ? 'k' : 'u', config); } else if (err == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else pr_warning("Failed to open %cprobe_events: %s\n", is_kprobe ? 'k' : 'u', @@ -1816,7 +2003,7 @@ static void print_both_open_warning(int kerr, int uerr) { /* Both kprobes and uprobes are disabled, warn it. 
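Further down, open_probe_events() learns to prefer a tracefs mount and only fall back to debugfs plus a tracing/ sub-directory; the same hunk also fixes the open() call, which had been passing O_APPEND as the mode argument instead of OR-ing it into the flags. Below is a minimal sketch of that fallback, with the usual mount points hard-coded in place of the tracefs_find_mountpoint()/debugfs_find_mountpoint() helpers, which locate the real mount point at run time.

/*
 * Sketch only: prefer tracefs, fall back to debugfs + "tracing/".
 * The two candidate paths stand in for the mount-point lookup helpers.
 */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

static int open_kprobe_events_sketch(int readwrite)
{
	static const char * const candidates[] = {
		"/sys/kernel/tracing/kprobe_events",		/* tracefs */
		"/sys/kernel/debug/tracing/kprobe_events",	/* debugfs */
	};
	unsigned int i;
	int fd = -1;

	for (i = 0; i < sizeof(candidates) / sizeof(candidates[0]); i++) {
		if (access(candidates[i], F_OK) != 0)
			continue;
		fd = open(candidates[i],
			  readwrite ? O_RDWR | O_APPEND : O_RDONLY);
		break;
	}
	return fd;
}

int main(void)
{
	int fd = open_kprobe_events_sketch(0);

	if (fd >= 0) {
		puts("kprobe_events found");
		close(fd);
	}
	return 0;
}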
*/ if (kerr == -ENOTSUP && uerr == -ENOTSUP) - pr_warning("Debugfs is not mounted.\n"); + pr_warning("Tracefs or debugfs is not mounted.\n"); else if (kerr == -ENOENT && uerr == -ENOENT) pr_warning("Please rebuild kernel with CONFIG_KPROBE_EVENTS " "or/and CONFIG_UPROBE_EVENTS.\n"); @@ -1833,17 +2020,24 @@ static int open_probe_events(const char *trace_file, bool readwrite) { char buf[PATH_MAX]; const char *__debugfs; + const char *tracing_dir = ""; int ret; - __debugfs = debugfs_find_mountpoint(); - if (__debugfs == NULL) - return -ENOTSUP; + __debugfs = tracefs_find_mountpoint(); + if (__debugfs == NULL) { + tracing_dir = "tracing/"; + + __debugfs = debugfs_find_mountpoint(); + if (__debugfs == NULL) + return -ENOTSUP; + } - ret = e_snprintf(buf, PATH_MAX, "%s/%s", __debugfs, trace_file); + ret = e_snprintf(buf, PATH_MAX, "%s/%s%s", + __debugfs, tracing_dir, trace_file); if (ret >= 0) { pr_debug("Opening %s write=%d\n", buf, readwrite); if (readwrite && !probe_event_dry_run) - ret = open(buf, O_RDWR, O_APPEND); + ret = open(buf, O_RDWR | O_APPEND, 0); else ret = open(buf, O_RDONLY, 0); @@ -1855,12 +2049,12 @@ static int open_probe_events(const char *trace_file, bool readwrite) static int open_kprobe_events(bool readwrite) { - return open_probe_events("tracing/kprobe_events", readwrite); + return open_probe_events("kprobe_events", readwrite); } static int open_uprobe_events(bool readwrite) { - return open_probe_events("tracing/uprobe_events", readwrite); + return open_probe_events("uprobe_events", readwrite); } /* Get raw string list of current kprobe_events or uprobe_events */ @@ -1895,9 +2089,120 @@ static struct strlist *get_probe_trace_command_rawlist(int fd) return sl; } -/* Show an event */ -static int show_perf_probe_event(struct perf_probe_event *pev, - const char *module) +struct kprobe_blacklist_node { + struct list_head list; + unsigned long start; + unsigned long end; + char *symbol; +}; + +static void kprobe_blacklist__delete(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + + while (!list_empty(blacklist)) { + node = list_first_entry(blacklist, + struct kprobe_blacklist_node, list); + list_del(&node->list); + free(node->symbol); + free(node); + } +} + +static int kprobe_blacklist__load(struct list_head *blacklist) +{ + struct kprobe_blacklist_node *node; + const char *__debugfs = debugfs_find_mountpoint(); + char buf[PATH_MAX], *p; + FILE *fp; + int ret; + + if (__debugfs == NULL) + return -ENOTSUP; + + ret = e_snprintf(buf, PATH_MAX, "%s/kprobes/blacklist", __debugfs); + if (ret < 0) + return ret; + + fp = fopen(buf, "r"); + if (!fp) + return -errno; + + ret = 0; + while (fgets(buf, PATH_MAX, fp)) { + node = zalloc(sizeof(*node)); + if (!node) { + ret = -ENOMEM; + break; + } + INIT_LIST_HEAD(&node->list); + list_add_tail(&node->list, blacklist); + if (sscanf(buf, "0x%lx-0x%lx", &node->start, &node->end) != 2) { + ret = -EINVAL; + break; + } + p = strchr(buf, '\t'); + if (p) { + p++; + if (p[strlen(p) - 1] == '\n') + p[strlen(p) - 1] = '\0'; + } else + p = (char *)"unknown"; + node->symbol = strdup(p); + if (!node->symbol) { + ret = -ENOMEM; + break; + } + pr_debug2("Blacklist: 0x%lx-0x%lx, %s\n", + node->start, node->end, node->symbol); + ret++; + } + if (ret < 0) + kprobe_blacklist__delete(blacklist); + fclose(fp); + + return ret; +} + +static struct kprobe_blacklist_node * +kprobe_blacklist__find_by_address(struct list_head *blacklist, + unsigned long address) +{ + struct kprobe_blacklist_node *node; + + list_for_each_entry(node, blacklist, list) { + 
if (node->start <= address && address <= node->end) + return node; + } + + return NULL; +} + +static LIST_HEAD(kprobe_blacklist); + +static void kprobe_blacklist__init(void) +{ + if (!list_empty(&kprobe_blacklist)) + return; + + if (kprobe_blacklist__load(&kprobe_blacklist) < 0) + pr_debug("No kprobe blacklist support, ignored\n"); +} + +static void kprobe_blacklist__release(void) +{ + kprobe_blacklist__delete(&kprobe_blacklist); +} + +static bool kprobe_blacklist__listed(unsigned long address) +{ + return !!kprobe_blacklist__find_by_address(&kprobe_blacklist, address); +} + +static int perf_probe_event__sprintf(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, + struct strbuf *result) { int i, ret; char buf[128]; @@ -1908,30 +2213,67 @@ static int show_perf_probe_event(struct perf_probe_event *pev, if (!place) return -EINVAL; - ret = e_snprintf(buf, 128, "%s:%s", pev->group, pev->event); + ret = e_snprintf(buf, 128, "%s:%s", group, event); if (ret < 0) - return ret; + goto out; - pr_info(" %-20s (on %s", buf, place); + strbuf_addf(result, " %-20s (on %s", buf, place); if (module) - pr_info(" in %s", module); + strbuf_addf(result, " in %s", module); if (pev->nargs > 0) { - pr_info(" with"); + strbuf_addstr(result, " with"); for (i = 0; i < pev->nargs; i++) { ret = synthesize_perf_probe_arg(&pev->args[i], buf, 128); if (ret < 0) - break; - pr_info(" %s", buf); + goto out; + strbuf_addf(result, " %s", buf); } } - pr_info(")\n"); + strbuf_addch(result, ')'); +out: free(place); return ret; } -static int __show_perf_probe_events(int fd, bool is_kprobe) +/* Show an event */ +static int show_perf_probe_event(const char *group, const char *event, + struct perf_probe_event *pev, + const char *module, bool use_stdout) +{ + struct strbuf buf = STRBUF_INIT; + int ret; + + ret = perf_probe_event__sprintf(group, event, pev, module, &buf); + if (ret >= 0) { + if (use_stdout) + printf("%s\n", buf.buf); + else + pr_info("%s\n", buf.buf); + } + strbuf_release(&buf); + + return ret; +} + +static bool filter_probe_trace_event(struct probe_trace_event *tev, + struct strfilter *filter) +{ + char tmp[128]; + + /* At first, check the event name itself */ + if (strfilter__compare(filter, tev->event)) + return true; + + /* Next, check the combination of name and group */ + if (e_snprintf(tmp, 128, "%s:%s", tev->group, tev->event) < 0) + return false; + return strfilter__compare(filter, tmp); +} + +static int __show_perf_probe_events(int fd, bool is_kprobe, + struct strfilter *filter) { int ret = 0; struct probe_trace_event tev; @@ -1949,24 +2291,31 @@ static int __show_perf_probe_events(int fd, bool is_kprobe) strlist__for_each(ent, rawlist) { ret = parse_probe_trace_command(ent->s, &tev); if (ret >= 0) { + if (!filter_probe_trace_event(&tev, filter)) + goto next; ret = convert_to_perf_probe_event(&tev, &pev, is_kprobe); - if (ret >= 0) - ret = show_perf_probe_event(&pev, - tev.point.module); + if (ret < 0) + goto next; + ret = show_perf_probe_event(pev.group, pev.event, + &pev, tev.point.module, + true); } +next: clear_perf_probe_event(&pev); clear_probe_trace_event(&tev); if (ret < 0) break; } strlist__delete(rawlist); + /* Cleanup cached debuginfo if needed */ + debuginfo_cache__exit(); return ret; } /* List up current perf-probe events */ -int show_perf_probe_events(void) +int show_perf_probe_events(struct strfilter *filter) { int kp_fd, up_fd, ret; @@ -1978,7 +2327,7 @@ int show_perf_probe_events(void) kp_fd = open_kprobe_events(false); if (kp_fd >= 0) { - ret = 
__show_perf_probe_events(kp_fd, true); + ret = __show_perf_probe_events(kp_fd, true, filter); close(kp_fd); if (ret < 0) goto out; @@ -1992,7 +2341,7 @@ int show_perf_probe_events(void) } if (up_fd >= 0) { - ret = __show_perf_probe_events(up_fd, false); + ret = __show_perf_probe_events(up_fd, false, filter); close(up_fd); } out: @@ -2066,6 +2415,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, struct strlist *namelist, bool allow_suffix) { int i, ret; + char *p; + + if (*base == '.') + base++; /* Try no suffix */ ret = e_snprintf(buf, len, "%s", base); @@ -2073,6 +2426,10 @@ static int get_new_event_name(char *buf, size_t len, const char *base, pr_debug("snprintf() failed: %d\n", ret); return ret; } + /* Cut off the postfixes (e.g. .const, .isra)*/ + p = strchr(buf, '.'); + if (p && p != buf) + *p = '\0'; if (!strlist__has_entry(namelist, buf)) return 0; @@ -2100,6 +2457,27 @@ static int get_new_event_name(char *buf, size_t len, const char *base, return ret; } +/* Warn if the current kernel's uprobe implementation is old */ +static void warn_uprobe_event_compat(struct probe_trace_event *tev) +{ + int i; + char *buf = synthesize_probe_trace_command(tev); + + /* Old uprobe event doesn't support memory dereference */ + if (!tev->uprobes || tev->nargs == 0 || !buf) + goto out; + + for (i = 0; i < tev->nargs; i++) + if (strglobmatch(tev->args[i].value, "[$@+-]*")) { + pr_warning("Please upgrade your kernel to at least " + "3.14 to have access to feature %s\n", + tev->args[i].value); + break; + } +out: + free(buf); +} + static int __add_probe_trace_events(struct perf_probe_event *pev, struct probe_trace_event *tevs, int ntevs, bool allow_suffix) @@ -2107,8 +2485,9 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, int i, fd, ret; struct probe_trace_event *tev = NULL; char buf[64]; - const char *event, *group; + const char *event = NULL, *group = NULL; struct strlist *namelist; + bool safename; if (pev->uprobes) fd = open_uprobe_events(true); @@ -2124,20 +2503,26 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, namelist = get_probe_trace_event_names(fd, false); if (!namelist) { pr_debug("Failed to get current event list.\n"); - return -EIO; + ret = -ENOMEM; + goto close_out; } + safename = (pev->point.function && !strisglob(pev->point.function)); ret = 0; pr_info("Added new event%s\n", (ntevs > 1) ? 
"s:" : ":"); for (i = 0; i < ntevs; i++) { tev = &tevs[i]; + /* Skip if the symbol is out of .text or blacklisted */ + if (!tev->point.symbol) + continue; + if (pev->event) event = pev->event; else - if (pev->point.function) + if (safename) event = pev->point.function; else - event = tev->point.symbol; + event = tev->point.realname; if (pev->group) group = pev->group; else @@ -2162,15 +2547,12 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, /* Add added event name to namelist */ strlist__add(namelist, event); - /* Trick here - save current event/group */ - event = pev->event; - group = pev->group; - pev->event = tev->event; - pev->group = tev->group; - show_perf_probe_event(pev, tev->point.module); - /* Trick here - restore current event/group */ - pev->event = (char *)event; - pev->group = (char *)group; + /* We use tev's name for showing new events */ + show_perf_probe_event(tev->group, tev->event, pev, + tev->point.module, false); + /* Save the last valid name */ + event = tev->event; + group = tev->group; /* * Probes after the first probe which comes from same @@ -2180,27 +2562,38 @@ static int __add_probe_trace_events(struct perf_probe_event *pev, */ allow_suffix = true; } + if (ret == -EINVAL && pev->uprobes) + warn_uprobe_event_compat(tev); - if (ret >= 0) { + /* Note that it is possible to skip all events because of blacklist */ + if (ret >= 0 && event) { /* Show how to use the event. */ pr_info("\nYou can now use it in all perf tools, such as:\n\n"); - pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", tev->group, - tev->event); + pr_info("\tperf record -e %s:%s -aR sleep 1\n\n", group, event); } strlist__delete(namelist); +close_out: close(fd); return ret; } -static int find_probe_functions(struct map *map, char *name) +static int find_probe_functions(struct map *map, char *name, + struct symbol **syms) { int found = 0; struct symbol *sym; + struct rb_node *tmp; + + if (map__load(map, NULL) < 0) + return 0; - map__for_each_symbol_by_name(map, name, sym) { - if (sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) + map__for_each_symbol(map, sym, tmp) { + if (strglobmatch(sym->name, name)) { found++; + if (syms && found < probe_conf.max_probes) + syms[found - 1] = sym; + } } return found; @@ -2209,54 +2602,58 @@ static int find_probe_functions(struct map *map, char *name) #define strdup_or_goto(str, label) \ ({ char *__p = strdup(str); if (!__p) goto label; __p; }) +void __weak arch__fix_tev_from_maps(struct perf_probe_event *pev __maybe_unused, + struct probe_trace_event *tev __maybe_unused, + struct map *map __maybe_unused) { } + /* * Find probe function addresses from map. 
* Return an error or the number of found probe_trace_event */ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, - struct probe_trace_event **tevs, - int max_tevs, const char *target) + struct probe_trace_event **tevs) { struct map *map = NULL; - struct kmap *kmap = NULL; struct ref_reloc_sym *reloc_sym = NULL; struct symbol *sym; + struct symbol **syms = NULL; struct probe_trace_event *tev; struct perf_probe_point *pp = &pev->point; struct probe_trace_point *tp; int num_matched_functions; - int ret, i; + int ret, i, j, skipped = 0; - /* Init maps of given executable or kernel */ - if (pev->uprobes) - map = dso__new_map(target); - else - map = kernel_get_module_map(target); + map = get_target_map(pev->target, pev->uprobes); if (!map) { ret = -EINVAL; goto out; } + syms = malloc(sizeof(struct symbol *) * probe_conf.max_probes); + if (!syms) { + ret = -ENOMEM; + goto out; + } + /* * Load matched symbols: Since the different local symbols may have * same name but different addresses, this lists all the symbols. */ - num_matched_functions = find_probe_functions(map, pp->function); + num_matched_functions = find_probe_functions(map, pp->function, syms); if (num_matched_functions == 0) { pr_err("Failed to find symbol %s in %s\n", pp->function, - target ? : "kernel"); + pev->target ? : "kernel"); ret = -ENOENT; goto out; - } else if (num_matched_functions > max_tevs) { + } else if (num_matched_functions > probe_conf.max_probes) { pr_err("Too many functions matched in %s\n", - target ? : "kernel"); + pev->target ? : "kernel"); ret = -E2BIG; goto out; } if (!pev->uprobes && !pp->retprobe) { - kmap = map__kmap(map); - reloc_sym = kmap->ref_reloc_sym; + reloc_sym = kernel_get_ref_reloc_sym(); if (!reloc_sym) { pr_warning("Relocated base symbol is not found!\n"); ret = -EINVAL; @@ -2273,7 +2670,9 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, ret = 0; - map__for_each_symbol_by_name(map, pp->function, sym) { + for (j = 0; j < num_matched_functions; j++) { + sym = syms[j]; + tev = (*tevs) + ret; tp = &tev->point; if (ret == num_matched_functions) { @@ -2290,16 +2689,24 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, } /* Add one probe point */ tp->address = map->unmap_ip(map, sym->start) + pp->offset; - if (reloc_sym) { + /* If we found a wrong one, mark it by NULL symbol */ + if (!pev->uprobes && + kprobe_warn_out_range(sym->name, tp->address)) { + tp->symbol = NULL; /* Skip it */ + skipped++; + } else if (reloc_sym) { tp->symbol = strdup_or_goto(reloc_sym->name, nomem_out); tp->offset = tp->address - reloc_sym->addr; } else { tp->symbol = strdup_or_goto(sym->name, nomem_out); tp->offset = pp->offset; } + tp->realname = strdup_or_goto(sym->name, nomem_out); + tp->retprobe = pp->retprobe; - if (target) - tev->point.module = strdup_or_goto(target, nomem_out); + if (pev->target) + tev->point.module = strdup_or_goto(pev->target, + nomem_out); tev->uprobes = pev->uprobes; tev->nargs = pev->nargs; if (tev->nargs) { @@ -2321,14 +2728,16 @@ static int find_probe_trace_events_from_map(struct perf_probe_event *pev, strdup_or_goto(pev->args[i].type, nomem_out); } + arch__fix_tev_from_maps(pev, tev, map); + } + if (ret == skipped) { + ret = -ENOENT; + goto err_out; } out: - if (map && pev->uprobes) { - /* Only when using uprobe(exec) map needs to be released */ - dso__delete(map->dso); - map__delete(map); - } + put_target_map(map, pev->uprobes); + free(syms); return ret; nomem_out: @@ -2339,27 +2748,34 @@ err_out: goto out; } +bool 
__weak arch__prefers_symtab(void) { return false; } + static int convert_to_probe_trace_events(struct perf_probe_event *pev, - struct probe_trace_event **tevs, - int max_tevs, const char *target) + struct probe_trace_event **tevs) { int ret; if (pev->uprobes && !pev->group) { /* Replace group name if not given */ - ret = convert_exec_to_group(target, &pev->group); + ret = convert_exec_to_group(pev->target, &pev->group); if (ret != 0) { pr_warning("Failed to make a group name.\n"); return ret; } } + if (arch__prefers_symtab() && !perf_probe_event_need_dwarf(pev)) { + ret = find_probe_trace_events_from_map(pev, tevs); + if (ret > 0) + return ret; /* Found in symbol table */ + } + /* Convert perf_probe_event with debuginfo */ - ret = try_to_find_probe_trace_events(pev, tevs, max_tevs, target); + ret = try_to_find_probe_trace_events(pev, tevs); if (ret != 0) return ret; /* Found in debuginfo or got an error */ - return find_probe_trace_events_from_map(pev, tevs, max_tevs, target); + return find_probe_trace_events_from_map(pev, tevs); } struct __event_package { @@ -2368,8 +2784,7 @@ struct __event_package { int ntevs; }; -int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_tevs, const char *target, bool force_add) +int add_perf_probe_events(struct perf_probe_event *pevs, int npevs) { int i, j, ret; struct __event_package *pkgs; @@ -2389,20 +2804,24 @@ int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, /* Loop 1: convert all events */ for (i = 0; i < npevs; i++) { pkgs[i].pev = &pevs[i]; + /* Init kprobe blacklist if needed */ + if (!pkgs[i].pev->uprobes) + kprobe_blacklist__init(); /* Convert with or without debuginfo */ ret = convert_to_probe_trace_events(pkgs[i].pev, - &pkgs[i].tevs, - max_tevs, - target); + &pkgs[i].tevs); if (ret < 0) goto end; pkgs[i].ntevs = ret; } + /* This just release blacklist only if allocated */ + kprobe_blacklist__release(); /* Loop 2: add all events */ for (i = 0; i < npevs; i++) { ret = __add_probe_trace_events(pkgs[i].pev, pkgs[i].tevs, - pkgs[i].ntevs, force_add); + pkgs[i].ntevs, + probe_conf.force_add); if (ret < 0) break; } @@ -2454,40 +2873,39 @@ error: return ret; } -static int del_trace_probe_event(int fd, const char *buf, - struct strlist *namelist) +static int del_trace_probe_events(int fd, struct strfilter *filter, + struct strlist *namelist) { - struct str_node *ent, *n; - int ret = -1; + struct str_node *ent; + const char *p; + int ret = -ENOENT; - if (strpbrk(buf, "*?")) { /* Glob-exp */ - strlist__for_each_safe(ent, n, namelist) - if (strglobmatch(ent->s, buf)) { - ret = __del_trace_probe_event(fd, ent); - if (ret < 0) - break; - strlist__remove(namelist, ent); - } - } else { - ent = strlist__find(namelist, buf); - if (ent) { + if (!namelist) + return -ENOENT; + + strlist__for_each(ent, namelist) { + p = strchr(ent->s, ':'); + if ((p && strfilter__compare(filter, p + 1)) || + strfilter__compare(filter, ent->s)) { ret = __del_trace_probe_event(fd, ent); - if (ret >= 0) - strlist__remove(namelist, ent); + if (ret < 0) + break; } } return ret; } -int del_perf_probe_events(struct strlist *dellist) +int del_perf_probe_events(struct strfilter *filter) { - int ret = -1, ufd = -1, kfd = -1; - char buf[128]; - const char *group, *event; - char *p, *str; - struct str_node *ent; + int ret, ret2, ufd = -1, kfd = -1; struct strlist *namelist = NULL, *unamelist = NULL; + char *str = strfilter__string(filter); + + if (!str) + return -EINVAL; + + pr_debug("Delete filter: \'%s\'\n", str); /* Get current event names */ kfd 
= open_kprobe_events(true); @@ -2500,49 +2918,23 @@ int del_perf_probe_events(struct strlist *dellist) if (kfd < 0 && ufd < 0) { print_both_open_warning(kfd, ufd); + ret = kfd; goto error; } - if (namelist == NULL && unamelist == NULL) + ret = del_trace_probe_events(kfd, filter, namelist); + if (ret < 0 && ret != -ENOENT) goto error; - strlist__for_each(ent, dellist) { - str = strdup(ent->s); - if (str == NULL) { - ret = -ENOMEM; - goto error; - } - pr_debug("Parsing: %s\n", str); - p = strchr(str, ':'); - if (p) { - group = str; - *p = '\0'; - event = p + 1; - } else { - group = "*"; - event = str; - } - - ret = e_snprintf(buf, 128, "%s:%s", group, event); - if (ret < 0) { - pr_err("Failed to copy event."); - free(str); - goto error; - } - - pr_debug("Group: %s, Event: %s\n", group, event); - - if (namelist) - ret = del_trace_probe_event(kfd, buf, namelist); - - if (unamelist && ret != 0) - ret = del_trace_probe_event(ufd, buf, unamelist); - - if (ret != 0) - pr_info("Info: Event \"%s\" does not exist.\n", buf); - - free(str); + ret2 = del_trace_probe_events(ufd, filter, unamelist); + if (ret2 < 0 && ret2 != -ENOENT) { + ret = ret2; + goto error; } + if (ret == -ENOENT && ret2 == -ENOENT) + pr_debug("\"%s\" does not hit any event.\n", str); + /* Note that this is silently ignored */ + ret = 0; error: if (kfd >= 0) { @@ -2554,6 +2946,7 @@ error: strlist__delete(unamelist); close(ufd); } + free(str); return ret; } @@ -2568,8 +2961,7 @@ static struct strfilter *available_func_filter; static int filter_available_functions(struct map *map __maybe_unused, struct symbol *sym) { - if ((sym->binding == STB_GLOBAL || sym->binding == STB_LOCAL) && - strfilter__compare(available_func_filter, sym->name)) + if (strfilter__compare(available_func_filter, sym->name)) return 0; return 1; } @@ -2608,8 +3000,7 @@ int show_available_funcs(const char *target, struct strfilter *_filter, dso__fprintf_symbols_by_name(map->dso, map->type, stdout); end: if (user) { - dso__delete(map->dso); - map__delete(map); + map__put(map); } exit_symbol_maps(); diff --git a/tools/perf/util/probe-event.h b/tools/perf/util/probe-event.h index e01e9943139f..31db6ee7db54 100644 --- a/tools/perf/util/probe-event.h +++ b/tools/perf/util/probe-event.h @@ -6,10 +6,20 @@ #include "strlist.h" #include "strfilter.h" +/* Probe related configurations */ +struct probe_conf { + bool show_ext_vars; + bool show_location_range; + bool force_add; + bool no_inlines; + int max_probes; +}; +extern struct probe_conf probe_conf; extern bool probe_event_dry_run; /* kprobe-tracer and uprobe-tracer tracing point */ struct probe_trace_point { + char *realname; /* function real name (if needed) */ char *symbol; /* Base symbol */ char *module; /* Module name */ unsigned long offset; /* Offset from symbol */ @@ -73,7 +83,8 @@ struct perf_probe_event { char *group; /* Group name */ struct perf_probe_point point; /* Probe point */ int nargs; /* Number of arguments */ - bool uprobes; + bool uprobes; /* Uprobe event flag */ + char *target; /* Target binary */ struct perf_probe_arg *args; /* Arguments */ }; @@ -120,21 +131,18 @@ extern void line_range__clear(struct line_range *lr); /* Initialize line range */ extern int line_range__init(struct line_range *lr); -/* Internal use: Return kernel/module path */ -extern const char *kernel_get_module_path(const char *module); - -extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs, - int max_probe_points, const char *module, - bool force_add); -extern int del_perf_probe_events(struct strlist 
*dellist); -extern int show_perf_probe_events(void); +extern int add_perf_probe_events(struct perf_probe_event *pevs, int npevs); +extern int del_perf_probe_events(struct strfilter *filter); +extern int show_perf_probe_events(struct strfilter *filter); extern int show_line_range(struct line_range *lr, const char *module, bool user); extern int show_available_vars(struct perf_probe_event *pevs, int npevs, - int max_probe_points, const char *module, - struct strfilter *filter, bool externs); + struct strfilter *filter); extern int show_available_funcs(const char *module, struct strfilter *filter, bool user); +bool arch__prefers_symtab(void); +void arch__fix_tev_from_maps(struct perf_probe_event *pev, + struct probe_trace_event *tev, struct map *map); /* Maximum index number of event-name postfix */ #define MAX_EVENT_INDEX 1024 diff --git a/tools/perf/util/probe-finder.c b/tools/perf/util/probe-finder.c index b5247d777f0e..2da65a710893 100644 --- a/tools/perf/util/probe-finder.c +++ b/tools/perf/util/probe-finder.c @@ -130,7 +130,7 @@ struct debuginfo *debuginfo__new(const char *path) continue; dinfo = __debuginfo__new(buf); } - dso__delete(dso); + dso__put(dso); out: /* if failed to open all distro debuginfo, open given binary */ @@ -177,7 +177,7 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, Dwarf_Word offs = 0; bool ref = false; const char *regs; - int ret; + int ret, ret2 = 0; if (dwarf_attr(vr_die, DW_AT_external, &attr) != NULL) goto static_var; @@ -187,9 +187,19 @@ static int convert_variable_location(Dwarf_Die *vr_die, Dwarf_Addr addr, return -EINVAL; /* Broken DIE ? */ if (dwarf_getlocation_addr(&attr, addr, &op, &nops, 1) <= 0) { ret = dwarf_entrypc(sp_die, &tmp); - if (ret || addr != tmp || - dwarf_tag(vr_die) != DW_TAG_formal_parameter || - dwarf_highpc(sp_die, &tmp)) + if (ret) + return -ENOENT; + + if (probe_conf.show_location_range && + (dwarf_tag(vr_die) == DW_TAG_variable)) { + ret2 = -ERANGE; + } else if (addr != tmp || + dwarf_tag(vr_die) != DW_TAG_formal_parameter) { + return -ENOENT; + } + + ret = dwarf_highpc(sp_die, &tmp); + if (ret) return -ENOENT; /* * This is fuzzed by fentry mcount. 
We try to find the @@ -210,7 +220,7 @@ found: if (op->atom == DW_OP_addr) { static_var: if (!tvar) - return 0; + return ret2; /* Static variables on memory (not stack), make @varname */ ret = strlen(dwarf_diename(vr_die)); tvar->value = zalloc(ret + 2); @@ -220,7 +230,7 @@ static_var: tvar->ref = alloc_trace_arg_ref((long)offs); if (tvar->ref == NULL) return -ENOMEM; - return 0; + return ret2; } /* If this is based on frame buffer, set the offset */ @@ -250,14 +260,14 @@ static_var: } if (!tvar) - return 0; + return ret2; regs = get_arch_regstr(regn); if (!regs) { /* This should be a bug in DWARF or this tool */ pr_warning("Mapping for the register number %u " "missing on this architecture.\n", regn); - return -ERANGE; + return -ENOTSUP; } tvar->value = strdup(regs); @@ -269,7 +279,7 @@ static_var: if (tvar->ref == NULL) return -ENOMEM; } - return 0; + return ret2; } #define BYTES_TO_BITS(nb) ((nb) * BITS_PER_LONG / sizeof(long)) @@ -456,11 +466,12 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, return -EINVAL; } if (field->name[0] == '[') { - pr_err("Semantic error: %s is not a pointor" + pr_err("Semantic error: %s is not a pointer" " nor array.\n", varname); return -EINVAL; } - if (field->ref) { + /* While prcessing unnamed field, we don't care about this */ + if (field->ref && dwarf_diename(vr_die)) { pr_err("Semantic error: %s must be referred by '.'\n", field->name); return -EINVAL; @@ -491,6 +502,11 @@ static int convert_variable_fields(Dwarf_Die *vr_die, const char *varname, } ref->offset += (long)offs; + /* If this member is unnamed, we need to reuse this field */ + if (!dwarf_diename(die_mem)) + return convert_variable_fields(die_mem, varname, field, + &ref, die_mem); + next: /* Converting next field */ if (field->next) @@ -511,10 +527,12 @@ static int convert_variable(Dwarf_Die *vr_die, struct probe_finder *pf) ret = convert_variable_location(vr_die, pf->addr, pf->fb_ops, &pf->sp_die, pf->tvar); - if (ret == -ENOENT || ret == -EINVAL) - pr_err("Failed to find the location of %s at this address.\n" - " Perhaps, it has been optimized out.\n", pf->pvar->var); - else if (ret == -ENOTSUP) + if (ret == -ENOENT || ret == -EINVAL) { + pr_err("Failed to find the location of the '%s' variable at this address.\n" + " Perhaps it has been optimized out.\n" + " Use -V with the --range option to show '%s' location range.\n", + pf->pvar->var, pf->pvar->var); + } else if (ret == -ENOTSUP) pr_err("Sorry, we don't support this variable location yet.\n"); else if (ret == 0 && pf->pvar->field) { ret = convert_variable_fields(vr_die, pf->pvar->var, @@ -572,10 +590,12 @@ static int find_variable(Dwarf_Die *sc_die, struct probe_finder *pf) /* Search child die for local variables and parameters. 
*/ if (!die_find_variable_at(sc_die, pf->pvar->var, pf->addr, &vr_die)) { /* Search again in global variables */ - if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, 0, &vr_die)) + if (!die_find_variable_at(&pf->cu_die, pf->pvar->var, + 0, &vr_die)) { pr_warning("Failed to find '%s' in this function.\n", pf->pvar->var); ret = -ENOENT; + } } if (ret >= 0) ret = convert_variable(&vr_die, pf); @@ -654,9 +674,15 @@ static int call_probe_finder(Dwarf_Die *sc_die, struct probe_finder *pf) /* If not a real subprogram, find a real one */ if (!die_is_func_def(sc_die)) { if (!die_find_realfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { - pr_warning("Failed to find probe point in any " - "functions.\n"); - return -ENOENT; + if (die_find_tailfunc(&pf->cu_die, pf->addr, &pf->sp_die)) { + pr_warning("Ignoring tail call from %s\n", + dwarf_diename(&pf->sp_die)); + return 0; + } else { + pr_warning("Failed to find probe point in any " + "functions.\n"); + return -ENOENT; + } } } else memcpy(&pf->sp_die, sc_die, sizeof(Dwarf_Die)); @@ -711,7 +737,7 @@ static int find_best_scope_cb(Dwarf_Die *fn_die, void *data) } /* If the function name is given, that's what user expects */ if (fsp->function) { - if (die_compare_name(fn_die, fsp->function)) { + if (die_match_name(fn_die, fsp->function)) { memcpy(fsp->die_mem, fn_die, sizeof(Dwarf_Die)); fsp->found = true; return 1; @@ -849,11 +875,22 @@ static int probe_point_lazy_walker(const char *fname, int lineno, static int find_probe_point_lazy(Dwarf_Die *sp_die, struct probe_finder *pf) { int ret = 0; + char *fpath; if (intlist__empty(pf->lcache)) { + const char *comp_dir; + + comp_dir = cu_get_comp_dir(&pf->cu_die); + ret = get_real_path(pf->fname, comp_dir, &fpath); + if (ret < 0) { + pr_warning("Failed to find source file path.\n"); + return ret; + } + /* Matching lazy line pattern */ - ret = find_lazy_match_lines(pf->lcache, pf->fname, + ret = find_lazy_match_lines(pf->lcache, fpath, pf->pev->point.lazy_line); + free(fpath); if (ret <= 0) return ret; } @@ -903,37 +940,44 @@ static int probe_point_search_cb(Dwarf_Die *sp_die, void *data) /* Check tag and diename */ if (!die_is_func_def(sp_die) || - !die_compare_name(sp_die, pp->function)) + !die_match_name(sp_die, pp->function)) return DWARF_CB_OK; /* Check declared file */ if (pp->file && strtailcmp(pp->file, dwarf_decl_file(sp_die))) return DWARF_CB_OK; + pr_debug("Matched function: %s\n", dwarf_diename(sp_die)); pf->fname = dwarf_decl_file(sp_die); if (pp->line) { /* Function relative line */ dwarf_decl_line(sp_die, &pf->lno); pf->lno += pp->line; param->retval = find_probe_point_by_line(pf); - } else if (!dwarf_func_inline(sp_die)) { + } else if (die_is_func_instance(sp_die)) { + /* Instances always have the entry address */ + dwarf_entrypc(sp_die, &pf->addr); /* Real function */ if (pp->lazy_line) param->retval = find_probe_point_lazy(sp_die, pf); else { - if (dwarf_entrypc(sp_die, &pf->addr) != 0) { - pr_warning("Failed to get entry address of " - "%s.\n", dwarf_diename(sp_die)); - param->retval = -ENOENT; - return DWARF_CB_ABORT; - } pf->addr += pp->offset; /* TODO: Check the address in this function */ param->retval = call_probe_finder(sp_die, pf); } - } else + } else if (!probe_conf.no_inlines) { /* Inlined function: search instances */ param->retval = die_walk_instances(sp_die, probe_point_inline_cb, (void *)pf); + /* This could be a non-existed inline definition */ + if (param->retval == -ENOENT && strisglob(pp->function)) + param->retval = 0; + } + + /* We need to find other candidates */ + if 
(strisglob(pp->function) && param->retval >= 0) { + param->retval = 0; /* We have to clear the result */ + return DWARF_CB_OK; + } return DWARF_CB_ABORT; /* Exit; no same symbol in this CU. */ } @@ -962,7 +1006,7 @@ static int pubname_search_cb(Dwarf *dbg, Dwarf_Global *gl, void *data) if (dwarf_tag(param->sp_die) != DW_TAG_subprogram) return DWARF_CB_OK; - if (die_compare_name(param->sp_die, param->function)) { + if (die_match_name(param->sp_die, param->function)) { if (!dwarf_offdie(dbg, gl->cu_offset, param->cu_die)) return DWARF_CB_OK; @@ -1015,7 +1059,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, return -ENOMEM; /* Fastpath: lookup by function name from .debug_pubnames section */ - if (pp->function) { + if (pp->function && !strisglob(pp->function)) { struct pubname_callback_param pubname_param = { .function = pp->function, .file = pp->file, @@ -1053,7 +1097,7 @@ static int debuginfo__find_probes(struct debuginfo *dbg, if (pp->function) ret = find_probe_point_by_func(pf); else if (pp->lazy_line) - ret = find_probe_point_lazy(NULL, pf); + ret = find_probe_point_lazy(&pf->cu_die, pf); else { pf->lno = pp->line; ret = find_probe_point_by_line(pf); @@ -1074,6 +1118,7 @@ found: struct local_vars_finder { struct probe_finder *pf; struct perf_probe_arg *args; + bool vars; int max_args; int nargs; int ret; @@ -1088,7 +1133,7 @@ static int copy_variables_cb(Dwarf_Die *die_mem, void *data) tag = dwarf_tag(die_mem); if (tag == DW_TAG_formal_parameter || - tag == DW_TAG_variable) { + (tag == DW_TAG_variable && vf->vars)) { if (convert_variable_location(die_mem, vf->pf->addr, vf->pf->fb_ops, &pf->sp_die, NULL) == 0) { @@ -1114,26 +1159,28 @@ static int expand_probe_args(Dwarf_Die *sc_die, struct probe_finder *pf, Dwarf_Die die_mem; int i; int n = 0; - struct local_vars_finder vf = {.pf = pf, .args = args, + struct local_vars_finder vf = {.pf = pf, .args = args, .vars = false, .max_args = MAX_PROBE_ARGS, .ret = 0}; for (i = 0; i < pf->pev->nargs; i++) { /* var never be NULL */ - if (strcmp(pf->pev->args[i].var, "$vars") == 0) { - pr_debug("Expanding $vars into:"); - vf.nargs = n; - /* Special local variables */ - die_find_child(sc_die, copy_variables_cb, (void *)&vf, - &die_mem); - pr_debug(" (%d)\n", vf.nargs - n); - if (vf.ret < 0) - return vf.ret; - n = vf.nargs; - } else { + if (strcmp(pf->pev->args[i].var, PROBE_ARG_VARS) == 0) + vf.vars = true; + else if (strcmp(pf->pev->args[i].var, PROBE_ARG_PARAMS) != 0) { /* Copy normal argument */ args[n] = pf->pev->args[i]; n++; + continue; } + pr_debug("Expanding %s into:", pf->pev->args[i].var); + vf.nargs = n; + /* Special local variables */ + die_find_child(sc_die, copy_variables_cb, (void *)&vf, + &die_mem); + pr_debug(" (%d)\n", vf.nargs - n); + if (vf.ret < 0) + return vf.ret; + n = vf.nargs; } return n; } @@ -1161,6 +1208,10 @@ static int add_probe_trace_event(Dwarf_Die *sc_die, struct probe_finder *pf) if (ret < 0) return ret; + tev->point.realname = strdup(dwarf_diename(sc_die)); + if (!tev->point.realname) + return -ENOMEM; + pr_debug("Probe point found: %s+%lu\n", tev->point.symbol, tev->point.offset); @@ -1198,15 +1249,15 @@ end: /* Find probe_trace_events specified by perf_probe_event from debuginfo */ int debuginfo__find_trace_events(struct debuginfo *dbg, struct perf_probe_event *pev, - struct probe_trace_event **tevs, int max_tevs) + struct probe_trace_event **tevs) { struct trace_event_finder tf = { .pf = {.pev = pev, .callback = add_probe_trace_event}, - .mod = dbg->mod, .max_tevs = max_tevs}; + .max_tevs = 
probe_conf.max_probes, .mod = dbg->mod}; int ret; /* Allocate result tevs array */ - *tevs = zalloc(sizeof(struct probe_trace_event) * max_tevs); + *tevs = zalloc(sizeof(struct probe_trace_event) * tf.max_tevs); if (*tevs == NULL) return -ENOMEM; @@ -1222,14 +1273,11 @@ int debuginfo__find_trace_events(struct debuginfo *dbg, return (ret < 0) ? ret : tf.ntevs; } -#define MAX_VAR_LEN 64 - /* Collect available variables in this scope */ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) { struct available_var_finder *af = data; struct variable_list *vl; - char buf[MAX_VAR_LEN]; int tag, ret; vl = &af->vls[af->nvls - 1]; @@ -1240,11 +1288,38 @@ static int collect_variables_cb(Dwarf_Die *die_mem, void *data) ret = convert_variable_location(die_mem, af->pf.addr, af->pf.fb_ops, &af->pf.sp_die, NULL); - if (ret == 0) { - ret = die_get_varname(die_mem, buf, MAX_VAR_LEN); - pr_debug2("Add new var: %s\n", buf); - if (ret > 0) - strlist__add(vl->vars, buf); + if (ret == 0 || ret == -ERANGE) { + int ret2; + bool externs = !af->child; + struct strbuf buf; + + strbuf_init(&buf, 64); + + if (probe_conf.show_location_range) { + if (!externs) { + if (ret) + strbuf_addf(&buf, "[INV]\t"); + else + strbuf_addf(&buf, "[VAL]\t"); + } else + strbuf_addf(&buf, "[EXT]\t"); + } + + ret2 = die_get_varname(die_mem, &buf); + + if (!ret2 && probe_conf.show_location_range && + !externs) { + strbuf_addf(&buf, "\t"); + ret2 = die_get_var_range(&af->pf.sp_die, + die_mem, &buf); + } + + pr_debug("Add new var: %s\n", buf.buf); + if (ret2 == 0) { + strlist__add(vl->vars, + strbuf_detach(&buf, NULL)); + } + strbuf_release(&buf); } } @@ -1287,9 +1362,9 @@ static int add_available_vars(Dwarf_Die *sc_die, struct probe_finder *pf) die_find_child(sc_die, collect_variables_cb, (void *)af, &die_mem); /* Find external variables */ - if (!af->externs) + if (!probe_conf.show_ext_vars) goto out; - /* Don't need to search child DIE for externs. */ + /* Don't need to search child DIE for external vars. 
*/ af->child = false; die_find_child(&pf->cu_die, collect_variables_cb, (void *)af, &die_mem); @@ -1309,17 +1384,16 @@ out: */ int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct perf_probe_event *pev, - struct variable_list **vls, - int max_vls, bool externs) + struct variable_list **vls) { struct available_var_finder af = { .pf = {.pev = pev, .callback = add_available_vars}, .mod = dbg->mod, - .max_vls = max_vls, .externs = externs}; + .max_vls = probe_conf.max_probes}; int ret; /* Allocate result vls array */ - *vls = zalloc(sizeof(struct variable_list) * max_vls); + *vls = zalloc(sizeof(struct variable_list) * af.max_vls); if (*vls == NULL) return -ENOMEM; @@ -1349,11 +1423,8 @@ int debuginfo__find_probe_point(struct debuginfo *dbg, unsigned long addr, const char *fname = NULL, *func = NULL, *basefunc = NULL, *tmp; int baseline = 0, lineno = 0, ret = 0; - /* Adjust address with bias */ - addr += dbg->bias; - /* Find cu die */ - if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr - dbg->bias, &cudie)) { + if (!dwarf_addrdie(dbg->dbg, (Dwarf_Addr)addr, &cudie)) { pr_warning("Failed to find debug information for address %lx\n", addr); ret = -EINVAL; @@ -1523,7 +1594,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) return DWARF_CB_OK; if (die_is_func_def(sp_die) && - die_compare_name(sp_die, lr->function)) { + die_match_name(sp_die, lr->function)) { lf->fname = dwarf_decl_file(sp_die); dwarf_decl_line(sp_die, &lr->offset); pr_debug("fname: %s, lineno:%d\n", lf->fname, lr->offset); @@ -1536,7 +1607,7 @@ static int line_range_search_cb(Dwarf_Die *sp_die, void *data) pr_debug("New line range: %d to %d\n", lf->lno_s, lf->lno_e); lr->start = lf->lno_s; lr->end = lf->lno_e; - if (dwarf_func_inline(sp_die)) + if (!die_is_func_instance(sp_die)) param->retval = die_walk_instances(sp_die, line_range_inline_cb, lf); else @@ -1623,3 +1694,61 @@ found: return (ret < 0) ? ret : lf.found; } +/* + * Find a src file from a DWARF tag path. Prepend optional source path prefix + * and chop off leading directories that do not exist. Result is passed back as + * a newly allocated path on success. + * Return 0 if file was found and readable, -errno otherwise. + */ +int get_real_path(const char *raw_path, const char *comp_dir, + char **new_path) +{ + const char *prefix = symbol_conf.source_prefix; + + if (!prefix) { + if (raw_path[0] != '/' && comp_dir) + /* If not an absolute path, try to use comp_dir */ + prefix = comp_dir; + else { + if (access(raw_path, R_OK) == 0) { + *new_path = strdup(raw_path); + return *new_path ? 
0 : -ENOMEM; + } else + return -errno; + } + } + + *new_path = malloc((strlen(prefix) + strlen(raw_path) + 2)); + if (!*new_path) + return -ENOMEM; + + for (;;) { + sprintf(*new_path, "%s/%s", prefix, raw_path); + + if (access(*new_path, R_OK) == 0) + return 0; + + if (!symbol_conf.source_prefix) { + /* In case of searching comp_dir, don't retry */ + zfree(new_path); + return -errno; + } + + switch (errno) { + case ENAMETOOLONG: + case ENOENT: + case EROFS: + case EFAULT: + raw_path = strchr(++raw_path, '/'); + if (!raw_path) { + zfree(new_path); + return -ENOENT; + } + continue; + + default: + zfree(new_path); + return -errno; + } + } +} diff --git a/tools/perf/util/probe-finder.h b/tools/perf/util/probe-finder.h index 92590b2c7e1c..bed82716e1b4 100644 --- a/tools/perf/util/probe-finder.h +++ b/tools/perf/util/probe-finder.h @@ -10,6 +10,9 @@ #define MAX_PROBES 128 #define MAX_PROBE_ARGS 128 +#define PROBE_ARG_VARS "$vars" +#define PROBE_ARG_PARAMS "$params" + static inline int is_c_varname(const char *name) { /* TODO */ @@ -37,8 +40,7 @@ extern void debuginfo__delete(struct debuginfo *dbg); /* Find probe_trace_events specified by perf_probe_event from debuginfo */ extern int debuginfo__find_trace_events(struct debuginfo *dbg, struct perf_probe_event *pev, - struct probe_trace_event **tevs, - int max_tevs); + struct probe_trace_event **tevs); /* Find a perf_probe_point from debuginfo */ extern int debuginfo__find_probe_point(struct debuginfo *dbg, @@ -52,8 +54,11 @@ extern int debuginfo__find_line_range(struct debuginfo *dbg, /* Find available variables */ extern int debuginfo__find_available_vars_at(struct debuginfo *dbg, struct perf_probe_event *pev, - struct variable_list **vls, - int max_points, bool externs); + struct variable_list **vls); + +/* Find a src file from a DWARF tag path */ +int get_real_path(const char *raw_path, const char *comp_dir, + char **new_path); struct probe_finder { struct perf_probe_event *pev; /* Target probe event */ @@ -92,7 +97,6 @@ struct available_var_finder { struct variable_list *vls; /* Found variable lists */ int nvls; /* Number of variable lists */ int max_vls; /* Max no. 
of variable lists */ - bool externs; /* Find external vars too */ bool child; /* Search child scopes */ }; diff --git a/tools/perf/util/pstack.c b/tools/perf/util/pstack.c index a126e6cc6e73..b234a6e3d0d4 100644 --- a/tools/perf/util/pstack.c +++ b/tools/perf/util/pstack.c @@ -74,3 +74,10 @@ void *pstack__pop(struct pstack *pstack) pstack->entries[pstack->top] = NULL; return ret; } + +void *pstack__peek(struct pstack *pstack) +{ + if (pstack->top == 0) + return NULL; + return pstack->entries[pstack->top - 1]; +} diff --git a/tools/perf/util/pstack.h b/tools/perf/util/pstack.h index c3cb6584d527..ded7f2e36624 100644 --- a/tools/perf/util/pstack.h +++ b/tools/perf/util/pstack.h @@ -10,5 +10,6 @@ bool pstack__empty(const struct pstack *pstack); void pstack__remove(struct pstack *pstack, void *key); void pstack__push(struct pstack *pstack, void *key); void *pstack__pop(struct pstack *pstack); +void *pstack__peek(struct pstack *pstack); #endif /* _PERF_PSTACK_ */ diff --git a/tools/perf/util/python-ext-sources b/tools/perf/util/python-ext-sources index 6c6a6953fa93..0766d98c5da5 100644 --- a/tools/perf/util/python-ext-sources +++ b/tools/perf/util/python-ext-sources @@ -10,13 +10,14 @@ util/ctype.c util/evlist.c util/evsel.c util/cpumap.c -../../lib/hweight.c +../lib/hweight.c util/thread_map.c util/util.c util/xyarray.c util/cgroup.c util/rblist.c +util/stat.c util/strlist.c -../lib/api/fs/fs.c util/trace-event.c -../../lib/rbtree.c +../lib/rbtree.c +util/string.c diff --git a/tools/perf/util/python.c b/tools/perf/util/python.c index d906d0ad5d40..626422eda727 100644 --- a/tools/perf/util/python.c +++ b/tools/perf/util/python.c @@ -384,7 +384,7 @@ static int pyrf_cpu_map__init(struct pyrf_cpu_map *pcpus, static void pyrf_cpu_map__delete(struct pyrf_cpu_map *pcpus) { - cpu_map__delete(pcpus->cpus); + cpu_map__put(pcpus->cpus); pcpus->ob_type->tp_free((PyObject*)pcpus); } @@ -453,7 +453,7 @@ static int pyrf_thread_map__init(struct pyrf_thread_map *pthreads, static void pyrf_thread_map__delete(struct pyrf_thread_map *pthreads) { - thread_map__delete(pthreads->threads); + thread_map__put(pthreads->threads); pthreads->ob_type->tp_free((PyObject*)pthreads); } diff --git a/tools/perf/util/record.c b/tools/perf/util/record.c index 8acd0df88b5c..1f7becbe5e18 100644 --- a/tools/perf/util/record.c +++ b/tools/perf/util/record.c @@ -20,7 +20,7 @@ static int perf_do_probe_api(setup_probe_fn_t fn, int cpu, const char *str) if (!evlist) return -ENOMEM; - if (parse_events(evlist, str)) + if (parse_events(evlist, str, NULL)) goto out_delete; evsel = perf_evlist__first(evlist); @@ -64,7 +64,7 @@ static bool perf_probe_api(setup_probe_fn_t fn) if (!cpus) return false; cpu = cpus->map[0]; - cpu_map__delete(cpus); + cpu_map__put(cpus); do { ret = perf_do_probe_api(fn, cpu, try[i++]); @@ -119,7 +119,16 @@ void perf_evlist__config(struct perf_evlist *evlist, struct record_opts *opts) evsel->attr.comm_exec = 1; } - if (evlist->nr_entries > 1) { + if (opts->full_auxtrace) { + /* + * Need to be able to synthesize and parse selected events with + * arbitrary sample types, which requires always being able to + * match the id. 
+ */ + use_sample_identifier = perf_can_sample_identifier(); + evlist__for_each(evlist, evsel) + perf_evsel__set_sample_id(evsel, use_sample_identifier); + } else if (evlist->nr_entries > 1) { struct perf_evsel *first = perf_evlist__first(evlist); evlist__for_each(evlist, evsel) { @@ -207,7 +216,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str) if (!temp_evlist) return false; - err = parse_events(temp_evlist, str); + err = parse_events(temp_evlist, str, NULL); if (err) goto out_delete; @@ -217,7 +226,7 @@ bool perf_evlist__can_select_event(struct perf_evlist *evlist, const char *str) struct cpu_map *cpus = cpu_map__new(NULL); cpu = cpus ? cpus->map[0] : 0; - cpu_map__delete(cpus); + cpu_map__put(cpus); } else { cpu = evlist->cpus->map[0]; } diff --git a/tools/perf/util/scripting-engines/Build b/tools/perf/util/scripting-engines/Build new file mode 100644 index 000000000000..6516e220c247 --- /dev/null +++ b/tools/perf/util/scripting-engines/Build @@ -0,0 +1,6 @@ +libperf-$(CONFIG_LIBPERL) += trace-event-perl.o +libperf-$(CONFIG_LIBPYTHON) += trace-event-python.o + +CFLAGS_trace-event-perl.o += $(PERL_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow -Wno-undef -Wno-switch-default + +CFLAGS_trace-event-python.o += $(PYTHON_EMBED_CCOPTS) -Wno-redundant-decls -Wno-strict-prototypes -Wno-unused-parameter -Wno-shadow diff --git a/tools/perf/util/scripting-engines/trace-event-perl.c b/tools/perf/util/scripting-engines/trace-event-perl.c index 22ebc46226e7..1bd593bbf7a5 100644 --- a/tools/perf/util/scripting-engines/trace-event-perl.c +++ b/tools/perf/util/scripting-engines/trace-event-perl.c @@ -55,10 +55,10 @@ void xs_init(pTHX) INTERP my_perl; -#define FTRACE_MAX_EVENT \ +#define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) -static DECLARE_BITMAP(events_defined, FTRACE_MAX_EVENT); +static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX); extern struct scripting_context *scripting_context; @@ -214,6 +214,11 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; + case PRINT_INT_ARRAY: + define_event_symbols(event, ev_name, args->int_array.field); + define_event_symbols(event, ev_name, args->int_array.count); + define_event_symbols(event, ev_name, args->int_array.el_size); + break; case PRINT_BSTRING: case PRINT_DYNAMIC_ARRAY: case PRINT_STRING: @@ -355,10 +360,9 @@ static void perl_process_event_generic(union perf_event *event, static void perl_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al __maybe_unused) + struct addr_location *al) { - perl_process_tracepoint(sample, evsel, thread); + perl_process_tracepoint(sample, evsel, al->thread); perl_process_event_generic(event, sample, evsel); } diff --git a/tools/perf/util/scripting-engines/trace-event-python.c b/tools/perf/util/scripting-engines/trace-event-python.c index 0c815a40a6e8..ace2484985cb 100644 --- a/tools/perf/util/scripting-engines/trace-event-python.c +++ b/tools/perf/util/scripting-engines/trace-event-python.c @@ -44,10 +44,10 @@ PyMODINIT_FUNC initperf_trace_context(void); -#define FTRACE_MAX_EVENT \ +#define TRACE_EVENT_TYPE_MAX \ ((1 << (sizeof(unsigned short) * 8)) - 1) -static DECLARE_BITMAP(events_defined, FTRACE_MAX_EVENT); +static DECLARE_BITMAP(events_defined, TRACE_EVENT_TYPE_MAX); #define MAX_FIELDS 64 
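In both scripting engines the events_defined bitmap is now sized by TRACE_EVENT_TYPE_MAX, i.e. the full 16-bit trace event type space, so each tracepoint's symbols are defined to the script engine only once. A self-contained sketch of that test-and-set bookkeeping, using a plain byte array instead of the kernel-style DECLARE_BITMAP() helpers and made-up event ids:

/*
 * Track which 16-bit event type ids have already been handled,
 * mirroring the events_defined bitmap in the scripting engines.
 */
#include <stdbool.h>
#include <stdint.h>
#include <stdio.h>

#define TYPE_MAX ((1 << 16) - 1)

static uint8_t defined[(TYPE_MAX + 1 + 7) / 8];

/* Returns true the first time a given event type id is seen. */
static bool first_time_seen(uint16_t id)
{
	uint8_t mask = 1u << (id % 8);

	if (defined[id / 8] & mask)
		return false;
	defined[id / 8] |= mask;
	return true;
}

int main(void)
{
	const uint16_t ids[] = { 314, 77, 314 };	/* hypothetical ids */
	unsigned int i;

	for (i = 0; i < sizeof(ids) / sizeof(ids[0]); i++)
		printf("event %u: %s\n", ids[i],
		       first_time_seen(ids[i]) ? "define symbols" : "already defined");
	return 0;
}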
#define N_COMMON_FIELDS 7 @@ -231,6 +231,11 @@ static void define_event_symbols(struct event_format *event, define_event_symbols(event, ev_name, args->hex.field); define_event_symbols(event, ev_name, args->hex.size); break; + case PRINT_INT_ARRAY: + define_event_symbols(event, ev_name, args->int_array.field); + define_event_symbols(event, ev_name, args->int_array.count); + define_event_symbols(event, ev_name, args->int_array.el_size); + break; case PRINT_STRING: break; case PRINT_TYPE: @@ -376,7 +381,6 @@ exit: static void python_process_tracepoint(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { struct event_format *event = evsel->tp_format; @@ -390,7 +394,7 @@ static void python_process_tracepoint(struct perf_sample *sample, int cpu = sample->cpu; void *data = sample->raw_data; unsigned long long nsecs = sample->time; - const char *comm = thread__comm_str(thread); + const char *comm = thread__comm_str(al->thread); t = PyTuple_New(MAX_FIELDS); if (!t) @@ -675,7 +679,7 @@ static int python_export_sample(struct db_export *dbe, tuple_set_u64(t, 0, es->db_id); tuple_set_u64(t, 1, es->evsel->db_id); tuple_set_u64(t, 2, es->al->machine->db_id); - tuple_set_u64(t, 3, es->thread->db_id); + tuple_set_u64(t, 3, es->al->thread->db_id); tuple_set_u64(t, 4, es->comm_db_id); tuple_set_u64(t, 5, es->dso_db_id); tuple_set_u64(t, 6, es->sym_db_id); @@ -761,7 +765,6 @@ static int python_process_call_return(struct call_return *cr, void *data) static void python_process_general_event(struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { PyObject *handler, *t, *dict, *callchain, *dict_sample; @@ -811,7 +814,7 @@ static void python_process_general_event(struct perf_sample *sample, pydict_set_item_string_decref(dict, "raw_buf", PyString_FromStringAndSize( (const char *)sample->raw_data, sample->raw_size)); pydict_set_item_string_decref(dict, "comm", - PyString_FromString(thread__comm_str(thread))); + PyString_FromString(thread__comm_str(al->thread))); if (al->map) { pydict_set_item_string_decref(dict, "dso", PyString_FromString(al->map->dso->name)); @@ -838,22 +841,20 @@ exit: static void python_process_event(union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, struct addr_location *al) { struct tables *tables = &tables_global; switch (evsel->attr.type) { case PERF_TYPE_TRACEPOINT: - python_process_tracepoint(sample, evsel, thread, al); + python_process_tracepoint(sample, evsel, al); break; /* Reserve for future process_hw/sw/raw APIs */ default: if (tables->db_export_mode) - db_export__sample(&tables->dbe, event, sample, evsel, - thread, al); + db_export__sample(&tables->dbe, event, sample, evsel, al); else - python_process_general_event(sample, evsel, thread, al); + python_process_general_event(sample, evsel, al); } } diff --git a/tools/perf/util/session.c b/tools/perf/util/session.c index 0baf75f12b7c..ed9dc2555ec7 100644 --- a/tools/perf/util/session.c +++ b/tools/perf/util/session.c @@ -15,6 +15,14 @@ #include "cpumap.h" #include "perf_regs.h" #include "asm/bug.h" +#include "auxtrace.h" +#include "thread-stack.h" + +static int perf_session__deliver_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, + u64 file_offset); static int perf_session__open(struct perf_session *session) { @@ -86,6 +94,23 @@ static void perf_session__set_comm_exec(struct perf_session *session) 
machines__set_comm_exec(&session->machines, comm_exec); } +static int ordered_events__deliver_event(struct ordered_events *oe, + struct ordered_event *event) +{ + struct perf_sample sample; + struct perf_session *session = container_of(oe, struct perf_session, + ordered_events); + int ret = perf_evlist__parse_sample(session->evlist, event->event, &sample); + + if (ret) { + pr_err("Can't parse sample, err = %d\n", ret); + return ret; + } + + return perf_session__deliver_event(session, event->event, &sample, + session->tool, event->file_offset); +} + struct perf_session *perf_session__new(struct perf_data_file *file, bool repipe, struct perf_tool *tool) { @@ -95,8 +120,10 @@ struct perf_session *perf_session__new(struct perf_data_file *file, goto out; session->repipe = repipe; - ordered_events__init(&session->ordered_events); + session->tool = tool; + INIT_LIST_HEAD(&session->auxtrace_index); machines__init(&session->machines); + ordered_events__init(&session->ordered_events, ordered_events__deliver_event); if (file) { if (perf_data_file__open(file)) @@ -138,11 +165,6 @@ struct perf_session *perf_session__new(struct perf_data_file *file, return NULL; } -static void perf_session__delete_dead_threads(struct perf_session *session) -{ - machine__delete_dead_threads(&session->machines.host); -} - static void perf_session__delete_threads(struct perf_session *session) { machine__delete_threads(&session->machines.host); @@ -166,8 +188,9 @@ static void perf_session_env__delete(struct perf_session_env *env) void perf_session__delete(struct perf_session *session) { + auxtrace__free(session); + auxtrace_index__free(&session->auxtrace_index); perf_session__destroy_kernel_maps(session); - perf_session__delete_dead_threads(session); perf_session__delete_threads(session); perf_session_env__delete(&session->header.env); machines__exit(&session->machines); @@ -215,10 +238,17 @@ static int process_event_stub(struct perf_tool *tool __maybe_unused, return 0; } +static int process_build_id_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *perf_session - __maybe_unused) + struct ordered_events *oe __maybe_unused) { dump_printf(": unhandled!\n"); return 0; @@ -226,7 +256,7 @@ static int process_finished_round_stub(struct perf_tool *tool __maybe_unused, static int process_finished_round(struct perf_tool *tool, union perf_event *event, - struct perf_session *session); + struct ordered_events *oe); static int process_id_index_stub(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, @@ -237,6 +267,49 @@ static int process_id_index_stub(struct perf_tool *tool __maybe_unused, return 0; } +static int process_event_auxtrace_info_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + +static int skipn(int fd, off_t n) +{ + char buf[4096]; + ssize_t ret; + + while (n > 0) { + ret = read(fd, buf, min(n, (off_t)sizeof(buf))); + if (ret <= 0) + return ret; + n -= ret; + } + + return 0; +} + +static s64 process_event_auxtrace_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event, + struct perf_session *session + __maybe_unused) +{ + dump_printf(": unhandled!\n"); + if 
(perf_data_file__is_pipe(session->file)) + skipn(perf_data_file__fd(session->file), event->auxtrace.size); + return event->auxtrace.size; +} + +static +int process_event_auxtrace_error_stub(struct perf_tool *tool __maybe_unused, + union perf_event *event __maybe_unused, + struct perf_session *session __maybe_unused) +{ + dump_printf(": unhandled!\n"); + return 0; +} + void perf_tool__fill_defaults(struct perf_tool *tool) { if (tool->sample == NULL) @@ -253,6 +326,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool) tool->exit = process_event_stub; if (tool->lost == NULL) tool->lost = perf_event__process_lost; + if (tool->lost_samples == NULL) + tool->lost_samples = perf_event__process_lost_samples; + if (tool->aux == NULL) + tool->aux = perf_event__process_aux; + if (tool->itrace_start == NULL) + tool->itrace_start = perf_event__process_itrace_start; if (tool->read == NULL) tool->read = process_event_sample_stub; if (tool->throttle == NULL) @@ -264,7 +343,7 @@ void perf_tool__fill_defaults(struct perf_tool *tool) if (tool->tracing_data == NULL) tool->tracing_data = process_event_synth_tracing_data_stub; if (tool->build_id == NULL) - tool->build_id = process_finished_round_stub; + tool->build_id = process_build_id_stub; if (tool->finished_round == NULL) { if (tool->ordered_events) tool->finished_round = process_finished_round; @@ -273,6 +352,12 @@ void perf_tool__fill_defaults(struct perf_tool *tool) } if (tool->id_index == NULL) tool->id_index = process_id_index_stub; + if (tool->auxtrace_info == NULL) + tool->auxtrace_info = process_event_auxtrace_info_stub; + if (tool->auxtrace == NULL) + tool->auxtrace = process_event_auxtrace_stub; + if (tool->auxtrace_error == NULL) + tool->auxtrace_error = process_event_auxtrace_error_stub; } static void swap_sample_id_all(union perf_event *event, void *data) @@ -365,6 +450,26 @@ static void perf_event__read_swap(union perf_event *event, bool sample_id_all) swap_sample_id_all(event, &event->read + 1); } +static void perf_event__aux_swap(union perf_event *event, bool sample_id_all) +{ + event->aux.aux_offset = bswap_64(event->aux.aux_offset); + event->aux.aux_size = bswap_64(event->aux.aux_size); + event->aux.flags = bswap_64(event->aux.flags); + + if (sample_id_all) + swap_sample_id_all(event, &event->aux + 1); +} + +static void perf_event__itrace_start_swap(union perf_event *event, + bool sample_id_all) +{ + event->itrace_start.pid = bswap_32(event->itrace_start.pid); + event->itrace_start.tid = bswap_32(event->itrace_start.tid); + + if (sample_id_all) + swap_sample_id_all(event, &event->itrace_start + 1); +} + static void perf_event__throttle_swap(union perf_event *event, bool sample_id_all) { @@ -413,19 +518,42 @@ void perf_event__attr_swap(struct perf_event_attr *attr) { attr->type = bswap_32(attr->type); attr->size = bswap_32(attr->size); - attr->config = bswap_64(attr->config); - attr->sample_period = bswap_64(attr->sample_period); - attr->sample_type = bswap_64(attr->sample_type); - attr->read_format = bswap_64(attr->read_format); - attr->wakeup_events = bswap_32(attr->wakeup_events); - attr->bp_type = bswap_32(attr->bp_type); - attr->bp_addr = bswap_64(attr->bp_addr); - attr->bp_len = bswap_64(attr->bp_len); - attr->branch_sample_type = bswap_64(attr->branch_sample_type); - attr->sample_regs_user = bswap_64(attr->sample_regs_user); - attr->sample_stack_user = bswap_32(attr->sample_stack_user); - swap_bitfield((u8 *) (&attr->read_format + 1), sizeof(u64)); +#define bswap_safe(f, n) \ + (attr->size > (offsetof(struct perf_event_attr, f) 
+ \ + sizeof(attr->f) * (n))) +#define bswap_field(f, sz) \ +do { \ + if (bswap_safe(f, 0)) \ + attr->f = bswap_##sz(attr->f); \ +} while(0) +#define bswap_field_32(f) bswap_field(f, 32) +#define bswap_field_64(f) bswap_field(f, 64) + + bswap_field_64(config); + bswap_field_64(sample_period); + bswap_field_64(sample_type); + bswap_field_64(read_format); + bswap_field_32(wakeup_events); + bswap_field_32(bp_type); + bswap_field_64(bp_addr); + bswap_field_64(bp_len); + bswap_field_64(branch_sample_type); + bswap_field_64(sample_regs_user); + bswap_field_32(sample_stack_user); + bswap_field_32(aux_watermark); + + /* + * After read_format are bitfields. Check read_format because + * we are unable to use offsetof on bitfield. + */ + if (bswap_safe(read_format, 1)) + swap_bitfield((u8 *) (&attr->read_format + 1), + sizeof(u64)); +#undef bswap_field_64 +#undef bswap_field_32 +#undef bswap_field +#undef bswap_safe } static void perf_event__hdr_attr_swap(union perf_event *event, @@ -453,6 +581,40 @@ static void perf_event__tracing_data_swap(union perf_event *event, event->tracing_data.size = bswap_32(event->tracing_data.size); } +static void perf_event__auxtrace_info_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + size_t size; + + event->auxtrace_info.type = bswap_32(event->auxtrace_info.type); + + size = event->header.size; + size -= (void *)&event->auxtrace_info.priv - (void *)event; + mem_bswap_64(event->auxtrace_info.priv, size); +} + +static void perf_event__auxtrace_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->auxtrace.size = bswap_64(event->auxtrace.size); + event->auxtrace.offset = bswap_64(event->auxtrace.offset); + event->auxtrace.reference = bswap_64(event->auxtrace.reference); + event->auxtrace.idx = bswap_32(event->auxtrace.idx); + event->auxtrace.tid = bswap_32(event->auxtrace.tid); + event->auxtrace.cpu = bswap_32(event->auxtrace.cpu); +} + +static void perf_event__auxtrace_error_swap(union perf_event *event, + bool sample_id_all __maybe_unused) +{ + event->auxtrace_error.type = bswap_32(event->auxtrace_error.type); + event->auxtrace_error.code = bswap_32(event->auxtrace_error.code); + event->auxtrace_error.cpu = bswap_32(event->auxtrace_error.cpu); + event->auxtrace_error.pid = bswap_32(event->auxtrace_error.pid); + event->auxtrace_error.tid = bswap_32(event->auxtrace_error.tid); + event->auxtrace_error.ip = bswap_64(event->auxtrace_error.ip); +} + typedef void (*perf_event__swap_op)(union perf_event *event, bool sample_id_all); @@ -467,11 +629,17 @@ static perf_event__swap_op perf_event__swap_ops[] = { [PERF_RECORD_THROTTLE] = perf_event__throttle_swap, [PERF_RECORD_UNTHROTTLE] = perf_event__throttle_swap, [PERF_RECORD_SAMPLE] = perf_event__all64_swap, + [PERF_RECORD_AUX] = perf_event__aux_swap, + [PERF_RECORD_ITRACE_START] = perf_event__itrace_start_swap, + [PERF_RECORD_LOST_SAMPLES] = perf_event__all64_swap, [PERF_RECORD_HEADER_ATTR] = perf_event__hdr_attr_swap, [PERF_RECORD_HEADER_EVENT_TYPE] = perf_event__event_type_swap, [PERF_RECORD_HEADER_TRACING_DATA] = perf_event__tracing_data_swap, [PERF_RECORD_HEADER_BUILD_ID] = NULL, [PERF_RECORD_ID_INDEX] = perf_event__all64_swap, + [PERF_RECORD_AUXTRACE_INFO] = perf_event__auxtrace_info_swap, + [PERF_RECORD_AUXTRACE] = perf_event__auxtrace_swap, + [PERF_RECORD_AUXTRACE_ERROR] = perf_event__auxtrace_error_swap, [PERF_RECORD_HEADER_MAX] = NULL, }; @@ -514,54 +682,82 @@ static perf_event__swap_op perf_event__swap_ops[] = { * Flush every events below timestamp 7 * etc... 
*/ -static int process_finished_round(struct perf_tool *tool, +static int process_finished_round(struct perf_tool *tool __maybe_unused, union perf_event *event __maybe_unused, - struct perf_session *session) + struct ordered_events *oe) { - return ordered_events__flush(session, tool, OE_FLUSH__ROUND); + if (dump_trace) + fprintf(stdout, "\n"); + return ordered_events__flush(oe, OE_FLUSH__ROUND); } -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset) +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset) { - struct ordered_events *oe = &s->ordered_events; - u64 timestamp = sample->time; - struct ordered_event *new; - - if (!timestamp || timestamp == ~0ULL) - return -ETIME; + return ordered_events__queue(&s->ordered_events, event, sample, file_offset); +} - if (timestamp < oe->last_flush) { - pr_oe_time(timestamp, "out of order event\n"); - pr_oe_time(oe->last_flush, "last flush, last_flush_type %d\n", - oe->last_flush_type); +static void callchain__lbr_callstack_printf(struct perf_sample *sample) +{ + struct ip_callchain *callchain = sample->callchain; + struct branch_stack *lbr_stack = sample->branch_stack; + u64 kernel_callchain_nr = callchain->nr; + unsigned int i; - s->stats.nr_unordered_events++; + for (i = 0; i < kernel_callchain_nr; i++) { + if (callchain->ips[i] == PERF_CONTEXT_USER) + break; } - new = ordered_events__new(oe, timestamp, event); - if (!new) { - ordered_events__flush(s, tool, OE_FLUSH__HALF); - new = ordered_events__new(oe, timestamp, event); - } + if ((i != kernel_callchain_nr) && lbr_stack->nr) { + u64 total_nr; + /* + * LBR callstack can only get user call chain, + * i is kernel call chain number, + * 1 is PERF_CONTEXT_USER. + * + * The user call chain is stored in LBR registers. + * LBR are pair registers. The caller is stored + * in "from" register, while the callee is stored + * in "to" register. + * For example, there is a call stack + * "A"->"B"->"C"->"D". + * The LBR registers will recorde like + * "C"->"D", "B"->"C", "A"->"B". + * So only the first "to" register and all "from" + * registers are needed to construct the whole stack. + */ + total_nr = i + 1 + lbr_stack->nr + 1; + kernel_callchain_nr = i + 1; - if (!new) - return -ENOMEM; + printf("... LBR call chain: nr:%" PRIu64 "\n", total_nr); - new->file_offset = file_offset; - return 0; + for (i = 0; i < kernel_callchain_nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + i, callchain->ips[i]); + + printf("..... %2d: %016" PRIx64 "\n", + (int)(kernel_callchain_nr), lbr_stack->entries[0].to); + for (i = 0; i < lbr_stack->nr; i++) + printf("..... %2d: %016" PRIx64 "\n", + (int)(i + kernel_callchain_nr + 1), lbr_stack->entries[i].from); + } } -static void callchain__printf(struct perf_sample *sample) +static void callchain__printf(struct perf_evsel *evsel, + struct perf_sample *sample) { unsigned int i; + struct ip_callchain *callchain = sample->callchain; - printf("... chain: nr:%" PRIu64 "\n", sample->callchain->nr); + if (has_branch_callstack(evsel)) + callchain__lbr_callstack_printf(sample); - for (i = 0; i < sample->callchain->nr; i++) + printf("... FP chain: nr:%" PRIu64 "\n", callchain->nr); + + for (i = 0; i < callchain->nr; i++) printf("..... 
%2d: %016" PRIx64 "\n", - i, sample->callchain->ips[i]); + i, callchain->ips[i]); } static void branch_stack__printf(struct perf_sample *sample) @@ -636,14 +832,14 @@ static void stack_user__printf(struct stack_dump *dump) dump->size, dump->offset); } -static void perf_session__print_tstamp(struct perf_session *session, +static void perf_evlist__print_tstamp(struct perf_evlist *evlist, union perf_event *event, struct perf_sample *sample) { - u64 sample_type = __perf_evlist__combined_sample_type(session->evlist); + u64 sample_type = __perf_evlist__combined_sample_type(evlist); if (event->header.type != PERF_RECORD_SAMPLE && - !perf_evlist__sample_id_all(session->evlist)) { + !perf_evlist__sample_id_all(evlist)) { fputs("-1 -1 ", stdout); return; } @@ -685,7 +881,7 @@ static void sample_read__printf(struct perf_sample *sample, u64 read_format) sample->read.one.id, sample->read.one.value); } -static void dump_event(struct perf_session *session, union perf_event *event, +static void dump_event(struct perf_evlist *evlist, union perf_event *event, u64 file_offset, struct perf_sample *sample) { if (!dump_trace) @@ -697,7 +893,7 @@ static void dump_event(struct perf_session *session, union perf_event *event, trace_event(event); if (sample) - perf_session__print_tstamp(session, event, sample); + perf_evlist__print_tstamp(evlist, event, sample); printf("%#" PRIx64 " [%#x]: PERF_RECORD_%s", file_offset, event->header.size, perf_event__name(event->header.type)); @@ -718,9 +914,9 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_type = evsel->attr.sample_type; if (sample_type & PERF_SAMPLE_CALLCHAIN) - callchain__printf(sample); + callchain__printf(evsel, sample); - if (sample_type & PERF_SAMPLE_BRANCH_STACK) + if ((sample_type & PERF_SAMPLE_BRANCH_STACK) && !has_branch_callstack(evsel)) branch_stack__printf(sample); if (sample_type & PERF_SAMPLE_REGS_USER) @@ -745,8 +941,7 @@ static void dump_sample(struct perf_evsel *evsel, union perf_event *event, sample_read__printf(sample, evsel->attr.read_format); } -static struct machine * - perf_session__find_machine_for_cpumode(struct perf_session *session, +static struct machine *machines__find_for_cpumode(struct machines *machines, union perf_event *event, struct perf_sample *sample) { @@ -764,26 +959,24 @@ static struct machine * else pid = sample->pid; - machine = perf_session__find_machine(session, pid); + machine = machines__find(machines, pid); if (!machine) - machine = perf_session__findnew_machine(session, - DEFAULT_GUEST_KERNEL_ID); + machine = machines__find(machines, DEFAULT_GUEST_KERNEL_ID); return machine; } - return &session->machines.host; + return &machines->host; } -static int deliver_sample_value(struct perf_session *session, +static int deliver_sample_value(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, struct sample_read_value *v, struct machine *machine) { - struct perf_sample_id *sid; + struct perf_sample_id *sid = perf_evlist__id2sid(evlist, v->id); - sid = perf_evlist__id2sid(session->evlist, v->id); if (sid) { sample->id = v->id; sample->period = v->value - sid->period; @@ -791,14 +984,14 @@ static int deliver_sample_value(struct perf_session *session, } if (!sid || sid->evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } return tool->sample(tool, event, sample, sid->evsel, machine); } -static int deliver_sample_group(struct perf_session *session, +static int deliver_sample_group(struct perf_evlist 
*evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -808,7 +1001,7 @@ static int deliver_sample_group(struct perf_session *session, u64 i; for (i = 0; i < sample->read.group.nr; i++) { - ret = deliver_sample_value(session, tool, event, sample, + ret = deliver_sample_value(evlist, tool, event, sample, &sample->read.group.values[i], machine); if (ret) @@ -819,7 +1012,7 @@ static int deliver_sample_group(struct perf_session *session, } static int -perf_session__deliver_sample(struct perf_session *session, + perf_evlist__deliver_sample(struct perf_evlist *evlist, struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -836,44 +1029,45 @@ perf_session__deliver_sample(struct perf_session *session, /* For PERF_SAMPLE_READ we have either single or group mode. */ if (read_format & PERF_FORMAT_GROUP) - return deliver_sample_group(session, tool, event, sample, + return deliver_sample_group(evlist, tool, event, sample, machine); else - return deliver_sample_value(session, tool, event, sample, + return deliver_sample_value(evlist, tool, event, sample, &sample->read.one, machine); } -int perf_session__deliver_event(struct perf_session *session, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset) +static int machines__deliver_event(struct machines *machines, + struct perf_evlist *evlist, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, u64 file_offset) { struct perf_evsel *evsel; struct machine *machine; - dump_event(session, event, file_offset, sample); + dump_event(evlist, event, file_offset, sample); - evsel = perf_evlist__id2evsel(session->evlist, sample->id); + evsel = perf_evlist__id2evsel(evlist, sample->id); - machine = perf_session__find_machine_for_cpumode(session, event, - sample); + machine = machines__find_for_cpumode(machines, event, sample); switch (event->header.type) { case PERF_RECORD_SAMPLE: dump_sample(evsel, event, sample); if (evsel == NULL) { - ++session->stats.nr_unknown_id; + ++evlist->stats.nr_unknown_id; return 0; } if (machine == NULL) { - ++session->stats.nr_unprocessable_samples; + ++evlist->stats.nr_unprocessable_samples; return 0; } - return perf_session__deliver_sample(session, tool, event, - sample, evsel, machine); + return perf_evlist__deliver_sample(evlist, tool, event, sample, evsel, machine); case PERF_RECORD_MMAP: return tool->mmap(tool, event, sample, machine); case PERF_RECORD_MMAP2: + if (event->header.misc & PERF_RECORD_MISC_PROC_MAP_PARSE_TIMEOUT) + ++evlist->stats.nr_proc_map_timeout; return tool->mmap2(tool, event, sample, machine); case PERF_RECORD_COMM: return tool->comm(tool, event, sample, machine); @@ -883,29 +1077,56 @@ int perf_session__deliver_event(struct perf_session *session, return tool->exit(tool, event, sample, machine); case PERF_RECORD_LOST: if (tool->lost == perf_event__process_lost) - session->stats.total_lost += event->lost.lost; + evlist->stats.total_lost += event->lost.lost; return tool->lost(tool, event, sample, machine); + case PERF_RECORD_LOST_SAMPLES: + if (tool->lost_samples == perf_event__process_lost_samples) + evlist->stats.total_lost_samples += event->lost_samples.lost; + return tool->lost_samples(tool, event, sample, machine); case PERF_RECORD_READ: return tool->read(tool, event, sample, evsel, machine); case PERF_RECORD_THROTTLE: return tool->throttle(tool, event, sample, machine); case PERF_RECORD_UNTHROTTLE: return tool->unthrottle(tool, event, sample, machine); + case 
PERF_RECORD_AUX: + return tool->aux(tool, event, sample, machine); + case PERF_RECORD_ITRACE_START: + return tool->itrace_start(tool, event, sample, machine); default: - ++session->stats.nr_unknown_events; + ++evlist->stats.nr_unknown_events; return -1; } } +static int perf_session__deliver_event(struct perf_session *session, + union perf_event *event, + struct perf_sample *sample, + struct perf_tool *tool, + u64 file_offset) +{ + int ret; + + ret = auxtrace__process_event(session, event, sample, tool); + if (ret < 0) + return ret; + if (ret > 0) + return 0; + + return machines__deliver_event(&session->machines, session->evlist, + event, sample, tool, file_offset); +} + static s64 perf_session__process_user_event(struct perf_session *session, union perf_event *event, - struct perf_tool *tool, u64 file_offset) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); int err; - dump_event(session, event, file_offset, NULL); + dump_event(session->evlist, event, file_offset, NULL); /* These events are processed right away */ switch (event->header.type) { @@ -929,9 +1150,18 @@ static s64 perf_session__process_user_event(struct perf_session *session, case PERF_RECORD_HEADER_BUILD_ID: return tool->build_id(tool, event, session); case PERF_RECORD_FINISHED_ROUND: - return tool->finished_round(tool, event, session); + return tool->finished_round(tool, event, oe); case PERF_RECORD_ID_INDEX: return tool->id_index(tool, event, session); + case PERF_RECORD_AUXTRACE_INFO: + return tool->auxtrace_info(tool, event, session); + case PERF_RECORD_AUXTRACE: + /* setup for reading amidst mmap */ + lseek(fd, file_offset + event->header.size, SEEK_SET); + return tool->auxtrace(tool, event, session); + case PERF_RECORD_AUXTRACE_ERROR: + perf_session__auxtrace_error_inc(session, event); + return tool->auxtrace_error(tool, event, session); default: return -EINVAL; } @@ -939,15 +1169,17 @@ static s64 perf_session__process_user_event(struct perf_session *session, int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool) + struct perf_sample *sample) { - events_stats__inc(&session->stats, event->header.type); + struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->tool; + + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, 0); + return perf_session__process_user_event(session, event, 0); - return perf_session__deliver_event(session, event, sample, tool, 0); + return machines__deliver_event(&session->machines, evlist, event, sample, tool, 0); } static void event_swap(union perf_event *event, bool sample_id_all) @@ -984,7 +1216,7 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, return -1; if (lseek(fd, file_offset, SEEK_SET) == (off_t)-1 || - readn(fd, &buf, hdr_sz) != (ssize_t)hdr_sz) + readn(fd, buf, hdr_sz) != (ssize_t)hdr_sz) return -1; event = (union perf_event *)buf; @@ -992,12 +1224,12 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, if (session->header.needs_swap) perf_event_header__bswap(&event->header); - if (event->header.size < hdr_sz) + if (event->header.size < hdr_sz || event->header.size > buf_sz) return -1; rest = event->header.size - hdr_sz; - if (readn(fd, &buf, rest) != (ssize_t)rest) + if (readn(fd, buf, rest) != 
(ssize_t)rest) return -1; if (session->header.needs_swap) @@ -1015,34 +1247,33 @@ out_parse_sample: } static s64 perf_session__process_event(struct perf_session *session, - union perf_event *event, - struct perf_tool *tool, - u64 file_offset) + union perf_event *event, u64 file_offset) { + struct perf_evlist *evlist = session->evlist; + struct perf_tool *tool = session->tool; struct perf_sample sample; int ret; if (session->header.needs_swap) - event_swap(event, perf_evlist__sample_id_all(session->evlist)); + event_swap(event, perf_evlist__sample_id_all(evlist)); if (event->header.type >= PERF_RECORD_HEADER_MAX) return -EINVAL; - events_stats__inc(&session->stats, event->header.type); + events_stats__inc(&evlist->stats, event->header.type); if (event->header.type >= PERF_RECORD_USER_TYPE_START) - return perf_session__process_user_event(session, event, tool, file_offset); + return perf_session__process_user_event(session, event, file_offset); /* * For all kernel events we get the sample data */ - ret = perf_evlist__parse_sample(session->evlist, event, &sample); + ret = perf_evlist__parse_sample(evlist, event, &sample); if (ret) return ret; if (tool->ordered_events) { - ret = perf_session_queue_event(session, event, tool, &sample, - file_offset); + ret = perf_session__queue_event(session, event, &sample, file_offset); if (ret != -ETIME) return ret; } @@ -1076,54 +1307,95 @@ static struct thread *perf_session__register_idle_thread(struct perf_session *se return thread; } -static void perf_session__warn_about_errors(const struct perf_session *session, - const struct perf_tool *tool) +static void perf_session__warn_about_errors(const struct perf_session *session) { - if (tool->lost == perf_event__process_lost && - session->stats.nr_events[PERF_RECORD_LOST] != 0) { + const struct events_stats *stats = &session->evlist->stats; + const struct ordered_events *oe = &session->ordered_events; + + if (session->tool->lost == perf_event__process_lost && + stats->nr_events[PERF_RECORD_LOST] != 0) { ui__warning("Processed %d events and lost %d chunks!\n\n" "Check IO/CPU overload!\n\n", - session->stats.nr_events[0], - session->stats.nr_events[PERF_RECORD_LOST]); + stats->nr_events[0], + stats->nr_events[PERF_RECORD_LOST]); + } + + if (session->tool->lost_samples == perf_event__process_lost_samples) { + double drop_rate; + + drop_rate = (double)stats->total_lost_samples / + (double) (stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples); + if (drop_rate > 0.05) { + ui__warning("Processed %" PRIu64 " samples and lost %3.2f%% samples!\n\n", + stats->nr_events[PERF_RECORD_SAMPLE] + stats->total_lost_samples, + drop_rate * 100.0); + } } - if (session->stats.nr_unknown_events != 0) { + if (stats->nr_unknown_events != 0) { ui__warning("Found %u unknown events!\n\n" "Is this an older tool processing a perf.data " "file generated by a more recent tool?\n\n" "If that is not the case, consider " "reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_unknown_events); + stats->nr_unknown_events); } - if (session->stats.nr_unknown_id != 0) { + if (stats->nr_unknown_id != 0) { ui__warning("%u samples with id not present in the header\n", - session->stats.nr_unknown_id); + stats->nr_unknown_id); } - if (session->stats.nr_invalid_chains != 0) { - ui__warning("Found invalid callchains!\n\n" - "%u out of %u events were discarded for this reason.\n\n" - "Consider reporting to linux-kernel@vger.kernel.org.\n\n", - session->stats.nr_invalid_chains, - session->stats.nr_events[PERF_RECORD_SAMPLE]); - } 
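The lost-samples accounting above only warns when the drop rate crosses 5%. A worked example with made-up numbers (not taken from any perf.data file): 9,000 samples plus 1,000 samples reported lost via PERF_RECORD_LOST_SAMPLES gives 1000 / (9000 + 1000) = 10%, which is over the threshold, so the warning fires; 400 lost out of 10,000 total (4%) would stay silent. The same arithmetic as a tiny standalone check:

#include <stdio.h>

int main(void)
{
	unsigned long long nr_samples = 9000, lost_samples = 1000;
	double drop_rate = (double)lost_samples /
			   (double)(nr_samples + lost_samples);

	if (drop_rate > 0.05)	/* same 5% threshold as the hunk above */
		printf("Processed %llu samples and lost %3.2f%% samples!\n",
		       nr_samples + lost_samples, drop_rate * 100.0);
	return 0;
}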
+ if (stats->nr_invalid_chains != 0) { + ui__warning("Found invalid callchains!\n\n" + "%u out of %u events were discarded for this reason.\n\n" + "Consider reporting to linux-kernel@vger.kernel.org.\n\n", + stats->nr_invalid_chains, + stats->nr_events[PERF_RECORD_SAMPLE]); + } - if (session->stats.nr_unprocessable_samples != 0) { + if (stats->nr_unprocessable_samples != 0) { ui__warning("%u unprocessable samples recorded.\n" "Do you have a KVM guest running and not using 'perf kvm'?\n", - session->stats.nr_unprocessable_samples); + stats->nr_unprocessable_samples); } - if (session->stats.nr_unordered_events != 0) - ui__warning("%u out of order events recorded.\n", session->stats.nr_unordered_events); + if (oe->nr_unordered_events != 0) + ui__warning("%u out of order events recorded.\n", oe->nr_unordered_events); + + events_stats__auxtrace_error_warn(stats); + + if (stats->nr_proc_map_timeout != 0) { + ui__warning("%d map information files for pre-existing threads were\n" + "not processed, if there are samples for addresses they\n" + "will not be resolved, you may find out which are these\n" + "threads by running with -v and redirecting the output\n" + "to a file.\n" + "The time limit to process proc map is too short?\n" + "Increase it by --proc-map-timeout\n", + stats->nr_proc_map_timeout); + } +} + +static int perf_session__flush_thread_stack(struct thread *thread, + void *p __maybe_unused) +{ + return thread_stack__flush(thread); +} + +static int perf_session__flush_thread_stacks(struct perf_session *session) +{ + return machines__for_each_thread(&session->machines, + perf_session__flush_thread_stack, + NULL); } volatile int session_done; -static int __perf_session__process_pipe_events(struct perf_session *session, - struct perf_tool *tool) +static int __perf_session__process_pipe_events(struct perf_session *session) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); union perf_event *event; uint32_t size, cur_size = 0; @@ -1187,7 +1459,7 @@ more: } } - if ((skip = perf_session__process_event(session, event, tool, head)) < 0) { + if ((skip = perf_session__process_event(session, event, head)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", head, event->header.size, event->header.type); err = -EINVAL; @@ -1203,11 +1475,18 @@ more: goto more; done: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); + if (err) + goto out_err; + err = auxtrace__flush_events(session, tool); + if (err) + goto out_err; + err = perf_session__flush_thread_stacks(session); out_err: free(buf); - perf_session__warn_about_errors(session, tool); + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); + auxtrace__free_events(session); return err; } @@ -1253,8 +1532,10 @@ fetch_mmaped_event(struct perf_session *session, static int __perf_session__process_events(struct perf_session *session, u64 data_offset, u64 data_size, - u64 file_size, struct perf_tool *tool) + u64 file_size) { + struct ordered_events *oe = &session->ordered_events; + struct perf_tool *tool = session->tool; int fd = perf_data_file__fd(session->file); u64 head, page_offset, file_offset, file_pos, size; int err, mmap_prot, mmap_flags, map_idx = 0; @@ -1323,8 +1604,7 @@ more: size = event->header.size; if (size < sizeof(struct perf_event_header) || - (skip = perf_session__process_event(session, event, 
tool, file_pos)) - < 0) { + (skip = perf_session__process_event(session, event, file_pos)) < 0) { pr_err("%#" PRIx64 " [%#x]: failed to process type: %d\n", file_offset + head, event->header.size, event->header.type); @@ -1348,17 +1628,23 @@ more: out: /* do the final flush for ordered samples */ - err = ordered_events__flush(session, tool, OE_FLUSH__FINAL); + err = ordered_events__flush(oe, OE_FLUSH__FINAL); + if (err) + goto out_err; + err = auxtrace__flush_events(session, tool); + if (err) + goto out_err; + err = perf_session__flush_thread_stacks(session); out_err: ui_progress__finish(); - perf_session__warn_about_errors(session, tool); + perf_session__warn_about_errors(session); ordered_events__free(&session->ordered_events); + auxtrace__free_events(session); session->one_mmap = false; return err; } -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool) +int perf_session__process_events(struct perf_session *session) { u64 size = perf_data_file__size(session->file); int err; @@ -1369,10 +1655,9 @@ int perf_session__process_events(struct perf_session *session, if (!perf_data_file__is_pipe(session->file)) err = __perf_session__process_events(session, session->header.data_offset, - session->header.data_size, - size, tool); + session->header.data_size, size); else - err = __perf_session__process_pipe_events(session, tool); + err = __perf_session__process_pipe_events(session); return err; } @@ -1415,6 +1700,9 @@ int maps__set_kallsyms_ref_reloc_sym(struct map **maps, for (i = 0; i < MAP__NR_TYPES; ++i) { struct kmap *kmap = map__kmap(maps[i]); + + if (!kmap) + continue; kmap->ref_reloc_sym = ref; } @@ -1434,9 +1722,15 @@ size_t perf_session__fprintf_dsos_buildid(struct perf_session *session, FILE *fp size_t perf_session__fprintf_nr_events(struct perf_session *session, FILE *fp) { - size_t ret = fprintf(fp, "Aggregated stats:\n"); + size_t ret; + const char *msg = ""; + + if (perf_header__has_feat(&session->header, HEADER_AUXTRACE)) + msg = " (excludes AUX area (e.g. 
instruction trace) decoded / synthesized events)"; + + ret = fprintf(fp, "\nAggregated stats:%s\n", msg); - ret += events_stats__fprintf(&session->stats, fp); + ret += events_stats__fprintf(&session->evlist->stats, fp); return ret; } @@ -1601,7 +1895,7 @@ int perf_session__cpu_bitmap(struct perf_session *session, err = 0; out_delete_map: - cpu_map__delete(map); + cpu_map__put(map); return err; } diff --git a/tools/perf/util/session.h b/tools/perf/util/session.h index 6d663dc76404..b44afc75d1cc 100644 --- a/tools/perf/util/session.h +++ b/tools/perf/util/session.h @@ -15,18 +15,24 @@ struct ip_callchain; struct thread; +struct auxtrace; +struct itrace_synth_opts; + struct perf_session { struct perf_header header; struct machines machines; struct perf_evlist *evlist; + struct auxtrace *auxtrace; + struct itrace_synth_opts *itrace_synth_opts; + struct list_head auxtrace_index; struct trace_event tevent; - struct events_stats stats; bool repipe; bool one_mmap; void *one_mmap_addr; u64 one_mmap_offset; struct ordered_events ordered_events; struct perf_data_file *file; + struct perf_tool *tool; }; #define PRINT_IP_OPT_IP (1<<0) @@ -49,20 +55,13 @@ int perf_session__peek_event(struct perf_session *session, off_t file_offset, union perf_event **event_ptr, struct perf_sample *sample); -int perf_session__process_events(struct perf_session *session, - struct perf_tool *tool); +int perf_session__process_events(struct perf_session *session); -int perf_session_queue_event(struct perf_session *s, union perf_event *event, - struct perf_tool *tool, struct perf_sample *sample, - u64 file_offset); +int perf_session__queue_event(struct perf_session *s, union perf_event *event, + struct perf_sample *sample, u64 file_offset); void perf_tool__fill_defaults(struct perf_tool *tool); -int perf_session__deliver_event(struct perf_session *session, - union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool, u64 file_offset); - int perf_session__resolve_callchain(struct perf_session *session, struct perf_evsel *evsel, struct thread *thread, @@ -126,8 +125,7 @@ extern volatile int session_done; int perf_session__deliver_synth_event(struct perf_session *session, union perf_event *event, - struct perf_sample *sample, - struct perf_tool *tool); + struct perf_sample *sample); int perf_event__process_id_index(struct perf_tool *tool, union perf_event *event, diff --git a/tools/perf/util/setup.py b/tools/perf/util/setup.py index d0aee4b9dfd4..1833103768cb 100644 --- a/tools/perf/util/setup.py +++ b/tools/perf/util/setup.py @@ -25,7 +25,7 @@ cflags += ['-fno-strict-aliasing', '-Wno-write-strings', '-Wno-unused-parameter' build_lib = getenv('PYTHON_EXTBUILD_LIB') build_tmp = getenv('PYTHON_EXTBUILD_TMP') libtraceevent = getenv('LIBTRACEEVENT') -libapikfs = getenv('LIBAPIKFS') +libapikfs = getenv('LIBAPI') ext_sources = [f.strip() for f in file('util/python-ext-sources') if len(f.strip()) > 0 and f[0] != '#'] diff --git a/tools/perf/util/sort.c b/tools/perf/util/sort.c index 7a39c1ed8d37..4c65a143a34c 100644 --- a/tools/perf/util/sort.c +++ b/tools/perf/util/sort.c @@ -89,14 +89,14 @@ static int64_t sort__comm_cmp(struct hist_entry *left, struct hist_entry *right) { /* Compare the addr that should be unique among comm */ - return comm__str(right->comm) - comm__str(left->comm); + return strcmp(comm__str(right->comm), comm__str(left->comm)); } static int64_t sort__comm_collapse(struct hist_entry *left, struct hist_entry *right) { /* Compare the addr that should be unique among comm */ - return 
comm__str(right->comm) - comm__str(left->comm); + return strcmp(comm__str(right->comm), comm__str(left->comm)); } static int64_t @@ -182,18 +182,16 @@ static int64_t _sort__addr_cmp(u64 left_ip, u64 right_ip) static int64_t _sort__sym_cmp(struct symbol *sym_l, struct symbol *sym_r) { - u64 ip_l, ip_r; - if (!sym_l || !sym_r) return cmp_null(sym_l, sym_r); if (sym_l == sym_r) return 0; - ip_l = sym_l->start; - ip_r = sym_r->start; + if (sym_l->start != sym_r->start) + return (int64_t)(sym_r->start - sym_l->start); - return (int64_t)(ip_r - ip_l); + return (int64_t)(sym_r->end - sym_l->end); } static int64_t @@ -1463,6 +1461,15 @@ int sort_dimension__add(const char *tok) sort__has_parent = 1; } else if (sd->entry == &sort_sym) { sort__has_sym = 1; + /* + * perf diff displays the performance difference amongst + * two or more perf.data files. Those files could come + * from different binaries. So we should not compare + * their ips, but the name of symbol. + */ + if (sort__mode == SORT_MODE__DIFF) + sd->entry->se_collapse = sort__sym_sort; + } else if (sd->entry == &sort_dso) { sort__has_dso = 1; } diff --git a/tools/perf/util/sort.h b/tools/perf/util/sort.h index c03e4ff8beff..e97cd476d336 100644 --- a/tools/perf/util/sort.h +++ b/tools/perf/util/sort.h @@ -44,6 +44,7 @@ extern struct sort_entry sort_dso_to; extern struct sort_entry sort_sym_from; extern struct sort_entry sort_sym_to; extern enum sort_type sort__first_dimension; +extern const char default_mem_sort_order[]; struct he_stat { u64 period; @@ -57,15 +58,16 @@ struct he_stat { struct hist_entry_diff { bool computed; + union { + /* PERF_HPP__DELTA */ + double period_ratio_delta; - /* PERF_HPP__DELTA */ - double period_ratio_delta; - - /* PERF_HPP__RATIO */ - double period_ratio; + /* PERF_HPP__RATIO */ + double period_ratio; - /* HISTC_WEIGHTED_DIFF */ - s64 wdiff; + /* HISTC_WEIGHTED_DIFF */ + s64 wdiff; + }; }; /** @@ -91,22 +93,28 @@ struct hist_entry { s32 cpu; u8 cpumode; - struct hist_entry_diff diff; - /* We are added by hists__add_dummy_entry. */ bool dummy; - /* XXX These two should move to some tree widget lib */ - u16 row_offset; - u16 nr_rows; - - bool init_have_children; char level; - bool used; u8 filtered; + union { + /* + * Since perf diff only supports the stdio output, TUI + * fields are only accessed from perf report (or perf + * top). So make it an union to reduce memory usage. 
+ */ + struct hist_entry_diff diff; + struct /* for TUI */ { + u16 row_offset; + u16 nr_rows; + bool init_have_children; + bool unfolded; + bool has_children; + }; + }; char *srcline; struct symbol *parent; - unsigned long position; struct rb_root sorted_chain; struct branch_info *branch_info; struct hists *hists; diff --git a/tools/perf/util/stat-shadow.c b/tools/perf/util/stat-shadow.c new file mode 100644 index 000000000000..53e8bb7bc852 --- /dev/null +++ b/tools/perf/util/stat-shadow.c @@ -0,0 +1,434 @@ +#include <stdio.h> +#include "evsel.h" +#include "stat.h" +#include "color.h" + +enum { + CTX_BIT_USER = 1 << 0, + CTX_BIT_KERNEL = 1 << 1, + CTX_BIT_HV = 1 << 2, + CTX_BIT_HOST = 1 << 3, + CTX_BIT_IDLE = 1 << 4, + CTX_BIT_MAX = 1 << 5, +}; + +#define NUM_CTX CTX_BIT_MAX + +static struct stats runtime_nsecs_stats[MAX_NR_CPUS]; +static struct stats runtime_cycles_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_stalled_cycles_front_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_stalled_cycles_back_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_branches_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_cacherefs_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_l1_dcache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_l1_icache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_ll_cache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_itlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_dtlb_cache_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_cycles_in_tx_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_transaction_stats[NUM_CTX][MAX_NR_CPUS]; +static struct stats runtime_elision_stats[NUM_CTX][MAX_NR_CPUS]; + +struct stats walltime_nsecs_stats; + +static int evsel_context(struct perf_evsel *evsel) +{ + int ctx = 0; + + if (evsel->attr.exclude_kernel) + ctx |= CTX_BIT_KERNEL; + if (evsel->attr.exclude_user) + ctx |= CTX_BIT_USER; + if (evsel->attr.exclude_hv) + ctx |= CTX_BIT_HV; + if (evsel->attr.exclude_host) + ctx |= CTX_BIT_HOST; + if (evsel->attr.exclude_idle) + ctx |= CTX_BIT_IDLE; + + return ctx; +} + +void perf_stat__reset_shadow_stats(void) +{ + memset(runtime_nsecs_stats, 0, sizeof(runtime_nsecs_stats)); + memset(runtime_cycles_stats, 0, sizeof(runtime_cycles_stats)); + memset(runtime_stalled_cycles_front_stats, 0, sizeof(runtime_stalled_cycles_front_stats)); + memset(runtime_stalled_cycles_back_stats, 0, sizeof(runtime_stalled_cycles_back_stats)); + memset(runtime_branches_stats, 0, sizeof(runtime_branches_stats)); + memset(runtime_cacherefs_stats, 0, sizeof(runtime_cacherefs_stats)); + memset(runtime_l1_dcache_stats, 0, sizeof(runtime_l1_dcache_stats)); + memset(runtime_l1_icache_stats, 0, sizeof(runtime_l1_icache_stats)); + memset(runtime_ll_cache_stats, 0, sizeof(runtime_ll_cache_stats)); + memset(runtime_itlb_cache_stats, 0, sizeof(runtime_itlb_cache_stats)); + memset(runtime_dtlb_cache_stats, 0, sizeof(runtime_dtlb_cache_stats)); + memset(runtime_cycles_in_tx_stats, 0, + sizeof(runtime_cycles_in_tx_stats)); + memset(runtime_transaction_stats, 0, + sizeof(runtime_transaction_stats)); + memset(runtime_elision_stats, 0, sizeof(runtime_elision_stats)); + memset(&walltime_nsecs_stats, 0, sizeof(walltime_nsecs_stats)); +} + +/* + * Update various tracking values we maintain to print + * more semantic information such as miss/hit ratios, + * instruction rates, etc: + */ +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu) +{ + int ctx = 
evsel_context(counter); + + if (perf_evsel__match(counter, SOFTWARE, SW_TASK_CLOCK)) + update_stats(&runtime_nsecs_stats[cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_CPU_CYCLES)) + update_stats(&runtime_cycles_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, CYCLES_IN_TX)) + update_stats(&runtime_cycles_in_tx_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, TRANSACTION_START)) + update_stats(&runtime_transaction_stats[ctx][cpu], count[0]); + else if (perf_stat_evsel__is(counter, ELISION_START)) + update_stats(&runtime_elision_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) + update_stats(&runtime_stalled_cycles_front_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_STALLED_CYCLES_BACKEND)) + update_stats(&runtime_stalled_cycles_back_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_BRANCH_INSTRUCTIONS)) + update_stats(&runtime_branches_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HARDWARE, HW_CACHE_REFERENCES)) + update_stats(&runtime_cacherefs_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1D)) + update_stats(&runtime_l1_dcache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_L1I)) + update_stats(&runtime_l1_icache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_LL)) + update_stats(&runtime_ll_cache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_DTLB)) + update_stats(&runtime_dtlb_cache_stats[ctx][cpu], count[0]); + else if (perf_evsel__match(counter, HW_CACHE, HW_CACHE_ITLB)) + update_stats(&runtime_itlb_cache_stats[ctx][cpu], count[0]); +} + +/* used for get_ratio_color() */ +enum grc_type { + GRC_STALLED_CYCLES_FE, + GRC_STALLED_CYCLES_BE, + GRC_CACHE_MISSES, + GRC_MAX_NR +}; + +static const char *get_ratio_color(enum grc_type type, double ratio) +{ + static const double grc_table[GRC_MAX_NR][3] = { + [GRC_STALLED_CYCLES_FE] = { 50.0, 30.0, 10.0 }, + [GRC_STALLED_CYCLES_BE] = { 75.0, 50.0, 20.0 }, + [GRC_CACHE_MISSES] = { 20.0, 10.0, 5.0 }, + }; + const char *color = PERF_COLOR_NORMAL; + + if (ratio > grc_table[type][0]) + color = PERF_COLOR_RED; + else if (ratio > grc_table[type][1]) + color = PERF_COLOR_MAGENTA; + else if (ratio > grc_table[type][2]) + color = PERF_COLOR_YELLOW; + + return color; +} + +static void print_stalled_cycles_frontend(FILE *out, int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_STALLED_CYCLES_FE, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " frontend cycles idle "); +} + +static void print_stalled_cycles_backend(FILE *out, int cpu, + struct perf_evsel *evsel + __maybe_unused, double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_STALLED_CYCLES_BE, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " backend cycles idle "); +} + +static void print_branch_misses(FILE *out, int cpu, + struct perf_evsel *evsel
__maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_branches_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all branches "); +} + +static void print_l1_dcache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_l1_dcache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all L1-dcache hits "); +} + +static void print_l1_icache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_l1_icache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all L1-icache hits "); +} + +static void print_dtlb_cache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_dtlb_cache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all dTLB cache hits "); +} + +static void print_itlb_cache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_itlb_cache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all iTLB cache hits "); +} + +static void print_ll_cache_misses(FILE *out, int cpu, + struct perf_evsel *evsel __maybe_unused, + double avg) +{ + double total, ratio = 0.0; + const char *color; + int ctx = evsel_context(evsel); + + total = avg_stats(&runtime_ll_cache_stats[ctx][cpu]); + + if (total) + ratio = avg / total * 100.0; + + color = get_ratio_color(GRC_CACHE_MISSES, ratio); + + fprintf(out, " # "); + color_fprintf(out, color, "%6.2f%%", ratio); + fprintf(out, " of all LL-cache hits "); +} + +void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, + double avg, int cpu, enum aggr_mode aggr) +{ + double total, ratio = 0.0, total2; + int ctx = evsel_context(evsel); + + if (perf_evsel__match(evsel, HARDWARE, HW_INSTRUCTIONS)) { + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + if (total) { + ratio = avg / total; + fprintf(out, " # %5.2f insns per cycle ", ratio); + } else { + fprintf(out, " "); + } + total = avg_stats(&runtime_stalled_cycles_front_stats[ctx][cpu]); + total = max(total, avg_stats(&runtime_stalled_cycles_back_stats[ctx][cpu])); + + if (total && avg) { + ratio = total / avg; + fprintf(out, "\n"); + if (aggr == AGGR_NONE) + fprintf(out, " "); + fprintf(out, " # %5.2f stalled cycles per insn", ratio); + } + + } 
else if (perf_evsel__match(evsel, HARDWARE, HW_BRANCH_MISSES) && + runtime_branches_stats[ctx][cpu].n != 0) { + print_branch_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1D | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_l1_dcache_stats[ctx][cpu].n != 0) { + print_l1_dcache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_L1I | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_l1_icache_stats[ctx][cpu].n != 0) { + print_l1_icache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_DTLB | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_dtlb_cache_stats[ctx][cpu].n != 0) { + print_dtlb_cache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_ITLB | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_itlb_cache_stats[ctx][cpu].n != 0) { + print_itlb_cache_misses(out, cpu, evsel, avg); + } else if ( + evsel->attr.type == PERF_TYPE_HW_CACHE && + evsel->attr.config == ( PERF_COUNT_HW_CACHE_LL | + ((PERF_COUNT_HW_CACHE_OP_READ) << 8) | + ((PERF_COUNT_HW_CACHE_RESULT_MISS) << 16)) && + runtime_ll_cache_stats[ctx][cpu].n != 0) { + print_ll_cache_misses(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CACHE_MISSES) && + runtime_cacherefs_stats[ctx][cpu].n != 0) { + total = avg_stats(&runtime_cacherefs_stats[ctx][cpu]); + + if (total) + ratio = avg * 100 / total; + + fprintf(out, " # %8.3f %% of all cache refs ", ratio); + + } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_FRONTEND)) { + print_stalled_cycles_frontend(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_STALLED_CYCLES_BACKEND)) { + print_stalled_cycles_backend(out, cpu, evsel, avg); + } else if (perf_evsel__match(evsel, HARDWARE, HW_CPU_CYCLES)) { + total = avg_stats(&runtime_nsecs_stats[cpu]); + + if (total) { + ratio = avg / total; + fprintf(out, " # %8.3f GHz ", ratio); + } else { + fprintf(out, " "); + } + } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX)) { + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + if (total) + fprintf(out, + " # %5.2f%% transactional cycles ", + 100.0 * (avg / total)); + } else if (perf_stat_evsel__is(evsel, CYCLES_IN_TX_CP)) { + total = avg_stats(&runtime_cycles_stats[ctx][cpu]); + total2 = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + if (total2 < avg) + total2 = avg; + if (total) + fprintf(out, + " # %5.2f%% aborted cycles ", + 100.0 * ((total2-avg) / total)); + } else if (perf_stat_evsel__is(evsel, TRANSACTION_START) && + avg > 0 && + runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + + if (total) + ratio = total / avg; + + fprintf(out, " # %8.0f cycles / transaction ", ratio); + } else if (perf_stat_evsel__is(evsel, ELISION_START) && + avg > 0 && + runtime_cycles_in_tx_stats[ctx][cpu].n != 0) { + total = avg_stats(&runtime_cycles_in_tx_stats[ctx][cpu]); + + if (total) + ratio = total / avg; + + fprintf(out, " # %8.0f cycles / elision ", ratio); + } else if (runtime_nsecs_stats[cpu].n != 0) { + char unit = 'M'; + + total = 
avg_stats(&runtime_nsecs_stats[cpu]); + + if (total) + ratio = 1000.0 * avg / total; + if (ratio < 0.001) { + ratio *= 1000; + unit = 'K'; + } + + fprintf(out, " # %8.3f %c/sec ", ratio, unit); + } else { + fprintf(out, " "); + } +} diff --git a/tools/perf/util/stat.c b/tools/perf/util/stat.c index 6506b3dfb605..f2a0d1521e26 100644 --- a/tools/perf/util/stat.c +++ b/tools/perf/util/stat.c @@ -1,6 +1,8 @@ #include <math.h> - #include "stat.h" +#include "evlist.h" +#include "evsel.h" +#include "thread_map.h" void update_stats(struct stats *stats, u64 val) { @@ -61,3 +63,178 @@ double rel_stddev_stats(double stddev, double avg) return pct; } + +bool __perf_evsel_stat__is(struct perf_evsel *evsel, + enum perf_stat_evsel_id id) +{ + struct perf_stat *ps = evsel->priv; + + return ps->id == id; +} + +#define ID(id, name) [PERF_STAT_EVSEL_ID__##id] = #name +static const char *id_str[PERF_STAT_EVSEL_ID__MAX] = { + ID(NONE, x), + ID(CYCLES_IN_TX, cpu/cycles-t/), + ID(TRANSACTION_START, cpu/tx-start/), + ID(ELISION_START, cpu/el-start/), + ID(CYCLES_IN_TX_CP, cpu/cycles-ct/), +}; +#undef ID + +void perf_stat_evsel_id_init(struct perf_evsel *evsel) +{ + struct perf_stat *ps = evsel->priv; + int i; + + /* ps->id is 0 hence PERF_STAT_EVSEL_ID__NONE by default */ + + for (i = 0; i < PERF_STAT_EVSEL_ID__MAX; i++) { + if (!strcmp(perf_evsel__name(evsel), id_str[i])) { + ps->id = i; + break; + } + } +} + +struct perf_counts *perf_counts__new(int ncpus, int nthreads) +{ + struct perf_counts *counts = zalloc(sizeof(*counts)); + + if (counts) { + struct xyarray *values; + + values = xyarray__new(ncpus, nthreads, sizeof(struct perf_counts_values)); + if (!values) { + free(counts); + return NULL; + } + + counts->values = values; + } + + return counts; +} + +void perf_counts__delete(struct perf_counts *counts) +{ + if (counts) { + xyarray__delete(counts->values); + free(counts); + } +} + +static void perf_counts__reset(struct perf_counts *counts) +{ + xyarray__reset(counts->values); +} + +void perf_evsel__reset_counts(struct perf_evsel *evsel) +{ + perf_counts__reset(evsel->counts); +} + +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads) +{ + evsel->counts = perf_counts__new(ncpus, nthreads); + return evsel->counts != NULL ? 0 : -ENOMEM; +} + +void perf_evsel__free_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->counts); + evsel->counts = NULL; +} + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel) +{ + int i; + struct perf_stat *ps = evsel->priv; + + for (i = 0; i < 3; i++) + init_stats(&ps->res_stats[i]); + + perf_stat_evsel_id_init(evsel); +} + +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel) +{ + evsel->priv = zalloc(sizeof(struct perf_stat)); + if (evsel->priv == NULL) + return -ENOMEM; + perf_evsel__reset_stat_priv(evsel); + return 0; +} + +void perf_evsel__free_stat_priv(struct perf_evsel *evsel) +{ + zfree(&evsel->priv); +} + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads) +{ + struct perf_counts *counts; + + counts = perf_counts__new(ncpus, nthreads); + if (counts) + evsel->prev_raw_counts = counts; + + return counts ? 
0 : -ENOMEM; +} + +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel) +{ + perf_counts__delete(evsel->prev_raw_counts); + evsel->prev_raw_counts = NULL; +} + +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw) +{ + int ncpus = perf_evsel__nr_cpus(evsel); + int nthreads = thread_map__nr(evsel->threads); + + if (perf_evsel__alloc_stat_priv(evsel) < 0 || + perf_evsel__alloc_counts(evsel, ncpus, nthreads) < 0 || + (alloc_raw && perf_evsel__alloc_prev_raw_counts(evsel, ncpus, nthreads) < 0)) + return -ENOMEM; + + return 0; +} + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + if (perf_evsel__alloc_stats(evsel, alloc_raw)) + goto out_free; + } + + return 0; + +out_free: + perf_evlist__free_stats(evlist); + return -1; +} + +void perf_evlist__free_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__free_stat_priv(evsel); + perf_evsel__free_counts(evsel); + perf_evsel__free_prev_raw_counts(evsel); + } +} + +void perf_evlist__reset_stats(struct perf_evlist *evlist) +{ + struct perf_evsel *evsel; + + evlist__for_each(evlist, evsel) { + perf_evsel__reset_stat_priv(evsel); + perf_evsel__reset_counts(evsel); + } +} diff --git a/tools/perf/util/stat.h b/tools/perf/util/stat.h index 5667fc3e39cf..1cfbe0a980ac 100644 --- a/tools/perf/util/stat.h +++ b/tools/perf/util/stat.h @@ -2,6 +2,8 @@ #define __PERF_STATS_H #include <linux/types.h> +#include <stdio.h> +#include "xyarray.h" struct stats { @@ -9,6 +11,51 @@ struct stats u64 max, min; }; +enum perf_stat_evsel_id { + PERF_STAT_EVSEL_ID__NONE = 0, + PERF_STAT_EVSEL_ID__CYCLES_IN_TX, + PERF_STAT_EVSEL_ID__TRANSACTION_START, + PERF_STAT_EVSEL_ID__ELISION_START, + PERF_STAT_EVSEL_ID__CYCLES_IN_TX_CP, + PERF_STAT_EVSEL_ID__MAX, +}; + +struct perf_stat { + struct stats res_stats[3]; + enum perf_stat_evsel_id id; +}; + +enum aggr_mode { + AGGR_NONE, + AGGR_GLOBAL, + AGGR_SOCKET, + AGGR_CORE, + AGGR_THREAD, +}; + +struct perf_counts_values { + union { + struct { + u64 val; + u64 ena; + u64 run; + }; + u64 values[3]; + }; +}; + +struct perf_counts { + s8 scaled; + struct perf_counts_values aggr; + struct xyarray *values; +}; + +static inline struct perf_counts_values* +perf_counts(struct perf_counts *counts, int cpu, int thread) +{ + return xyarray__entry(counts->values, cpu, thread); +} + void update_stats(struct stats *stats, u64 val); double avg_stats(struct stats *stats); double stddev_stats(struct stats *stats); @@ -22,4 +69,44 @@ static inline void init_stats(struct stats *stats) stats->min = (u64) -1; stats->max = 0; } + +struct perf_evsel; +struct perf_evlist; + +bool __perf_evsel_stat__is(struct perf_evsel *evsel, + enum perf_stat_evsel_id id); + +#define perf_stat_evsel__is(evsel, id) \ + __perf_evsel_stat__is(evsel, PERF_STAT_EVSEL_ID__ ## id) + +void perf_stat_evsel_id_init(struct perf_evsel *evsel); + +extern struct stats walltime_nsecs_stats; + +void perf_stat__reset_shadow_stats(void); +void perf_stat__update_shadow_stats(struct perf_evsel *counter, u64 *count, + int cpu); +void perf_stat__print_shadow_stats(FILE *out, struct perf_evsel *evsel, + double avg, int cpu, enum aggr_mode aggr); + +struct perf_counts *perf_counts__new(int ncpus, int nthreads); +void perf_counts__delete(struct perf_counts *counts); + +void perf_evsel__reset_counts(struct perf_evsel *evsel); +int perf_evsel__alloc_counts(struct perf_evsel *evsel, int ncpus, int nthreads); +void 
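With counts now stored in an xyarray cell per (cpu, thread), readers index them through the perf_counts() accessor rather than a flat per-cpu array. A hedged sketch of the read side, assuming it is compiled inside tools/perf where these headers are available; sum_raw_counts() is an illustrative helper, not part of the patch:

#include "util/evsel.h"
#include "util/stat.h"
#include "util/thread_map.h"

/* Illustrative helper: add up the raw counter values stored per
 * (cpu, thread) cell in evsel->counts after they have been read. */
static u64 sum_raw_counts(struct perf_evsel *evsel)
{
	int ncpus = perf_evsel__nr_cpus(evsel);
	int nthreads = thread_map__nr(evsel->threads);
	u64 sum = 0;
	int cpu, thread;

	for (cpu = 0; cpu < ncpus; cpu++)
		for (thread = 0; thread < nthreads; thread++)
			sum += perf_counts(evsel->counts, cpu, thread)->val;

	return sum;
}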
perf_evsel__free_counts(struct perf_evsel *evsel); + +void perf_evsel__reset_stat_priv(struct perf_evsel *evsel); +int perf_evsel__alloc_stat_priv(struct perf_evsel *evsel); +void perf_evsel__free_stat_priv(struct perf_evsel *evsel); + +int perf_evsel__alloc_prev_raw_counts(struct perf_evsel *evsel, + int ncpus, int nthreads); +void perf_evsel__free_prev_raw_counts(struct perf_evsel *evsel); + +int perf_evsel__alloc_stats(struct perf_evsel *evsel, bool alloc_raw); + +int perf_evlist__alloc_stats(struct perf_evlist *evlist, bool alloc_raw); +void perf_evlist__free_stats(struct perf_evlist *evlist); +void perf_evlist__reset_stats(struct perf_evlist *evlist); #endif diff --git a/tools/perf/util/strfilter.c b/tools/perf/util/strfilter.c index 79a757a2a15c..bcae659b6546 100644 --- a/tools/perf/util/strfilter.c +++ b/tools/perf/util/strfilter.c @@ -170,6 +170,46 @@ struct strfilter *strfilter__new(const char *rules, const char **err) return filter; } +static int strfilter__append(struct strfilter *filter, bool _or, + const char *rules, const char **err) +{ + struct strfilter_node *right, *root; + const char *ep = NULL; + + if (!filter || !rules) + return -EINVAL; + + right = strfilter_node__new(rules, &ep); + if (!right || *ep != '\0') { + if (err) + *err = ep; + goto error; + } + root = strfilter_node__alloc(_or ? OP_or : OP_and, filter->root, right); + if (!root) { + ep = NULL; + goto error; + } + + filter->root = root; + return 0; + +error: + strfilter_node__delete(right); + return ep ? -EINVAL : -ENOMEM; +} + +int strfilter__or(struct strfilter *filter, const char *rules, const char **err) +{ + return strfilter__append(filter, true, rules, err); +} + +int strfilter__and(struct strfilter *filter, const char *rules, + const char **err) +{ + return strfilter__append(filter, false, rules, err); +} + static bool strfilter_node__compare(struct strfilter_node *node, const char *str) { @@ -197,3 +237,70 @@ bool strfilter__compare(struct strfilter *filter, const char *str) return false; return strfilter_node__compare(filter->root, str); } + +static int strfilter_node__sprint(struct strfilter_node *node, char *buf); + +/* sprint node in parenthesis if needed */ +static int strfilter_node__sprint_pt(struct strfilter_node *node, char *buf) +{ + int len; + int pt = node->r ? 
2 : 0; /* don't need to check node->l */ + + if (buf && pt) + *buf++ = '('; + len = strfilter_node__sprint(node, buf); + if (len < 0) + return len; + if (buf && pt) + *(buf + len) = ')'; + return len + pt; +} + +static int strfilter_node__sprint(struct strfilter_node *node, char *buf) +{ + int len = 0, rlen; + + if (!node || !node->p) + return -EINVAL; + + switch (*node->p) { + case '|': + case '&': + len = strfilter_node__sprint_pt(node->l, buf); + if (len < 0) + return len; + case '!': + if (buf) { + *(buf + len++) = *node->p; + buf += len; + } else + len++; + rlen = strfilter_node__sprint_pt(node->r, buf); + if (rlen < 0) + return rlen; + len += rlen; + break; + default: + len = strlen(node->p); + if (buf) + strcpy(buf, node->p); + } + + return len; +} + +char *strfilter__string(struct strfilter *filter) +{ + int len; + char *ret = NULL; + + len = strfilter_node__sprint(filter->root, NULL); + if (len < 0) + return NULL; + + ret = malloc(len + 1); + if (ret) + strfilter_node__sprint(filter->root, ret); + + return ret; +} diff --git a/tools/perf/util/strfilter.h b/tools/perf/util/strfilter.h index fe611f3c9e39..cff5eda88728 100644 --- a/tools/perf/util/strfilter.h +++ b/tools/perf/util/strfilter.h @@ -29,6 +29,32 @@ struct strfilter { struct strfilter *strfilter__new(const char *rules, const char **err); /** + * strfilter__or - Append an additional rule by logical-or + * @filter: Original string filter + * @rules: Filter rule to be appended at left of the root of + * @filter by using logical-or. + * @err: Pointer which points an error detected on @rules + * + * Parse @rules and join it to the @filter by using logical-or. + * Return 0 if success, or return the error code. + */ +int strfilter__or(struct strfilter *filter, + const char *rules, const char **err); + +/** + * strfilter__add - Append an additional rule by logical-and + * @filter: Original string filter + * @rules: Filter rule to be appended at left of the root of + * @filter by using logical-and. + * @err: Pointer which points an error detected on @rules + * + * Parse @rules and join it to the @filter by using logical-and. + * Return 0 if success, or return the error code. + */ +int strfilter__and(struct strfilter *filter, + const char *rules, const char **err); + +/** * strfilter__compare - compare given string and a string filter * @filter: String filter * @str: target string @@ -45,4 +71,13 @@ bool strfilter__compare(struct strfilter *filter, const char *str); */ void strfilter__delete(struct strfilter *filter); +/** + * strfilter__string - Reconstruct a rule string from filter + * @filter: String filter to reconstruct + * + * Reconstruct a rule string from @filter. This will be good for + * debug messages. Note that returning string must be freed afterward. 
+ */ +char *strfilter__string(struct strfilter *filter); + #endif diff --git a/tools/perf/util/svghelper.c b/tools/perf/util/svghelper.c index 283d3e73e2f2..eec6c1149f44 100644 --- a/tools/perf/util/svghelper.c +++ b/tools/perf/util/svghelper.c @@ -748,7 +748,7 @@ static int str_to_bitmap(char *s, cpumask_t *b) set_bit(c, cpumask_bits(b)); } - cpu_map__delete(m); + cpu_map__put(m); return ret; } diff --git a/tools/perf/util/symbol-elf.c b/tools/perf/util/symbol-elf.c index 33b7a2aef713..65f7e389ae09 100644 --- a/tools/perf/util/symbol-elf.c +++ b/tools/perf/util/symbol-elf.c @@ -74,6 +74,10 @@ static inline uint8_t elf_sym__type(const GElf_Sym *sym) return GELF_ST_TYPE(sym->st_info); } +#ifndef STT_GNU_IFUNC +#define STT_GNU_IFUNC 10 +#endif + static inline int elf_sym__is_function(const GElf_Sym *sym) { return (elf_sym__type(sym) == STT_FUNC || @@ -575,32 +579,37 @@ static int dso__swap_init(struct dso *dso, unsigned char eidata) static int decompress_kmodule(struct dso *dso, const char *name, enum dso_binary_type type) { - int fd; - const char *ext = strrchr(name, '.'); + int fd = -1; char tmpbuf[] = "/tmp/perf-kmod-XXXXXX"; + struct kmod_path m; if (type != DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP && type != DSO_BINARY_TYPE__GUEST_KMODULE_COMP && type != DSO_BINARY_TYPE__BUILD_ID_CACHE) return -1; - if (!ext || !is_supported_compression(ext + 1)) { - ext = strrchr(dso->name, '.'); - if (!ext || !is_supported_compression(ext + 1)) - return -1; - } + if (type == DSO_BINARY_TYPE__BUILD_ID_CACHE) + name = dso->long_name; - fd = mkstemp(tmpbuf); - if (fd < 0) + if (kmod_path__parse_ext(&m, name) || !m.comp) return -1; - if (!decompress_to_file(ext + 1, name, fd)) { + fd = mkstemp(tmpbuf); + if (fd < 0) { + dso->load_errno = errno; + goto out; + } + + if (!decompress_to_file(m.ext, name, fd)) { + dso->load_errno = DSO_LOAD_ERRNO__DECOMPRESSION_FAILURE; close(fd); fd = -1; } unlink(tmpbuf); +out: + free(m.ext); return fd; } @@ -621,6 +630,11 @@ void symsrc__destroy(struct symsrc *ss) close(ss->fd); } +bool __weak elf__needs_adjust_symbols(GElf_Ehdr ehdr) +{ + return ehdr.e_type == ET_EXEC || ehdr.e_type == ET_REL; +} + int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { @@ -629,37 +643,50 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, Elf *elf; int fd; - if (dso__needs_decompress(dso)) + if (dso__needs_decompress(dso)) { fd = decompress_kmodule(dso, name, type); - else + if (fd < 0) + return -1; + } else { fd = open(name, O_RDONLY); - - if (fd < 0) - return -1; + if (fd < 0) { + dso->load_errno = errno; + return -1; + } + } elf = elf_begin(fd, PERF_ELF_C_READ_MMAP, NULL); if (elf == NULL) { pr_debug("%s: cannot read %s ELF file.\n", __func__, name); + dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; goto out_close; } if (gelf_getehdr(elf, &ehdr) == NULL) { + dso->load_errno = DSO_LOAD_ERRNO__INVALID_ELF; pr_debug("%s: cannot get elf header.\n", __func__); goto out_elf_end; } - if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) + if (dso__swap_init(dso, ehdr.e_ident[EI_DATA])) { + dso->load_errno = DSO_LOAD_ERRNO__INTERNAL_ERROR; goto out_elf_end; + } /* Always reject images with a mismatched build-id: */ if (dso->has_build_id) { u8 build_id[BUILD_ID_SIZE]; - if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) + if (elf_read_build_id(elf, build_id, BUILD_ID_SIZE) < 0) { + dso->load_errno = DSO_LOAD_ERRNO__CANNOT_READ_BUILDID; goto out_elf_end; + } - if (!dso__build_id_equal(dso, build_id)) + if 
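Taken together, the new append and reconstruct helpers let a filter be grown incrementally and then dumped for debugging. A hedged usage sketch, assuming it is compiled inside tools/perf; the rule strings are only illustrative:

#include <stdio.h>
#include <stdlib.h>
#include "util/strfilter.h"

static void filter_demo(void)
{
	const char *err = NULL;
	struct strfilter *filter;
	char *rule;

	filter = strfilter__new("vfs_read*", &err);
	if (!filter)
		return;

	/* widen the filter: also accept vfs_write* */
	if (strfilter__or(filter, "vfs_write*", &err))
		goto out;

	/* narrow it again: drop anything ending in _iter */
	if (strfilter__and(filter, "!*_iter", &err))
		goto out;

	rule = strfilter__string(filter);	/* caller must free the result */
	if (rule) {
		printf("filter: %s\n", rule);
		free(rule);
	}

	printf("vfs_read matches: %d\n", strfilter__compare(filter, "vfs_read"));
out:
	strfilter__delete(filter);
}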
(!dso__build_id_equal(dso, build_id)) { + pr_debug("%s: build id mismatch for %s.\n", __func__, name); + dso->load_errno = DSO_LOAD_ERRNO__MISMATCHING_BUILDID; goto out_elf_end; + } } ss->is_64_bit = (gelf_getclass(elf) == ELFCLASS64); @@ -690,13 +717,14 @@ int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, ".gnu.prelink_undo", NULL) != NULL); } else { - ss->adjust_symbols = ehdr.e_type == ET_EXEC || - ehdr.e_type == ET_REL; + ss->adjust_symbols = elf__needs_adjust_symbols(ehdr); } ss->name = strdup(name); - if (!ss->name) + if (!ss->name) { + dso->load_errno = errno; goto out_elf_end; + } ss->elf = elf; ss->fd = fd; @@ -748,11 +776,14 @@ static bool want_demangle(bool is_kernel_sym) return is_kernel_sym ? symbol_conf.demangle_kernel : symbol_conf.demangle; } +void __weak arch__elf_sym_adjust(GElf_Sym *sym __maybe_unused) { } + int dso__load_sym(struct dso *dso, struct map *map, struct symsrc *syms_ss, struct symsrc *runtime_ss, symbol_filter_t filter, int kmodule) { struct kmap *kmap = dso->kernel ? map__kmap(map) : NULL; + struct map_groups *kmaps = kmap ? map__kmaps(map) : NULL; struct map *curr_map = map; struct dso *curr_dso = dso; Elf_Data *symstrs, *secstrs; @@ -768,6 +799,9 @@ int dso__load_sym(struct dso *dso, struct map *map, int nr = 0; bool remap_kernel = false, adjust_kernel_syms = false; + if (kmap && !kmaps) + return -1; + dso->symtab_type = syms_ss->type; dso->is_64_bit = syms_ss->is_64_bit; dso->rel = syms_ss->ehdr.e_type == ET_REL; @@ -864,10 +898,9 @@ int dso__load_sym(struct dso *dso, struct map *map, /* Reject ARM ELF "mapping symbols": these aren't unique and * don't identify functions, so will confuse the profile * output: */ - if (ehdr.e_machine == EM_ARM) { - if (!strcmp(elf_name, "$a") || - !strcmp(elf_name, "$d") || - !strcmp(elf_name, "$t")) + if (ehdr.e_machine == EM_ARM || ehdr.e_machine == EM_AARCH64) { + if (elf_name[0] == '$' && strchr("adtx", elf_name[1]) + && (elf_name[2] == '\0' || elf_name[2] == '.')) continue; } @@ -909,6 +942,8 @@ int dso__load_sym(struct dso *dso, struct map *map, (sym.st_value & 1)) --sym.st_value; + arch__elf_sym_adjust(&sym); + if (dso->kernel || kmodule) { char dso_name[PATH_MAX]; @@ -936,8 +971,12 @@ int dso__load_sym(struct dso *dso, struct map *map, map->map_ip = map__map_ip; map->unmap_ip = map__unmap_ip; /* Ensure maps are correctly ordered */ - map_groups__remove(kmap->kmaps, map); - map_groups__insert(kmap->kmaps, map); + if (kmaps) { + map__get(map); + map_groups__remove(kmaps, map); + map_groups__insert(kmaps, map); + map__put(map); + } } /* @@ -961,7 +1000,7 @@ int dso__load_sym(struct dso *dso, struct map *map, snprintf(dso_name, sizeof(dso_name), "%s%s", dso->short_name, section_name); - curr_map = map_groups__find_by_name(kmap->kmaps, map->type, dso_name); + curr_map = map_groups__find_by_name(kmaps, map->type, dso_name); if (curr_map == NULL) { u64 start = sym.st_value; @@ -977,7 +1016,7 @@ int dso__load_sym(struct dso *dso, struct map *map, curr_map = map__new2(start, curr_dso, map->type); if (curr_map == NULL) { - dso__delete(curr_dso); + dso__put(curr_dso); goto out_elf_end; } if (adjust_kernel_syms) { @@ -991,12 +1030,8 @@ int dso__load_sym(struct dso *dso, struct map *map, curr_map->unmap_ip = identity__map_ip; } curr_dso->symtab_type = dso->symtab_type; - map_groups__insert(kmap->kmaps, curr_map); - /* - * The new DSO should go to the kernel DSOS - */ - dsos__add(&map->groups->machine->kernel_dsos, - curr_dso); + map_groups__insert(kmaps, curr_map); + dsos__add(&map->groups->machine->dsos, 
curr_dso); dso__set_loaded(curr_dso, map->type); } else curr_dso = curr_map->dso; @@ -1045,14 +1080,15 @@ new_symbol: * For misannotated, zeroed, ASM function sizes. */ if (nr > 0) { - symbols__fixup_duplicate(&dso->symbols[map->type]); + if (!symbol_conf.allow_aliases) + symbols__fixup_duplicate(&dso->symbols[map->type]); symbols__fixup_end(&dso->symbols[map->type]); if (kmap) { /* * We need to fixup this here too because we create new * maps here, for things like vsyscall sections. */ - __map_groups__fixup_end(kmap->kmaps, map->type); + __map_groups__fixup_end(kmaps, map->type); } } err = nr; diff --git a/tools/perf/util/symbol-minimal.c b/tools/perf/util/symbol-minimal.c index d7efb03b3f9a..fd8477cacf88 100644 --- a/tools/perf/util/symbol-minimal.c +++ b/tools/perf/util/symbol-minimal.c @@ -246,13 +246,12 @@ out: return ret; } -int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, - const char *name, +int symsrc__init(struct symsrc *ss, struct dso *dso, const char *name, enum dso_binary_type type) { int fd = open(name, O_RDONLY); if (fd < 0) - return -1; + goto out_errno; ss->name = strdup(name); if (!ss->name) @@ -264,6 +263,8 @@ int symsrc__init(struct symsrc *ss, struct dso *dso __maybe_unused, return 0; out_close: close(fd); +out_errno: + dso->load_errno = errno; return -1; } diff --git a/tools/perf/util/symbol.c b/tools/perf/util/symbol.c index a69066865a55..60f11414bb5c 100644 --- a/tools/perf/util/symbol.c +++ b/tools/perf/util/symbol.c @@ -15,6 +15,7 @@ #include "machine.h" #include "symbol.h" #include "strlist.h" +#include "intlist.h" #include "header.h" #include <elf.h> @@ -84,8 +85,17 @@ static int prefix_underscores_count(const char *str) return tail - str; } -#define SYMBOL_A 0 -#define SYMBOL_B 1 +int __weak arch__choose_best_symbol(struct symbol *syma, + struct symbol *symb __maybe_unused) +{ + /* Avoid "SyS" kernel syscall aliases */ + if (strlen(syma->name) >= 3 && !strncmp(syma->name, "SyS", 3)) + return SYMBOL_B; + if (strlen(syma->name) >= 10 && !strncmp(syma->name, "compat_SyS", 10)) + return SYMBOL_B; + + return SYMBOL_A; +} static int choose_best_symbol(struct symbol *syma, struct symbol *symb) { @@ -133,13 +143,7 @@ static int choose_best_symbol(struct symbol *syma, struct symbol *symb) else if (na < nb) return SYMBOL_B; - /* Avoid "SyS" kernel syscall aliases */ - if (na >= 3 && !strncmp(syma->name, "SyS", 3)) - return SYMBOL_B; - if (na >= 10 && !strncmp(syma->name, "compat_SyS", 10)) - return SYMBOL_B; - - return SYMBOL_A; + return arch__choose_best_symbol(syma, symb); } void symbols__fixup_duplicate(struct rb_root *symbols) @@ -198,18 +202,18 @@ void symbols__fixup_end(struct rb_root *symbols) void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) { - struct map *prev, *curr; - struct rb_node *nd, *prevnd = rb_first(&mg->maps[type]); + struct maps *maps = &mg->maps[type]; + struct map *next, *curr; - if (prevnd == NULL) - return; + pthread_rwlock_wrlock(&maps->lock); - curr = rb_entry(prevnd, struct map, rb_node); + curr = maps__first(maps); + if (curr == NULL) + goto out_unlock; - for (nd = rb_next(prevnd); nd; nd = rb_next(nd)) { - prev = curr; - curr = rb_entry(nd, struct map, rb_node); - prev->end = curr->start; + for (next = map__next(curr); next; next = map__next(curr)) { + curr->end = next->start; + curr = next; } /* @@ -217,6 +221,9 @@ void __map_groups__fixup_end(struct map_groups *mg, enum map_type type) * last map final address. 
*/ curr->end = ~0ULL; + +out_unlock: + pthread_rwlock_unlock(&maps->lock); } struct symbol *symbol__new(u64 start, u64 len, u8 binding, const char *name) @@ -396,7 +403,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, const char *name) { struct rb_node *n; - struct symbol_name_rb_node *s; + struct symbol_name_rb_node *s = NULL; if (symbols == NULL) return NULL; @@ -407,7 +414,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, int cmp; s = rb_entry(n, struct symbol_name_rb_node, rb_node); - cmp = strcmp(name, s->sym.name); + cmp = arch__compare_symbol_names(name, s->sym.name); if (cmp < 0) n = n->rb_left; @@ -425,7 +432,7 @@ static struct symbol *symbols__find_by_name(struct rb_root *symbols, struct symbol_name_rb_node *tmp; tmp = rb_entry(n, struct symbol_name_rb_node, rb_node); - if (strcmp(tmp->sym.name, s->sym.name)) + if (arch__compare_symbol_names(tmp->sym.name, s->sym.name)) break; s = tmp; @@ -629,13 +636,16 @@ static int dso__load_all_kallsyms(struct dso *dso, const char *filename, static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, symbol_filter_t filter) { - struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map_groups *kmaps = map__kmaps(map); struct map *curr_map; struct symbol *pos; int count = 0, moved = 0; struct rb_root *root = &dso->symbols[map->type]; struct rb_node *next = rb_first(root); + if (!kmaps) + return -1; + while (next) { char *module; @@ -649,14 +659,14 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, curr_map = map_groups__find(kmaps, map->type, pos->start); if (!curr_map || (filter && filter(curr_map, pos))) { - rb_erase(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, root); symbol__delete(pos); } else { pos->start -= curr_map->start - curr_map->pgoff; if (pos->end) pos->end -= curr_map->start - curr_map->pgoff; if (curr_map != map) { - rb_erase(&pos->rb_node, root); + rb_erase_init(&pos->rb_node, root); symbols__insert( &curr_map->dso->symbols[curr_map->type], pos); @@ -681,8 +691,8 @@ static int dso__split_kallsyms_for_kcore(struct dso *dso, struct map *map, static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, symbol_filter_t filter) { - struct map_groups *kmaps = map__kmap(map)->kmaps; - struct machine *machine = kmaps->machine; + struct map_groups *kmaps = map__kmaps(map); + struct machine *machine; struct map *curr_map = map; struct symbol *pos; int count = 0, moved = 0; @@ -690,6 +700,11 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, struct rb_node *next = rb_first(root); int kernel_range = 0; + if (!kmaps) + return -1; + + machine = kmaps->machine; + while (next) { char *module; @@ -771,7 +786,7 @@ static int dso__split_kallsyms(struct dso *dso, struct map *map, u64 delta, curr_map = map__new2(pos->start, ndso, map->type); if (curr_map == NULL) { - dso__delete(ndso); + dso__put(ndso); return -1; } @@ -1024,9 +1039,12 @@ static bool filename_from_kallsyms_filename(char *filename, static int validate_kcore_modules(const char *kallsyms_filename, struct map *map) { - struct map_groups *kmaps = map__kmap(map)->kmaps; + struct map_groups *kmaps = map__kmaps(map); char modules_filename[PATH_MAX]; + if (!kmaps) + return -EINVAL; + if (!filename_from_kallsyms_filename(modules_filename, "modules", kallsyms_filename)) return -EINVAL; @@ -1042,6 +1060,9 @@ static int validate_kcore_addresses(const char *kallsyms_filename, { struct kmap *kmap = map__kmap(map); + if (!kmap) + return -EINVAL; + if 
(kmap->ref_reloc_sym && kmap->ref_reloc_sym->name) { u64 start; @@ -1080,8 +1101,8 @@ static int kcore_mapfn(u64 start, u64 len, u64 pgoff, void *data) static int dso__load_kcore(struct dso *dso, struct map *map, const char *kallsyms_filename) { - struct map_groups *kmaps = map__kmap(map)->kmaps; - struct machine *machine = kmaps->machine; + struct map_groups *kmaps = map__kmaps(map); + struct machine *machine; struct kcore_mapfn_data md; struct map *old_map, *new_map, *replacement_map = NULL; bool is_64_bit; @@ -1089,6 +1110,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, char kcore_filename[PATH_MAX]; struct symbol *sym; + if (!kmaps) + return -EINVAL; + + machine = kmaps->machine; + /* This function requires that the map is the kernel map */ if (map != machine->vmlinux_maps[map->type]) return -EINVAL; @@ -1106,8 +1132,11 @@ static int dso__load_kcore(struct dso *dso, struct map *map, INIT_LIST_HEAD(&md.maps); fd = open(kcore_filename, O_RDONLY); - if (fd < 0) + if (fd < 0) { + pr_err("%s requires CAP_SYS_RAWIO capability to access.\n", + kcore_filename); return -EINVAL; + } /* Read new maps into temporary lists */ err = file__read_maps(fd, md.type == MAP__FUNCTION, kcore_mapfn, &md, @@ -1147,20 +1176,23 @@ static int dso__load_kcore(struct dso *dso, struct map *map, /* Add new maps */ while (!list_empty(&md.maps)) { new_map = list_entry(md.maps.next, struct map, node); - list_del(&new_map->node); + list_del_init(&new_map->node); if (new_map == replacement_map) { map->start = new_map->start; map->end = new_map->end; map->pgoff = new_map->pgoff; map->map_ip = new_map->map_ip; map->unmap_ip = new_map->unmap_ip; - map__delete(new_map); /* Ensure maps are correctly ordered */ + map__get(map); map_groups__remove(kmaps, map); map_groups__insert(kmaps, map); + map__put(map); } else { map_groups__insert(kmaps, new_map); } + + map__put(new_map); } /* @@ -1185,8 +1217,8 @@ static int dso__load_kcore(struct dso *dso, struct map *map, out_err: while (!list_empty(&md.maps)) { map = list_entry(md.maps.next, struct map, node); - list_del(&map->node); - map__delete(map); + list_del_init(&map->node); + map__put(map); } close(fd); return -EINVAL; @@ -1201,6 +1233,9 @@ static int kallsyms__delta(struct map *map, const char *filename, u64 *delta) struct kmap *kmap = map__kmap(map); u64 addr; + if (!kmap) + return -1; + if (!kmap->ref_reloc_sym || !kmap->ref_reloc_sym->name) return 0; @@ -1332,7 +1367,7 @@ static bool dso__is_compatible_symtab_type(struct dso *dso, bool kmod, case DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP: /* * kernel modules know their symtab type - it's set when - * creating a module dso in machine__new_module(). + * creating a module dso in machine__findnew_module_map(). 
*/ return kmod && dso->symtab_type == type; @@ -1357,12 +1392,22 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) struct symsrc *syms_ss = NULL, *runtime_ss = NULL; bool kmod; - dso__set_loaded(dso, map->type); + pthread_mutex_lock(&dso->lock); + + /* check again under the dso->lock */ + if (dso__loaded(dso, map->type)) { + ret = 1; + goto out; + } - if (dso->kernel == DSO_TYPE_KERNEL) - return dso__load_kernel_sym(dso, map, filter); - else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) - return dso__load_guest_kernel_sym(dso, map, filter); + if (dso->kernel) { + if (dso->kernel == DSO_TYPE_KERNEL) + ret = dso__load_kernel_sym(dso, map, filter); + else if (dso->kernel == DSO_TYPE_GUEST_KERNEL) + ret = dso__load_guest_kernel_sym(dso, map, filter); + + goto out; + } if (map->groups && map->groups->machine) machine = map->groups->machine; @@ -1375,18 +1420,18 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) struct stat st; if (lstat(dso->name, &st) < 0) - return -1; + goto out; if (st.st_uid && (st.st_uid != geteuid())) { pr_warning("File %s not owned by current user or root, " "ignoring it.\n", dso->name); - return -1; + goto out; } ret = dso__load_perf_map(dso, map, filter); dso->symtab_type = ret > 0 ? DSO_BINARY_TYPE__JAVA_JIT : DSO_BINARY_TYPE__NOT_FOUND; - return ret; + goto out; } if (machine) @@ -1394,7 +1439,7 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) name = malloc(PATH_MAX); if (!name) - return -1; + goto out; kmod = dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE || dso->symtab_type == DSO_BINARY_TYPE__SYSTEM_PATH_KMODULE_COMP || @@ -1475,23 +1520,32 @@ int dso__load(struct dso *dso, struct map *map, symbol_filter_t filter) out_free: free(name); if (ret < 0 && strstr(dso->name, " (deleted)") != NULL) - return 0; + ret = 0; +out: + dso__set_loaded(dso, map->type); + pthread_mutex_unlock(&dso->lock); + return ret; } struct map *map_groups__find_by_name(struct map_groups *mg, enum map_type type, const char *name) { - struct rb_node *nd; + struct maps *maps = &mg->maps[type]; + struct map *map; - for (nd = rb_first(&mg->maps[type]); nd; nd = rb_next(nd)) { - struct map *map = rb_entry(nd, struct map, rb_node); + pthread_rwlock_rdlock(&maps->lock); + for (map = maps__first(maps); map; map = map__next(map)) { if (map->dso && strcmp(map->dso->short_name, name) == 0) - return map; + goto out_unlock; } - return NULL; + map = NULL; + +out_unlock: + pthread_rwlock_unlock(&maps->lock); + return map; } int dso__load_vmlinux(struct dso *dso, struct map *map, @@ -1779,6 +1833,7 @@ static void vmlinux_path__exit(void) { while (--vmlinux_path__nr_entries >= 0) zfree(&vmlinux_path[vmlinux_path__nr_entries]); + vmlinux_path__nr_entries = 0; zfree(&vmlinux_path); } @@ -1856,6 +1911,22 @@ int setup_list(struct strlist **list, const char *list_str, pr_err("problems parsing %s list\n", list_name); return -1; } + + symbol_conf.has_filter = true; + return 0; +} + +int setup_intlist(struct intlist **list, const char *list_str, + const char *list_name) +{ + if (list_str == NULL) + return 0; + + *list = intlist__new(list_str); + if (!*list) { + pr_err("problems parsing %s list\n", list_name); + return -1; + } return 0; } @@ -1909,9 +1980,17 @@ int symbol__init(struct perf_session_env *env) symbol_conf.comm_list_str, "comm") < 0) goto out_free_dso_list; + if (setup_intlist(&symbol_conf.pid_list, + symbol_conf.pid_list_str, "pid") < 0) + goto out_free_comm_list; + + if (setup_intlist(&symbol_conf.tid_list, + 
symbol_conf.tid_list_str, "tid") < 0) + goto out_free_pid_list; + if (setup_list(&symbol_conf.sym_list, symbol_conf.sym_list_str, "symbol") < 0) - goto out_free_comm_list; + goto out_free_tid_list; /* * A path to symbols of "/" is identical to "" @@ -1930,6 +2009,10 @@ int symbol__init(struct perf_session_env *env) symbol_conf.initialized = true; return 0; +out_free_tid_list: + intlist__delete(symbol_conf.tid_list); +out_free_pid_list: + intlist__delete(symbol_conf.pid_list); out_free_comm_list: strlist__delete(symbol_conf.comm_list); out_free_dso_list: @@ -1944,6 +2027,8 @@ void symbol__exit(void) strlist__delete(symbol_conf.sym_list); strlist__delete(symbol_conf.dso_list); strlist__delete(symbol_conf.comm_list); + intlist__delete(symbol_conf.tid_list); + intlist__delete(symbol_conf.pid_list); vmlinux_path__exit(); symbol_conf.sym_list = symbol_conf.dso_list = symbol_conf.comm_list = NULL; symbol_conf.initialized = false; diff --git a/tools/perf/util/symbol.h b/tools/perf/util/symbol.h index 1650dcb3a67b..b98ce51af142 100644 --- a/tools/perf/util/symbol.h +++ b/tools/perf/util/symbol.h @@ -78,6 +78,7 @@ static inline size_t symbol__size(const struct symbol *sym) } struct strlist; +struct intlist; struct symbol_conf { unsigned short priv_size; @@ -87,6 +88,7 @@ struct symbol_conf { ignore_vmlinux_buildid, show_kernel_path, use_modules, + allow_aliases, sort_by_name, show_nr_samples, show_total_period, @@ -103,7 +105,8 @@ struct symbol_conf { demangle_kernel, filter_relative, show_hist_headers, - branch_callstack; + branch_callstack, + has_filter; const char *vmlinux_name, *kallsyms_name, *source_prefix, @@ -114,6 +117,8 @@ struct symbol_conf { const char *guestmount; const char *dso_list_str, *comm_list_str, + *pid_list_str, + *tid_list_str, *sym_list_str, *col_width_list_str; struct strlist *dso_list, @@ -123,6 +128,8 @@ struct symbol_conf { *dso_to_list, *sym_from_list, *sym_to_list; + struct intlist *pid_list, + *tid_list; const char *symfs; }; @@ -152,8 +159,6 @@ struct ref_reloc_sym { struct map_symbol { struct map *map; struct symbol *sym; - bool unfolded; - bool has_children; }; struct addr_map_symbol { @@ -294,5 +299,17 @@ int compare_proc_modules(const char *from, const char *to); int setup_list(struct strlist **list, const char *list_str, const char *list_name); +int setup_intlist(struct intlist **list, const char *list_str, + const char *list_name); + +#ifdef HAVE_LIBELF_SUPPORT +bool elf__needs_adjust_symbols(GElf_Ehdr ehdr); +void arch__elf_sym_adjust(GElf_Sym *sym); +#endif + +#define SYMBOL_A 0 +#define SYMBOL_B 1 + +int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb); #endif /* __PERF_SYMBOL */ diff --git a/tools/perf/util/target.c b/tools/perf/util/target.c index e74c5963dc7a..a53603b27e52 100644 --- a/tools/perf/util/target.c +++ b/tools/perf/util/target.c @@ -123,11 +123,8 @@ int target__strerror(struct target *target, int errnum, if (errnum >= 0) { const char *err = strerror_r(errnum, buf, buflen); - if (err != buf) { - size_t len = strlen(err); - memcpy(buf, err, min(buflen - 1, len)); - *(buf + min(buflen - 1, len)) = '\0'; - } + if (err != buf) + scnprintf(buf, buflen, "%s", err); return 0; } diff --git a/tools/perf/util/thread-stack.c b/tools/perf/util/thread-stack.c index 9ed59a452d1f..679688e70ae7 100644 --- a/tools/perf/util/thread-stack.c +++ b/tools/perf/util/thread-stack.c @@ -219,7 +219,7 @@ static int thread_stack__call_return(struct thread *thread, return crp->process(&cr, crp->data); } -static int thread_stack__flush(struct thread 
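Because arch__choose_best_symbol() is now a __weak default and SYMBOL_A/SYMBOL_B are exported from symbol.h, an architecture can supply its own tie-breaking policy. A purely hypothetical override to illustrate the hook; the policy and file location below are invented, not part of this patch:

/* hypothetical: e.g. tools/perf/arch/foo/util/sym-handling.c */
#include <string.h>
#include "symbol.h"

int arch__choose_best_symbol(struct symbol *syma, struct symbol *symb)
{
	/* invented policy: when two symbols alias the same address,
	 * keep the one with the shorter name */
	return strlen(syma->name) <= strlen(symb->name) ? SYMBOL_A : SYMBOL_B;
}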
*thread, struct thread_stack *ts) +static int __thread_stack__flush(struct thread *thread, struct thread_stack *ts) { struct call_return_processor *crp = ts->crp; int err; @@ -242,6 +242,14 @@ static int thread_stack__flush(struct thread *thread, struct thread_stack *ts) return 0; } +int thread_stack__flush(struct thread *thread) +{ + if (thread->ts) + return __thread_stack__flush(thread, thread->ts); + + return 0; +} + int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, u64 to_ip, u16 insn_len, u64 trace_nr) { @@ -264,7 +272,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, */ if (trace_nr != thread->ts->trace_nr) { if (thread->ts->trace_nr) - thread_stack__flush(thread, thread->ts); + __thread_stack__flush(thread, thread->ts); thread->ts->trace_nr = trace_nr; } @@ -297,7 +305,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr) if (trace_nr != thread->ts->trace_nr) { if (thread->ts->trace_nr) - thread_stack__flush(thread, thread->ts); + __thread_stack__flush(thread, thread->ts); thread->ts->trace_nr = trace_nr; } } @@ -305,7 +313,7 @@ void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr) void thread_stack__free(struct thread *thread) { if (thread->ts) { - thread_stack__flush(thread, thread->ts); + __thread_stack__flush(thread, thread->ts); zfree(&thread->ts->stack); zfree(&thread->ts); } @@ -689,7 +697,7 @@ int thread_stack__process(struct thread *thread, struct comm *comm, /* Flush stack on exec */ if (ts->comm != comm && thread->pid_ == thread->tid) { - err = thread_stack__flush(thread, ts); + err = __thread_stack__flush(thread, ts); if (err) return err; ts->comm = comm; diff --git a/tools/perf/util/thread-stack.h b/tools/perf/util/thread-stack.h index b843bbef8ba2..e1528f1374c3 100644 --- a/tools/perf/util/thread-stack.h +++ b/tools/perf/util/thread-stack.h @@ -96,6 +96,7 @@ int thread_stack__event(struct thread *thread, u32 flags, u64 from_ip, void thread_stack__set_trace_nr(struct thread *thread, u64 trace_nr); void thread_stack__sample(struct thread *thread, struct ip_callchain *chain, size_t sz, u64 ip); +int thread_stack__flush(struct thread *thread); void thread_stack__free(struct thread *thread); struct call_return_processor * diff --git a/tools/perf/util/thread.c b/tools/perf/util/thread.c index 9ebc8b1f9be5..28c4b746baa1 100644 --- a/tools/perf/util/thread.c +++ b/tools/perf/util/thread.c @@ -18,7 +18,7 @@ int thread__init_map_groups(struct thread *thread, struct machine *machine) if (pid == thread->tid || pid == -1) { thread->mg = map_groups__new(machine); } else { - leader = machine__findnew_thread(machine, pid, pid); + leader = __machine__findnew_thread(machine, pid, pid); if (leader) thread->mg = map_groups__get(leader->mg); } @@ -53,7 +53,8 @@ struct thread *thread__new(pid_t pid, pid_t tid) goto err_thread; list_add(&comm->list, &thread->comm_list); - + atomic_set(&thread->refcnt, 0); + RB_CLEAR_NODE(&thread->rb_node); } return thread; @@ -67,6 +68,8 @@ void thread__delete(struct thread *thread) { struct comm *comm, *tmp; + BUG_ON(!RB_EMPTY_NODE(&thread->rb_node)); + thread_stack__free(thread); if (thread->mg) { @@ -82,6 +85,21 @@ void thread__delete(struct thread *thread) free(thread); } +struct thread *thread__get(struct thread *thread) +{ + if (thread) + atomic_inc(&thread->refcnt); + return thread; +} + +void thread__put(struct thread *thread) +{ + if (thread && atomic_dec_and_test(&thread->refcnt)) { + list_del_init(&thread->node); + thread__delete(thread); + } +} + struct comm 
*thread__comm(const struct thread *thread) { if (list_empty(&thread->comm_list)) @@ -192,7 +210,6 @@ int thread__fork(struct thread *thread, struct thread *parent, u64 timestamp) err = thread__set_comm(thread, comm, timestamp); if (err) return err; - thread->comm_set = true; } thread->ppid = parent->tid; diff --git a/tools/perf/util/thread.h b/tools/perf/util/thread.h index 160fd066a7d1..a0ac0317affb 100644 --- a/tools/perf/util/thread.h +++ b/tools/perf/util/thread.h @@ -1,12 +1,14 @@ #ifndef __PERF_THREAD_H #define __PERF_THREAD_H +#include <linux/atomic.h> #include <linux/rbtree.h> #include <linux/list.h> #include <unistd.h> #include <sys/types.h> #include "symbol.h" #include <strlist.h> +#include <intlist.h> struct thread_stack; @@ -20,11 +22,12 @@ struct thread { pid_t tid; pid_t ppid; int cpu; + atomic_t refcnt; char shortname[3]; bool comm_set; + int comm_len; bool dead; /* if set thread has exited */ struct list_head comm_list; - int comm_len; u64 db_id; void *priv; @@ -37,6 +40,18 @@ struct comm; struct thread *thread__new(pid_t pid, pid_t tid); int thread__init_map_groups(struct thread *thread, struct machine *machine); void thread__delete(struct thread *thread); + +struct thread *thread__get(struct thread *thread); +void thread__put(struct thread *thread); + +static inline void __thread__zput(struct thread **thread) +{ + thread__put(*thread); + *thread = NULL; +} + +#define thread__zput(thread) __thread__zput(&thread) + static inline void thread__exited(struct thread *thread) { thread->dead = true; @@ -87,6 +102,16 @@ static inline bool thread__is_filtered(struct thread *thread) return true; } + if (symbol_conf.pid_list && + !intlist__has_entry(symbol_conf.pid_list, thread->pid_)) { + return true; + } + + if (symbol_conf.tid_list && + !intlist__has_entry(symbol_conf.tid_list, thread->tid)) { + return true; + } + return false; } diff --git a/tools/perf/util/thread_map.c b/tools/perf/util/thread_map.c index f93b9734735b..292ae2c90e06 100644 --- a/tools/perf/util/thread_map.c +++ b/tools/perf/util/thread_map.c @@ -8,8 +8,11 @@ #include <unistd.h> #include "strlist.h" #include <string.h> +#include <api/fs/fs.h> +#include "asm/bug.h" #include "thread_map.h" #include "util.h" +#include "debug.h" /* Skip "." and ".." directories */ static int filter(const struct dirent *dir) @@ -20,6 +23,30 @@ static int filter(const struct dirent *dir) return 1; } +static void thread_map__reset(struct thread_map *map, int start, int nr) +{ + size_t size = (nr - start) * sizeof(map->map[0]); + + memset(&map->map[start], 0, size); +} + +static struct thread_map *thread_map__realloc(struct thread_map *map, int nr) +{ + size_t size = sizeof(*map) + sizeof(map->map[0]) * nr; + int start = map ? map->nr : 0; + + map = realloc(map, size); + /* + * We only realloc to add more items, let's reset new items. 
+ */ + if (map) + thread_map__reset(map, start, nr); + + return map; +} + +#define thread_map__alloc(__nr) thread_map__realloc(NULL, __nr) + struct thread_map *thread_map__new_by_pid(pid_t pid) { struct thread_map *threads; @@ -33,11 +60,12 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) if (items <= 0) return NULL; - threads = malloc(sizeof(*threads) + sizeof(pid_t) * items); + threads = thread_map__alloc(items); if (threads != NULL) { for (i = 0; i < items; i++) - threads->map[i] = atoi(namelist[i]->d_name); + thread_map__set_pid(threads, i, atoi(namelist[i]->d_name)); threads->nr = items; + atomic_set(&threads->refcnt, 1); } for (i=0; i<items; i++) @@ -49,11 +77,12 @@ struct thread_map *thread_map__new_by_pid(pid_t pid) struct thread_map *thread_map__new_by_tid(pid_t tid) { - struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t)); + struct thread_map *threads = thread_map__alloc(1); if (threads != NULL) { - threads->map[0] = tid; - threads->nr = 1; + thread_map__set_pid(threads, 0, tid); + threads->nr = 1; + atomic_set(&threads->refcnt, 1); } return threads; @@ -65,8 +94,8 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) int max_threads = 32, items, i; char path[256]; struct dirent dirent, *next, **namelist = NULL; - struct thread_map *threads = malloc(sizeof(*threads) + - max_threads * sizeof(pid_t)); + struct thread_map *threads = thread_map__alloc(max_threads); + if (threads == NULL) goto out; @@ -75,6 +104,7 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) goto out_free_threads; threads->nr = 0; + atomic_set(&threads->refcnt, 1); while (!readdir_r(proc, &dirent, &next) && next) { char *end; @@ -106,16 +136,17 @@ struct thread_map *thread_map__new_by_uid(uid_t uid) if (grow) { struct thread_map *tmp; - tmp = realloc(threads, (sizeof(*threads) + - max_threads * sizeof(pid_t))); + tmp = thread_map__realloc(threads, max_threads); if (tmp == NULL) goto out_free_namelist; threads = tmp; } - for (i = 0; i < items; i++) - threads->map[threads->nr + i] = atoi(namelist[i]->d_name); + for (i = 0; i < items; i++) { + thread_map__set_pid(threads, threads->nr + i, + atoi(namelist[i]->d_name)); + } for (i = 0; i < items; i++) zfree(&namelist[i]); @@ -185,15 +216,14 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) goto out_free_threads; total_tasks += items; - nt = realloc(threads, (sizeof(*threads) + - sizeof(pid_t) * total_tasks)); + nt = thread_map__realloc(threads, total_tasks); if (nt == NULL) goto out_free_namelist; threads = nt; for (i = 0; i < items; i++) { - threads->map[j++] = atoi(namelist[i]->d_name); + thread_map__set_pid(threads, j++, atoi(namelist[i]->d_name)); zfree(&namelist[i]); } threads->nr = total_tasks; @@ -202,6 +232,8 @@ static struct thread_map *thread_map__new_by_pid_str(const char *pid_str) out: strlist__delete(slist); + if (threads) + atomic_set(&threads->refcnt, 1); return threads; out_free_namelist: @@ -216,11 +248,12 @@ out_free_threads: struct thread_map *thread_map__new_dummy(void) { - struct thread_map *threads = malloc(sizeof(*threads) + sizeof(pid_t)); + struct thread_map *threads = thread_map__alloc(1); if (threads != NULL) { - threads->map[0] = -1; - threads->nr = 1; + thread_map__set_pid(threads, 0, -1); + threads->nr = 1; + atomic_set(&threads->refcnt, 1); } return threads; } @@ -253,16 +286,18 @@ static struct thread_map *thread_map__new_by_tid_str(const char *tid_str) continue; ntasks++; - nt = realloc(threads, sizeof(*threads) + sizeof(pid_t) * ntasks); + nt = thread_map__realloc(threads, 
ntasks); if (nt == NULL) goto out_free_threads; threads = nt; - threads->map[ntasks - 1] = tid; - threads->nr = ntasks; + thread_map__set_pid(threads, ntasks - 1, tid); + threads->nr = ntasks; } out: + if (threads) + atomic_set(&threads->refcnt, 1); return threads; out_free_threads: @@ -282,9 +317,30 @@ struct thread_map *thread_map__new_str(const char *pid, const char *tid, return thread_map__new_by_tid_str(tid); } -void thread_map__delete(struct thread_map *threads) +static void thread_map__delete(struct thread_map *threads) { - free(threads); + if (threads) { + int i; + + WARN_ONCE(atomic_read(&threads->refcnt) != 0, + "thread map refcnt unbalanced\n"); + for (i = 0; i < threads->nr; i++) + free(thread_map__comm(threads, i)); + free(threads); + } +} + +struct thread_map *thread_map__get(struct thread_map *map) +{ + if (map) + atomic_inc(&map->refcnt); + return map; +} + +void thread_map__put(struct thread_map *map) +{ + if (map && atomic_dec_and_test(&map->refcnt)) + thread_map__delete(map); } size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) @@ -293,7 +349,60 @@ size_t thread_map__fprintf(struct thread_map *threads, FILE *fp) size_t printed = fprintf(fp, "%d thread%s: ", threads->nr, threads->nr > 1 ? "s" : ""); for (i = 0; i < threads->nr; ++i) - printed += fprintf(fp, "%s%d", i ? ", " : "", threads->map[i]); + printed += fprintf(fp, "%s%d", i ? ", " : "", thread_map__pid(threads, i)); return printed + fprintf(fp, "\n"); } + +static int get_comm(char **comm, pid_t pid) +{ + char *path; + size_t size; + int err; + + if (asprintf(&path, "%s/%d/comm", procfs__mountpoint(), pid) == -1) + return -ENOMEM; + + err = filename__read_str(path, comm, &size); + if (!err) { + /* + * We're reading 16 bytes, while filename__read_str + * allocates data per BUFSIZ bytes, so we can safely + * mark the end of the string. + */ + (*comm)[size] = 0; + rtrim(*comm); + } + + free(path); + return err; +} + +static void comm_init(struct thread_map *map, int i) +{ + pid_t pid = thread_map__pid(map, i); + char *comm = NULL; + + /* dummy pid comm initialization */ + if (pid == -1) { + map->map[i].comm = strdup("dummy"); + return; + } + + /* + * The comm name is like extra bonus ;-), + * so just warn if we fail for any reason. 
+ */ + if (get_comm(&comm, pid)) + pr_warning("Couldn't resolve comm name for pid %d\n", pid); + + map->map[i].comm = comm; +} + +void thread_map__read_comms(struct thread_map *threads) +{ + int i; + + for (i = 0; i < threads->nr; ++i) + comm_init(threads, i); +} diff --git a/tools/perf/util/thread_map.h b/tools/perf/util/thread_map.h index 95313f43cc0f..af679d8a50f8 100644 --- a/tools/perf/util/thread_map.h +++ b/tools/perf/util/thread_map.h @@ -3,10 +3,17 @@ #include <sys/types.h> #include <stdio.h> +#include <linux/atomic.h> + +struct thread_map_data { + pid_t pid; + char *comm; +}; struct thread_map { + atomic_t refcnt; int nr; - pid_t map[]; + struct thread_map_data map[]; }; struct thread_map *thread_map__new_dummy(void); @@ -15,11 +22,12 @@ struct thread_map *thread_map__new_by_tid(pid_t tid); struct thread_map *thread_map__new_by_uid(uid_t uid); struct thread_map *thread_map__new(pid_t pid, pid_t tid, uid_t uid); +struct thread_map *thread_map__get(struct thread_map *map); +void thread_map__put(struct thread_map *map); + struct thread_map *thread_map__new_str(const char *pid, const char *tid, uid_t uid); -void thread_map__delete(struct thread_map *threads); - size_t thread_map__fprintf(struct thread_map *threads, FILE *fp); static inline int thread_map__nr(struct thread_map *threads) @@ -27,4 +35,21 @@ static inline int thread_map__nr(struct thread_map *threads) return threads ? threads->nr : 1; } +static inline pid_t thread_map__pid(struct thread_map *map, int thread) +{ + return map->map[thread].pid; +} + +static inline void +thread_map__set_pid(struct thread_map *map, int thread, pid_t pid) +{ + map->map[thread].pid = pid; +} + +static inline char *thread_map__comm(struct thread_map *map, int thread) +{ + return map->map[thread].comm; +} + +void thread_map__read_comms(struct thread_map *threads); #endif /* __PERF_THREAD_MAP_H */ diff --git a/tools/perf/util/tool.h b/tools/perf/util/tool.h index bb2708bbfaca..c307dd438286 100644 --- a/tools/perf/util/tool.h +++ b/tools/perf/util/tool.h @@ -3,6 +3,8 @@ #include <stdbool.h> +#include <linux/types.h> + struct perf_session; union perf_event; struct perf_evlist; @@ -10,6 +12,7 @@ struct perf_evsel; struct perf_sample; struct perf_tool; struct machine; +struct ordered_events; typedef int (*event_sample)(struct perf_tool *tool, union perf_event *event, struct perf_sample *sample, @@ -25,6 +28,12 @@ typedef int (*event_attr_op)(struct perf_tool *tool, typedef int (*event_op2)(struct perf_tool *tool, union perf_event *event, struct perf_session *session); +typedef int (*event_oe)(struct perf_tool *tool, union perf_event *event, + struct ordered_events *oe); + +typedef s64 (*event_op3)(struct perf_tool *tool, union perf_event *event, + struct perf_session *session); + struct perf_tool { event_sample sample, read; @@ -34,13 +43,19 @@ struct perf_tool { fork, exit, lost, + lost_samples, + aux, + itrace_start, throttle, unthrottle; event_attr_op attr; event_op2 tracing_data; - event_op2 finished_round, - build_id, - id_index; + event_oe finished_round; + event_op2 build_id, + id_index, + auxtrace_info, + auxtrace_error; + event_op3 auxtrace; bool ordered_events; bool ordering_requires_timestamps; }; diff --git a/tools/perf/util/trace-event-parse.c b/tools/perf/util/trace-event-parse.c index c36636fd825b..d4957418657e 100644 --- a/tools/perf/util/trace-event-parse.c +++ b/tools/perf/util/trace-event-parse.c @@ -112,8 +112,8 @@ unsigned long long read_size(struct event_format *event, void *ptr, int size) return pevent_read_number(event->pevent, 
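With pids and comm strings now carried per entry and the map reference counted, a consumer resolves the comm names once and drops its reference when done. A usage sketch, assuming it is built inside tools/perf; error handling is trimmed:

#include <stdio.h>
#include <unistd.h>
#include "util/thread_map.h"

static void dump_threads_of(pid_t pid)
{
	struct thread_map *threads = thread_map__new_by_pid(pid);
	int i;

	if (!threads)
		return;

	thread_map__read_comms(threads);	/* best effort, comm may stay NULL */

	for (i = 0; i < thread_map__nr(threads); i++)
		printf("%d %s\n", thread_map__pid(threads, i),
		       thread_map__comm(threads, i) ?: "<unknown>");

	thread_map__put(threads);		/* drops the initial reference */
}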
ptr, size); } -void event_format__print(struct event_format *event, - int cpu, void *data, int size) +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp) { struct pevent_record record; struct trace_seq s; @@ -125,10 +125,16 @@ void event_format__print(struct event_format *event, trace_seq_init(&s); pevent_event_info(&s, event, &record); - trace_seq_do_printf(&s); + trace_seq_do_fprintf(&s, fp); trace_seq_destroy(&s); } +void event_format__print(struct event_format *event, + int cpu, void *data, int size) +{ + return event_format__fprintf(event, cpu, data, size, stdout); +} + void parse_proc_kallsyms(struct pevent *pevent, char *file, unsigned int size __maybe_unused) { @@ -167,7 +173,7 @@ void parse_ftrace_printk(struct pevent *pevent, char *line; char *next = NULL; char *addr_str; - char *fmt; + char *fmt = NULL; line = strtok_r(file, "\n", &next); while (line) { diff --git a/tools/perf/util/trace-event-scripting.c b/tools/perf/util/trace-event-scripting.c index 5c9bdd1591a9..9df61059a85d 100644 --- a/tools/perf/util/trace-event-scripting.c +++ b/tools/perf/util/trace-event-scripting.c @@ -43,7 +43,6 @@ static int stop_script_unsupported(void) static void process_event_unsupported(union perf_event *event __maybe_unused, struct perf_sample *sample __maybe_unused, struct perf_evsel *evsel __maybe_unused, - struct thread *thread __maybe_unused, struct addr_location *al __maybe_unused) { } diff --git a/tools/perf/util/trace-event.h b/tools/perf/util/trace-event.h index 52aaa19e1eb1..d5168f0be4ec 100644 --- a/tools/perf/util/trace-event.h +++ b/tools/perf/util/trace-event.h @@ -23,6 +23,9 @@ trace_event__tp_format(const char *sys, const char *name); int bigendian(void); +void event_format__fprintf(struct event_format *event, + int cpu, void *data, int size, FILE *fp); + void event_format__print(struct event_format *event, int cpu, void *data, int size); @@ -69,8 +72,7 @@ struct scripting_ops { void (*process_event) (union perf_event *event, struct perf_sample *sample, struct perf_evsel *evsel, - struct thread *thread, - struct addr_location *al); + struct addr_location *al); int (*generate_script) (struct pevent *pevent, const char *outfile); }; diff --git a/tools/perf/util/unwind-libunwind.c b/tools/perf/util/unwind-libunwind.c index e3c40a520a25..4c00507ee3fd 100644 --- a/tools/perf/util/unwind-libunwind.c +++ b/tools/perf/util/unwind-libunwind.c @@ -266,16 +266,17 @@ static int read_unwind_spec_eh_frame(struct dso *dso, struct machine *machine, u64 *fde_count) { int ret = -EINVAL, fd; - u64 offset = dso->data.frame_offset; + u64 offset = dso->data.eh_frame_hdr_offset; if (offset == 0) { - fd = dso__data_fd(dso, machine); + fd = dso__data_get_fd(dso, machine); if (fd < 0) return -EINVAL; /* Check the .eh_frame section for unwinding info */ offset = elf_section_offset(fd, ".eh_frame_hdr"); - dso->data.frame_offset = offset; + dso->data.eh_frame_hdr_offset = offset; + dso__data_put_fd(dso); } if (offset) @@ -291,16 +292,17 @@ static int read_unwind_spec_debug_frame(struct dso *dso, struct machine *machine, u64 *offset) { int fd; - u64 ofs = dso->data.frame_offset; + u64 ofs = dso->data.debug_frame_offset; if (ofs == 0) { - fd = dso__data_fd(dso, machine); + fd = dso__data_get_fd(dso, machine); if (fd < 0) return -EINVAL; /* Check the .debug_frame section for unwinding info */ ofs = elf_section_offset(fd, ".debug_frame"); - dso->data.frame_offset = ofs; + dso->data.debug_frame_offset = ofs; + dso__data_put_fd(dso); } *offset = ofs; @@ -353,10 
+355,13 @@ find_proc_info(unw_addr_space_t as, unw_word_t ip, unw_proc_info_t *pi, #ifndef NO_LIBUNWIND_DEBUG_FRAME /* Check the .debug_frame section for unwinding info */ if (!read_unwind_spec_debug_frame(map->dso, ui->machine, &segbase)) { - int fd = dso__data_fd(map->dso, ui->machine); + int fd = dso__data_get_fd(map->dso, ui->machine); int is_exec = elf_is_exec(fd, map->dso->name); unw_word_t base = is_exec ? 0 : map->start; + if (fd >= 0) + dso__data_put_fd(map->dso); + memset(&di, 0, sizeof(di)); if (dwarf_find_debug_frame(0, &di, ip, base, map->dso->name, map->start, map->end)) diff --git a/tools/perf/util/util.c b/tools/perf/util/util.c index b86744f29eef..edc2d633b332 100644 --- a/tools/perf/util/util.c +++ b/tools/perf/util/util.c @@ -72,20 +72,60 @@ int mkdir_p(char *path, mode_t mode) return (stat(path, &st) && mkdir(path, mode)) ? -1 : 0; } -static int slow_copyfile(const char *from, const char *to, mode_t mode) +int rm_rf(char *path) +{ + DIR *dir; + int ret = 0; + struct dirent *d; + char namebuf[PATH_MAX]; + + dir = opendir(path); + if (dir == NULL) + return 0; + + while ((d = readdir(dir)) != NULL && !ret) { + struct stat statbuf; + + if (!strcmp(d->d_name, ".") || !strcmp(d->d_name, "..")) + continue; + + scnprintf(namebuf, sizeof(namebuf), "%s/%s", + path, d->d_name); + + ret = stat(namebuf, &statbuf); + if (ret < 0) { + pr_debug("stat failed: %s\n", namebuf); + break; + } + + if (S_ISREG(statbuf.st_mode)) + ret = unlink(namebuf); + else if (S_ISDIR(statbuf.st_mode)) + ret = rm_rf(namebuf); + else { + pr_debug("unknown file: %s\n", namebuf); + ret = -1; + } + } + closedir(dir); + + if (ret < 0) + return ret; + + return rmdir(path); +} + +static int slow_copyfile(const char *from, const char *to) { int err = -1; char *line = NULL; size_t n; FILE *from_fp = fopen(from, "r"), *to_fp; - mode_t old_umask; if (from_fp == NULL) goto out; - old_umask = umask(mode ^ 0777); to_fp = fopen(to, "w"); - umask(old_umask); if (to_fp == NULL) goto out_fclose_from; @@ -102,42 +142,81 @@ out: return err; } +int copyfile_offset(int ifd, loff_t off_in, int ofd, loff_t off_out, u64 size) +{ + void *ptr; + loff_t pgoff; + + pgoff = off_in & ~(page_size - 1); + off_in -= pgoff; + + ptr = mmap(NULL, off_in + size, PROT_READ, MAP_PRIVATE, ifd, pgoff); + if (ptr == MAP_FAILED) + return -1; + + while (size) { + ssize_t ret = pwrite(ofd, ptr + off_in, size, off_out); + if (ret < 0 && errno == EINTR) + continue; + if (ret <= 0) + break; + + size -= ret; + off_in += ret; + off_out -= ret; + } + munmap(ptr, off_in + size); + + return size ? -1 : 0; +} + int copyfile_mode(const char *from, const char *to, mode_t mode) { int fromfd, tofd; struct stat st; - void *addr; int err = -1; + char *tmp = NULL, *ptr = NULL; if (stat(from, &st)) goto out; - if (st.st_size == 0) /* /proc? do it slowly... */ - return slow_copyfile(from, to, mode); - - fromfd = open(from, O_RDONLY); - if (fromfd < 0) + /* extra 'x' at the end is to reserve space for '.' */ + if (asprintf(&tmp, "%s.XXXXXXx", to) < 0) { + tmp = NULL; goto out; + } + ptr = strrchr(tmp, '/'); + if (!ptr) + goto out; + ptr = memmove(ptr + 1, ptr, strlen(ptr) - 1); + *ptr = '.'; - tofd = creat(to, mode); + tofd = mkstemp(tmp); if (tofd < 0) - goto out_close_from; + goto out; - addr = mmap(NULL, st.st_size, PROT_READ, MAP_PRIVATE, fromfd, 0); - if (addr == MAP_FAILED) + if (fchmod(tofd, mode)) goto out_close_to; - if (write(tofd, addr, st.st_size) == st.st_size) - err = 0; + if (st.st_size == 0) { /* /proc? do it slowly... 
*/ + err = slow_copyfile(from, tmp); + goto out_close_to; + } + + fromfd = open(from, O_RDONLY); + if (fromfd < 0) + goto out_close_to; + + err = copyfile_offset(fromfd, 0, tofd, 0, st.st_size); - munmap(addr, st.st_size); + close(fromfd); out_close_to: close(tofd); - if (err) - unlink(to); -out_close_from: - close(fromfd); + if (!err) + err = link(tmp, to); + unlink(tmp); out: + free(tmp); return err; } @@ -269,6 +348,13 @@ void dump_stack(void) void dump_stack(void) {} #endif +void sighandler_dump_stack(int sig) +{ + psignal(sig, "perf"); + dump_stack(); + exit(sig); +} + void get_term_dimensions(struct winsize *ws) { char *s = getenv("LINES"); @@ -303,13 +389,26 @@ void set_term_quiet_input(struct termios *old) tcsetattr(0, TCSANOW, &tc); } -static void set_tracing_events_path(const char *mountpoint) +static void set_tracing_events_path(const char *tracing, const char *mountpoint) { - snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s", - mountpoint, "tracing/events"); + snprintf(tracing_events_path, sizeof(tracing_events_path), "%s/%s%s", + mountpoint, tracing, "events"); } -const char *perf_debugfs_mount(const char *mountpoint) +static const char *__perf_tracefs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = tracefs_mount(mountpoint); + if (!mnt) + return NULL; + + set_tracing_events_path("", mnt); + + return mnt; +} + +static const char *__perf_debugfs_mount(const char *mountpoint) { const char *mnt; @@ -317,7 +416,20 @@ const char *perf_debugfs_mount(const char *mountpoint) if (!mnt) return NULL; - set_tracing_events_path(mnt); + set_tracing_events_path("tracing/", mnt); + + return mnt; +} + +const char *perf_debugfs_mount(const char *mountpoint) +{ + const char *mnt; + + mnt = __perf_tracefs_mount(mountpoint); + if (mnt) + return mnt; + + mnt = __perf_debugfs_mount(mountpoint); return mnt; } @@ -325,12 +437,19 @@ const char *perf_debugfs_mount(const char *mountpoint) void perf_debugfs_set_path(const char *mntpt) { snprintf(debugfs_mountpoint, strlen(debugfs_mountpoint), "%s", mntpt); - set_tracing_events_path(mntpt); + set_tracing_events_path("tracing/", mntpt); +} + +static const char *find_tracefs(void) +{ + const char *path = __perf_tracefs_mount(NULL); + + return path; } static const char *find_debugfs(void) { - const char *path = perf_debugfs_mount(NULL); + const char *path = __perf_debugfs_mount(NULL); if (!path) fprintf(stderr, "Your kernel does not support the debugfs filesystem"); @@ -344,6 +463,7 @@ static const char *find_debugfs(void) */ const char *find_tracing_dir(void) { + const char *tracing_dir = ""; static char *tracing; static int tracing_found; const char *debugfs; @@ -351,11 +471,15 @@ const char *find_tracing_dir(void) if (tracing_found) return tracing; - debugfs = find_debugfs(); - if (!debugfs) - return NULL; + debugfs = find_tracefs(); + if (!debugfs) { + tracing_dir = "/tracing"; + debugfs = find_debugfs(); + if (!debugfs) + return NULL; + } - if (asprintf(&tracing, "%s/tracing", debugfs) < 0) + if (asprintf(&tracing, "%s%s", debugfs, tracing_dir) < 0) return NULL; tracing_found = 1; diff --git a/tools/perf/util/util.h b/tools/perf/util/util.h index 027a5153495c..8bce58b47a82 100644 --- a/tools/perf/util/util.h +++ b/tools/perf/util/util.h @@ -75,6 +75,7 @@ #include <linux/types.h> #include <sys/ttydefaults.h> #include <api/fs/debugfs.h> +#include <api/fs/tracefs.h> #include <termios.h> #include <linux/bitops.h> #include <termios.h> @@ -248,14 +249,20 @@ static inline int sane_case(int x, int high) } int mkdir_p(char *path, 
mode_t mode); +int rm_rf(char *path); int copyfile(const char *from, const char *to); int copyfile_mode(const char *from, const char *to, mode_t mode); +int copyfile_offset(int fromfd, loff_t from_ofs, int tofd, loff_t to_ofs, u64 size); s64 perf_atoll(const char *str); char **argv_split(const char *str, int *argcp); void argv_free(char **argv); bool strglobmatch(const char *str, const char *pat); bool strlazymatch(const char *str, const char *pat); +static inline bool strisglob(const char *str) +{ + return strpbrk(str, "*?[") != NULL; +} int strtailcmp(const char *s1, const char *s2); char *strxfrchar(char *s, char from, char to); unsigned long convert_unit(unsigned long value, char *unit); @@ -276,6 +283,7 @@ char *ltrim(char *s); char *rtrim(char *s); void dump_stack(void); +void sighandler_dump_stack(int sig); extern unsigned int page_size; extern int cacheline_size; @@ -327,4 +335,8 @@ bool find_process(const char *name); int gzip_decompress_to_file(const char *input, int output_fd); #endif +#ifdef HAVE_LZMA_SUPPORT +int lzma_decompress_to_file(const char *input, int output_fd); +#endif + #endif /* GIT_COMPAT_UTIL_H */ diff --git a/tools/perf/util/vdso.c b/tools/perf/util/vdso.c index 5c7dd796979d..44d440da15dc 100644 --- a/tools/perf/util/vdso.c +++ b/tools/perf/util/vdso.c @@ -101,7 +101,7 @@ static char *get_file(struct vdso_file *vdso_file) return vdso; } -void vdso__exit(struct machine *machine) +void machine__exit_vdso(struct machine *machine) { struct vdso_info *vdso_info = machine->vdso_info; @@ -120,14 +120,14 @@ void vdso__exit(struct machine *machine) zfree(&machine->vdso_info); } -static struct dso *vdso__new(struct machine *machine, const char *short_name, - const char *long_name) +static struct dso *__machine__addnew_vdso(struct machine *machine, const char *short_name, + const char *long_name) { struct dso *dso; dso = dso__new(short_name); if (dso != NULL) { - dsos__add(&machine->user_dsos, dso); + __dsos__add(&machine->dsos, dso); dso__set_long_name(dso, long_name, false); } @@ -230,27 +230,29 @@ static const char *vdso__get_compat_file(struct vdso_file *vdso_file) return vdso_file->temp_file_name; } -static struct dso *vdso__findnew_compat(struct machine *machine, - struct vdso_file *vdso_file) +static struct dso *__machine__findnew_compat(struct machine *machine, + struct vdso_file *vdso_file) { const char *file_name; struct dso *dso; - dso = dsos__find(&machine->user_dsos, vdso_file->dso_name, true); + dso = __dsos__find(&machine->dsos, vdso_file->dso_name, true); if (dso) - return dso; + goto out; file_name = vdso__get_compat_file(vdso_file); if (!file_name) - return NULL; + goto out; - return vdso__new(machine, vdso_file->dso_name, file_name); + dso = __machine__addnew_vdso(machine, vdso_file->dso_name, file_name); +out: + return dso; } -static int vdso__dso_findnew_compat(struct machine *machine, - struct thread *thread, - struct vdso_info *vdso_info, - struct dso **dso) +static int __machine__findnew_vdso_compat(struct machine *machine, + struct thread *thread, + struct vdso_info *vdso_info, + struct dso **dso) { enum dso_type dso_type; @@ -267,10 +269,10 @@ static int vdso__dso_findnew_compat(struct machine *machine, switch (dso_type) { case DSO__TYPE_32BIT: - *dso = vdso__findnew_compat(machine, &vdso_info->vdso32); + *dso = __machine__findnew_compat(machine, &vdso_info->vdso32); return 1; case DSO__TYPE_X32BIT: - *dso = vdso__findnew_compat(machine, &vdso_info->vdsox32); + *dso = __machine__findnew_compat(machine, &vdso_info->vdsox32); return 1; case 
DSO__TYPE_UNKNOWN: case DSO__TYPE_64BIT: @@ -281,35 +283,37 @@ static int vdso__dso_findnew_compat(struct machine *machine, #endif -struct dso *vdso__dso_findnew(struct machine *machine, - struct thread *thread __maybe_unused) +struct dso *machine__findnew_vdso(struct machine *machine, + struct thread *thread __maybe_unused) { struct vdso_info *vdso_info; - struct dso *dso; + struct dso *dso = NULL; + pthread_rwlock_wrlock(&machine->dsos.lock); if (!machine->vdso_info) machine->vdso_info = vdso_info__new(); vdso_info = machine->vdso_info; if (!vdso_info) - return NULL; + goto out_unlock; #if BITS_PER_LONG == 64 - if (vdso__dso_findnew_compat(machine, thread, vdso_info, &dso)) - return dso; + if (__machine__findnew_vdso_compat(machine, thread, vdso_info, &dso)) + goto out_unlock; #endif - dso = dsos__find(&machine->user_dsos, DSO__NAME_VDSO, true); + dso = __dsos__find(&machine->dsos, DSO__NAME_VDSO, true); if (!dso) { char *file; file = get_file(&vdso_info->vdso); - if (!file) - return NULL; - - dso = vdso__new(machine, DSO__NAME_VDSO, file); + if (file) + dso = __machine__addnew_vdso(machine, DSO__NAME_VDSO, file); } +out_unlock: + dso__get(dso); + pthread_rwlock_unlock(&machine->dsos.lock); return dso; } diff --git a/tools/perf/util/vdso.h b/tools/perf/util/vdso.h index d97da1616f0c..cdc4fabfc212 100644 --- a/tools/perf/util/vdso.h +++ b/tools/perf/util/vdso.h @@ -23,7 +23,7 @@ bool dso__is_vdso(struct dso *dso); struct machine; struct thread; -struct dso *vdso__dso_findnew(struct machine *machine, struct thread *thread); -void vdso__exit(struct machine *machine); +struct dso *machine__findnew_vdso(struct machine *machine, struct thread *thread); +void machine__exit_vdso(struct machine *machine); #endif /* __PERF_VDSO__ */ diff --git a/tools/perf/util/xyarray.c b/tools/perf/util/xyarray.c index 22afbf6c536a..c10ba41ef3f6 100644 --- a/tools/perf/util/xyarray.c +++ b/tools/perf/util/xyarray.c @@ -9,11 +9,19 @@ struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size) if (xy != NULL) { xy->entry_size = entry_size; xy->row_size = row_size; + xy->entries = xlen * ylen; } return xy; } +void xyarray__reset(struct xyarray *xy) +{ + size_t n = xy->entries * xy->entry_size; + + memset(xy->contents, 0, n); +} + void xyarray__delete(struct xyarray *xy) { free(xy); diff --git a/tools/perf/util/xyarray.h b/tools/perf/util/xyarray.h index c488a07275dd..7f30af371b7e 100644 --- a/tools/perf/util/xyarray.h +++ b/tools/perf/util/xyarray.h @@ -6,11 +6,13 @@ struct xyarray { size_t row_size; size_t entry_size; + size_t entries; char contents[]; }; struct xyarray *xyarray__new(int xlen, int ylen, size_t entry_size); void xyarray__delete(struct xyarray *xy); +void xyarray__reset(struct xyarray *xy); static inline void *xyarray__entry(struct xyarray *xy, int x, int y) { diff --git a/tools/power/acpi/common/getopt.c b/tools/power/acpi/common/getopt.c index 5da129e10aa2..326e826a5d20 100644 --- a/tools/power/acpi/common/getopt.c +++ b/tools/power/acpi/common/getopt.c @@ -127,7 +127,7 @@ int acpi_getopt(int argc, char **argv, char *opts) argv[acpi_gbl_optind][0] != '-' || argv[acpi_gbl_optind][1] == '\0') { return (ACPI_OPT_END); - } else if (ACPI_STRCMP(argv[acpi_gbl_optind], "--") == 0) { + } else if (strcmp(argv[acpi_gbl_optind], "--") == 0) { acpi_gbl_optind++; return (ACPI_OPT_END); } @@ -140,7 +140,7 @@ int acpi_getopt(int argc, char **argv, char *opts) /* Make sure that the option is legal */ if (current_char == ':' || - (opts_ptr = ACPI_STRCHR(opts, current_char)) == NULL) { + (opts_ptr = 
strchr(opts, current_char)) == NULL) { ACPI_OPTION_ERROR("Illegal option: -", current_char); if (argv[acpi_gbl_optind][++current_char_ptr] == '\0') { diff --git a/tools/power/acpi/man/acpidump.8 b/tools/power/acpi/man/acpidump.8 index 38f095d86b52..79e2d1d435d1 100644 --- a/tools/power/acpi/man/acpidump.8 +++ b/tools/power/acpi/man/acpidump.8 @@ -22,9 +22,6 @@ acpidump options are as follow: .B \-b Dump tables to binary files .TP -.B \-c -Dump customized tables -.TP .B \-h \-? This help message .TP @@ -48,15 +45,25 @@ Verbose mode .B \-a <Address> Get table via a physical address .TP +.B \-c <on|off> +Turning on/off customized table dumping +.TP .B \-f <BinaryFile> Get table via a binary file .TP .B \-n <Signature> Get table via a name/signature .TP -Invocation without parameters dumps all available tables +.B \-x +Do not use but dump XSDT +.TP +.B \-x \-x +Do not use or dump XSDT +.TP +.fi +Invocation without parameters dumps all available tables. .TP -Multiple mixed instances of -a, -f, and -n are supported +Multiple mixed instances of -a, -f, and -n are supported. .SH EXAMPLES diff --git a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c index 92f1fd700344..dd5008b0617a 100644 --- a/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c +++ b/tools/power/acpi/os_specific/service_layers/oslinuxtbl.c @@ -222,7 +222,7 @@ acpi_os_get_table_by_address(acpi_physical_address address, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); exit: osl_unmap_table(mapped_table); @@ -531,7 +531,7 @@ static acpi_status osl_load_rsdp(void) gbl_rsdp_address = rsdp_base + (ACPI_CAST8(mapped_table) - rsdp_address); - ACPI_MEMCPY(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); + memcpy(&gbl_rsdp, mapped_table, sizeof(struct acpi_table_rsdp)); acpi_os_unmap_memory(rsdp_address, rsdp_size); return (AE_OK); @@ -582,64 +582,67 @@ static acpi_status osl_table_initialize(void) return (AE_OK); } - /* Get RSDP from memory */ + if (!gbl_dump_customized_tables) { - status = osl_load_rsdp(); - if (ACPI_FAILURE(status)) { - return (status); - } + /* Get RSDP from memory */ + + status = osl_load_rsdp(); + if (ACPI_FAILURE(status)) { + return (status); + } - /* Get XSDT from memory */ + /* Get XSDT from memory */ - if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { - if (gbl_xsdt) { - free(gbl_xsdt); - gbl_xsdt = NULL; + if (gbl_rsdp.revision && !gbl_do_not_dump_xsdt) { + if (gbl_xsdt) { + free(gbl_xsdt); + gbl_xsdt = NULL; + } + + gbl_revision = 2; + status = osl_get_bios_table(ACPI_SIG_XSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_xsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - gbl_revision = 2; - status = osl_get_bios_table(ACPI_SIG_XSDT, 0, - ACPI_CAST_PTR(struct - acpi_table_header *, - &gbl_xsdt), &address); - if (ACPI_FAILURE(status)) { - return (status); + /* Get RSDT from memory */ + + if (gbl_rsdp.rsdt_physical_address) { + if (gbl_rsdt) { + free(gbl_rsdt); + gbl_rsdt = NULL; + } + + status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + ACPI_CAST_PTR(struct + acpi_table_header + *, &gbl_rsdt), + &address); + if (ACPI_FAILURE(status)) { + return (status); + } } - } - /* Get RSDT from memory */ + /* Get FADT from memory */ - if (gbl_rsdp.rsdt_physical_address) { - if (gbl_rsdt) { - free(gbl_rsdt); - gbl_rsdt = NULL; + if (gbl_fadt) { + free(gbl_fadt); + gbl_fadt = NULL; } - status = osl_get_bios_table(ACPI_SIG_RSDT, 0, + status = 
osl_get_bios_table(ACPI_SIG_FADT, 0, ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_rsdt), &address); + &gbl_fadt), + &gbl_fadt_address); if (ACPI_FAILURE(status)) { return (status); } - } - - /* Get FADT from memory */ - - if (gbl_fadt) { - free(gbl_fadt); - gbl_fadt = NULL; - } - - status = osl_get_bios_table(ACPI_SIG_FADT, 0, - ACPI_CAST_PTR(struct acpi_table_header *, - &gbl_fadt), - &gbl_fadt_address); - if (ACPI_FAILURE(status)) { - return (status); - } - - if (!gbl_dump_customized_tables) { /* Add mandatory tables to global table list first */ @@ -961,7 +964,7 @@ osl_get_bios_table(char *signature, goto exit; } - ACPI_MEMCPY(local_table, mapped_table, table_length); + memcpy(local_table, mapped_table, table_length); *address = table_address; *table = local_table; @@ -1156,7 +1159,7 @@ osl_table_name_from_file(char *filename, char *signature, u32 *instance) /* Extract instance number */ if (isdigit((int)filename[ACPI_NAME_SIZE])) { - sscanf(&filename[ACPI_NAME_SIZE], "%d", instance); + sscanf(&filename[ACPI_NAME_SIZE], "%u", instance); } else if (strlen(filename) != ACPI_NAME_SIZE) { return (AE_BAD_SIGNATURE); } else { diff --git a/tools/power/acpi/os_specific/service_layers/osunixmap.c b/tools/power/acpi/os_specific/service_layers/osunixmap.c index 3853a7350440..44ad4889d468 100644 --- a/tools/power/acpi/os_specific/service_layers/osunixmap.c +++ b/tools/power/acpi/os_specific/service_layers/osunixmap.c @@ -54,7 +54,7 @@ ACPI_MODULE_NAME("osunixmap") #ifndef O_BINARY #define O_BINARY 0 #endif -#ifdef _free_BSD +#if defined(_dragon_fly) || defined(_free_BSD) #define MMAP_FLAGS MAP_SHARED #else #define MMAP_FLAGS MAP_PRIVATE @@ -146,6 +146,6 @@ void acpi_os_unmap_memory(void *where, acpi_size length) acpi_size page_size; page_size = acpi_os_get_page_size(); - offset = (acpi_physical_address) where % page_size; + offset = ACPI_TO_INTEGER(where) % page_size; munmap((u8 *)where - offset, (length + offset)); } diff --git a/tools/power/acpi/tools/acpidump/acpidump.h b/tools/power/acpi/tools/acpidump/acpidump.h index 84bdef0136cb..eed534481434 100644 --- a/tools/power/acpi/tools/acpidump/acpidump.h +++ b/tools/power/acpi/tools/acpidump/acpidump.h @@ -66,7 +66,7 @@ EXTERN u8 INIT_GLOBAL(gbl_summary_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_verbose_mode, FALSE); EXTERN u8 INIT_GLOBAL(gbl_binary_mode, FALSE); -EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, FALSE); +EXTERN u8 INIT_GLOBAL(gbl_dump_customized_tables, TRUE); EXTERN u8 INIT_GLOBAL(gbl_do_not_dump_xsdt, FALSE); EXTERN ACPI_FILE INIT_GLOBAL(gbl_output_file, NULL); EXTERN char INIT_GLOBAL(*gbl_output_filename, NULL); diff --git a/tools/power/acpi/tools/acpidump/apdump.c b/tools/power/acpi/tools/acpidump/apdump.c index c736adf5fb55..61d0de804b70 100644 --- a/tools/power/acpi/tools/acpidump/apdump.c +++ b/tools/power/acpi/tools/acpidump/apdump.c @@ -329,7 +329,7 @@ int ap_dump_table_by_name(char *signature) acpi_status status; int table_status; - if (ACPI_STRLEN(signature) != ACPI_NAME_SIZE) { + if (strlen(signature) != ACPI_NAME_SIZE) { acpi_log_error ("Invalid table signature [%s]: must be exactly 4 characters\n", signature); @@ -338,15 +338,15 @@ int ap_dump_table_by_name(char *signature) /* Table signatures are expected to be uppercase */ - ACPI_STRCPY(local_signature, signature); + strcpy(local_signature, signature); acpi_ut_strupr(local_signature); /* To be friendly, handle tables whose signatures do not match the name */ if (ACPI_COMPARE_NAME(local_signature, "FADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_FADT); + 
strcpy(local_signature, ACPI_SIG_FADT); } else if (ACPI_COMPARE_NAME(local_signature, "MADT")) { - ACPI_STRCPY(local_signature, ACPI_SIG_MADT); + strcpy(local_signature, ACPI_SIG_MADT); } /* Dump all instances of this signature (to handle multiple SSDTs) */ diff --git a/tools/power/acpi/tools/acpidump/apfiles.c b/tools/power/acpi/tools/acpidump/apfiles.c index 8f2fe168228e..a37f9702b2a9 100644 --- a/tools/power/acpi/tools/acpidump/apfiles.c +++ b/tools/power/acpi/tools/acpidump/apfiles.c @@ -136,10 +136,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) } else { ACPI_MOVE_NAME(filename, table->signature); } - filename[0] = (char)ACPI_TOLOWER(filename[0]); - filename[1] = (char)ACPI_TOLOWER(filename[1]); - filename[2] = (char)ACPI_TOLOWER(filename[2]); - filename[3] = (char)ACPI_TOLOWER(filename[3]); + filename[0] = (char)tolower((int)filename[0]); + filename[1] = (char)tolower((int)filename[1]); + filename[2] = (char)tolower((int)filename[2]); + filename[3] = (char)tolower((int)filename[3]); filename[ACPI_NAME_SIZE] = 0; /* Handle multiple SSDts - create different filenames for each */ @@ -147,10 +147,10 @@ int ap_write_to_binary_file(struct acpi_table_header *table, u32 instance) if (instance > 0) { acpi_ut_snprintf(instance_str, sizeof(instance_str), "%u", instance); - ACPI_STRCAT(filename, instance_str); + strcat(filename, instance_str); } - ACPI_STRCAT(filename, ACPI_TABLE_FILE_SUFFIX); + strcat(filename, ACPI_TABLE_FILE_SUFFIX); if (gbl_verbose_mode) { acpi_log_error diff --git a/tools/power/acpi/tools/acpidump/apmain.c b/tools/power/acpi/tools/acpidump/apmain.c index d0ba6535f5af..57620f66ae6c 100644 --- a/tools/power/acpi/tools/acpidump/apmain.c +++ b/tools/power/acpi/tools/acpidump/apmain.c @@ -80,7 +80,7 @@ struct ap_dump_action action_table[AP_MAX_ACTIONS]; u32 current_action = 0; #define AP_UTILITY_NAME "ACPI Binary Table Dump Utility" -#define AP_SUPPORTED_OPTIONS "?a:bcf:hn:o:r:svxz" +#define AP_SUPPORTED_OPTIONS "?a:bc:f:hn:o:r:svxz" /****************************************************************************** * @@ -96,7 +96,6 @@ static void ap_display_usage(void) ACPI_USAGE_HEADER("acpidump [options]"); ACPI_OPTION("-b", "Dump tables to binary files"); - ACPI_OPTION("-c", "Dump customized tables"); ACPI_OPTION("-h -?", "This help message"); ACPI_OPTION("-o <File>", "Redirect output to file"); ACPI_OPTION("-r <Address>", "Dump tables from specified RSDP"); @@ -107,6 +106,7 @@ static void ap_display_usage(void) ACPI_USAGE_TEXT("\nTable Options:\n"); ACPI_OPTION("-a <Address>", "Get table via a physical address"); + ACPI_OPTION("-c <on|off>", "Turning on/off customized table dumping"); ACPI_OPTION("-f <BinaryFile>", "Get table via a binary file"); ACPI_OPTION("-n <Signature>", "Get table via a name/signature"); ACPI_OPTION("-x", "Do not use but dump XSDT"); @@ -181,7 +181,16 @@ static int ap_do_options(int argc, char **argv) case 'c': /* Dump customized tables */ - gbl_dump_customized_tables = TRUE; + if (!strcmp(acpi_gbl_optarg, "on")) { + gbl_dump_customized_tables = TRUE; + } else if (!strcmp(acpi_gbl_optarg, "off")) { + gbl_dump_customized_tables = FALSE; + } else { + acpi_log_error + ("%s: Cannot handle this switch, please use on|off\n", + acpi_gbl_optarg); + return (-1); + } continue; case 'h': diff --git a/tools/power/cpupower/utils/helpers/pci.c b/tools/power/cpupower/utils/helpers/pci.c index 9690798e6446..8b278983cfc5 100644 --- a/tools/power/cpupower/utils/helpers/pci.c +++ b/tools/power/cpupower/utils/helpers/pci.c @@ -25,14 +25,21 @@ 
struct pci_dev *pci_acc_init(struct pci_access **pacc, int domain, int bus, int slot, int func, int vendor, int dev) { - struct pci_filter filter_nb_link = { domain, bus, slot, func, - vendor, dev }; + struct pci_filter filter_nb_link; struct pci_dev *device; *pacc = pci_alloc(); if (*pacc == NULL) return NULL; + pci_filter_init(*pacc, &filter_nb_link); + filter_nb_link.domain = domain; + filter_nb_link.bus = bus; + filter_nb_link.slot = slot; + filter_nb_link.func = func; + filter_nb_link.vendor = vendor; + filter_nb_link.device = dev; + pci_init(*pacc); pci_scan_bus(*pacc); diff --git a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c index 90a8c4f071e7..c83f1606970b 100644 --- a/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c +++ b/tools/power/cpupower/utils/idle_monitor/mperf_monitor.c @@ -135,7 +135,7 @@ static int mperf_get_count_percent(unsigned int id, double *percent, dprint("%s: TSC Ref - mperf_diff: %llu, tsc_diff: %llu\n", mperf_cstates[id].name, mperf_diff, tsc_diff); } else if (max_freq_mode == MAX_FREQ_SYSFS) { - timediff = timespec_diff_us(time_start, time_end); + timediff = max_frequency * timespec_diff_us(time_start, time_end); *percent = 100.0 * mperf_diff / timediff; dprint("%s: MAXFREQ - mperf_diff: %llu, time_diff: %llu\n", mperf_cstates[id].name, mperf_diff, timediff); @@ -176,7 +176,7 @@ static int mperf_get_count_freq(unsigned int id, unsigned long long *count, dprint("%s: Average freq based on %s maximum frequency:\n", mperf_cstates[id].name, (max_freq_mode == MAX_FREQ_TSC_REF) ? "TSC calculated" : "sysfs read"); - dprint("%max_frequency: %lu", max_frequency); + dprint("max_frequency: %lu\n", max_frequency); dprint("aperf_diff: %llu\n", aperf_diff); dprint("mperf_diff: %llu\n", mperf_diff); dprint("avg freq: %llu\n", *count); @@ -279,6 +279,7 @@ use_sysfs: return -1; } max_freq_mode = MAX_FREQ_SYSFS; + max_frequency /= 1000; /* Default automatically to MHz value */ return 0; } diff --git a/tools/power/x86/turbostat/Makefile b/tools/power/x86/turbostat/Makefile index d1b3a361e526..e367b1a85d70 100644 --- a/tools/power/x86/turbostat/Makefile +++ b/tools/power/x86/turbostat/Makefile @@ -1,11 +1,15 @@ CC = $(CROSS_COMPILE)gcc -BUILD_OUTPUT := $(PWD) +BUILD_OUTPUT := $(CURDIR) PREFIX := /usr DESTDIR := +ifeq ("$(origin O)", "command line") + BUILD_OUTPUT := $(O) +endif + turbostat : turbostat.c CFLAGS += -Wall -CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/uapi/asm/msr-index.h"' +CFLAGS += -DMSRHEADER='"../../../../arch/x86/include/asm/msr-index.h"' %: %.c @mkdir -p $(BUILD_OUTPUT) diff --git a/tools/power/x86/turbostat/turbostat.8 b/tools/power/x86/turbostat/turbostat.8 index feea7ad9500b..05b8fc38dc8b 100644 --- a/tools/power/x86/turbostat/turbostat.8 +++ b/tools/power/x86/turbostat/turbostat.8 @@ -20,9 +20,11 @@ upon its completion. The second method is to omit the command, and turbostat displays statistics every 5 seconds. The 5-second interval can be changed using the --interval option. - +.PP Some information is not available on older processors. .SS Options +Options can be specified with a single or double '-', and only as much of the option +name as necessary to disambiguate it from others is necessary. Note that options are case-sensitive. \fB--Counter MSR#\fP shows the delta of the specified 64-bit MSR counter. .PP \fB--counter MSR#\fP shows the delta of the specified 32-bit MSR counter. @@ -55,16 +57,20 @@ more than once may also enable internal turbostat debug information. 
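The cpupower mperf_monitor hunk above scales the elapsed time by the sysfs maximum frequency before dividing, so the MPERF delta and the time base end up in the same unit. A minimal standalone sketch of that corrected arithmetic, with made-up sample numbers rather than real MSR reads:

    #include <stdio.h>

    int main(void)
    {
        /* synthetic sample: not read from hardware */
        unsigned long long mperf_diff = 1200000000ULL; /* reference cycles spent in C0 */
        unsigned long max_frequency = 3400;            /* MHz, i.e. the sysfs kHz value / 1000 */
        unsigned long long interval_us = 1000000ULL;   /* 1 second between samples */

        /* corrected MAX_FREQ_SYSFS path: express the elapsed time in cycles
         * at max_frequency, then compare the MPERF delta against it */
        unsigned long long timediff = max_frequency * interval_us;
        double percent = 100.0 * mperf_diff / timediff;

        printf("%%Busy ~ %.2f\n", percent);            /* ~35.29 for these numbers */
        return 0;
    }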
The \fBcommand\fP parameter forks \fBcommand\fP, and upon its exit, displays the statistics gathered since it was forked. .PP -.SH FIELD DESCRIPTIONS +.SH DEFAULT FIELD DESCRIPTIONS .nf -\fBPackage\fP processor package number. -\fBCore\fP processor core number. -\fBCPU\fP Linux CPU (logical processor) number. -Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology. +\fBCPU\fP Linux CPU (logical processor) number. Yes, it is okay that on many systems the CPUs are not listed in numerical order -- for efficiency reasons, turbostat runs in topology order, so HT siblings appear together. \fBAVG_MHz\fP number of cycles executed divided by time elapsed. \fB%Busy\fP percent of the interval that the CPU retired instructions, aka. % of time in "C0" state. \fBBzy_MHz\fP average clock rate while the CPU was busy (in "c0" state). \fBTSC_MHz\fP average MHz that the TSC ran during the entire interval. +.fi +.PP +.SH DEBUG FIELD DESCRIPTIONS +.nf +\fBPackage\fP processor package number. +\fBCore\fP processor core number. +Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading Technology (HT). \fBCPU%c1, CPU%c3, CPU%c6, CPU%c7\fP show the percentage residency in hardware core idle states. \fBCoreTmp\fP Degrees Celsius reported by the per-core Digital Thermal Sensor. \fBPkgTtmp\fP Degrees Celsius reported by the per-package Package Thermal Monitor. @@ -81,63 +87,76 @@ Note that multiple CPUs per core indicate support for Intel(R) Hyper-Threading T Without any parameters, turbostat displays statistics ever 5 seconds. (override interval with "-i sec" option, or specify a command for turbostat to fork). +.nf +[root@hsw]# ./turbostat + CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + - 488 12.51 3898 3498 + 0 0 0.01 3885 3498 + 4 3897 99.99 3898 3498 + 1 0 0.00 3861 3498 + 5 0 0.00 3882 3498 + 2 1 0.02 3894 3498 + 6 2 0.06 3898 3498 + 3 0 0.00 3849 3498 + 7 0 0.00 3877 3498 + +.fi +.SH DEBUG EXAMPLE +The "--debug" option prints additional system information before measurements: The first row of statistics is a summary for the entire system. For residency % columns, the summary is a weighted average. For Temperature columns, the summary is the column maximum. For Watts columns, the summary is a system total. Subsequent rows show per-CPU statistics. - -.nf -[root@ivy]# ./turbostat - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - - - 6 0.36 1596 3492 0 0.59 0.01 99.04 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 - 0 0 9 0.58 1596 3492 0 0.28 0.01 99.13 0.00 23 24 23.82 0.01 72.47 0.00 6.40 1.01 0.00 - 0 4 1 0.07 1596 3492 0 0.79 - 1 1 10 0.65 1596 3492 0 0.59 0.00 98.76 0.00 23 - 1 5 5 0.28 1596 3492 0 0.95 - 2 2 10 0.66 1596 3492 0 0.41 0.01 98.92 0.00 23 - 2 6 2 0.10 1597 3492 0 0.97 - 3 3 3 0.20 1596 3492 0 0.44 0.00 99.37 0.00 23 - 3 7 5 0.31 1596 3492 0 0.33 -.fi -.SH DEBUG EXAMPLE -The "--debug" option prints additional system information before measurements: - .nf -turbostat version 4.0 10-Feb, 2015 - Len Brown <lenb@kernel.org> -CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3a:9 (6:58:9) +turbostat version 4.1 10-Feb, 2015 - Len Brown <lenb@kernel.org> +CPUID(0): GenuineIntel 13 CPUID levels; family:model:stepping 0x6:3c:3 (6:60:3) CPUID(6): APERF, DTS, PTM, EPB -RAPL: 851 sec. Joule Counter Range, at 77 Watts -cpu0: MSR_NHM_PLATFORM_INFO: 0x81010f0012300 -16 * 100 = 1600 MHz max efficiency +RAPL: 3121 sec. 
Joule Counter Range, at 84 Watts +cpu0: MSR_NHM_PLATFORM_INFO: 0x80838f3012300 +8 * 100 = 800 MHz max efficiency 35 * 100 = 3500 MHz TSC frequency -cpu0: MSR_IA32_POWER_CTL: 0x0014005d (C1E auto-promotion: DISabled) -cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e008402 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, locked: pkg-cstate-limit=2: pc6n) +cpu0: MSR_IA32_POWER_CTL: 0x0004005d (C1E auto-promotion: DISabled) +cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x1e000400 (UNdemote-C3, UNdemote-C1, demote-C3, demote-C1, UNlocked: pkg-cstate-limit=0: pc0) cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x25262727 37 * 100 = 3700 MHz max turbo 4 active cores 38 * 100 = 3800 MHz max turbo 3 active cores 39 * 100 = 3900 MHz max turbo 2 active cores 39 * 100 = 3900 MHz max turbo 1 active cores cpu0: MSR_IA32_ENERGY_PERF_BIAS: 0x00000006 (balanced) -cpu0: MSR_RAPL_POWER_UNIT: 0x000a1003 (0.125000 Watts, 0.000015 Joules, 0.000977 sec.) -cpu0: MSR_PKG_POWER_INFO: 0x01e00268 (77 W TDP, RAPL 60 - 0 W, 0.000000 sec.) -cpu0: MSR_PKG_POWER_LIMIT: 0x30000148268 (UNlocked) -cpu0: PKG Limit #1: ENabled (77.000000 Watts, 1.000000 sec, clamp DISabled) -cpu0: PKG Limit #2: DISabled (96.000000 Watts, 0.000977* sec, clamp DISabled) +cpu0: MSR_CORE_PERF_LIMIT_REASONS, 0x31200000 (Active: ) (Logged: Auto-HWP, Amps, MultiCoreTurbo, Transitions, ) +cpu0: MSR_GFX_PERF_LIMIT_REASONS, 0x00000000 (Active: ) (Logged: ) +cpu0: MSR_RING_PERF_LIMIT_REASONS, 0x0d000000 (Active: ) (Logged: Amps, PkgPwrL1, PkgPwrL2, ) +cpu0: MSR_RAPL_POWER_UNIT: 0x000a0e03 (0.125000 Watts, 0.000061 Joules, 0.000977 sec.) +cpu0: MSR_PKG_POWER_INFO: 0x000002a0 (84 W TDP, RAPL 0 - 0 W, 0.000000 sec.) +cpu0: MSR_PKG_POWER_LIMIT: 0x428348001a82a0 (UNlocked) +cpu0: PKG Limit #1: ENabled (84.000000 Watts, 8.000000 sec, clamp DISabled) +cpu0: PKG Limit #2: ENabled (105.000000 Watts, 0.002441* sec, clamp DISabled) cpu0: MSR_PP0_POLICY: 0 cpu0: MSR_PP0_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: Cores Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) cpu0: MSR_PP1_POLICY: 0 cpu0: MSR_PP1_POWER_LIMIT: 0x00000000 (UNlocked) cpu0: GFX Limit: DISabled (0.000000 Watts, 0.000977 sec, clamp DISabled) -cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00691400 (105 C) -cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x884e0000 (27 C) -cpu0: MSR_IA32_THERM_STATUS: 0x88580000 (17 C +/- 1) -cpu1: MSR_IA32_THERM_STATUS: 0x885a0000 (15 C +/- 1) -cpu2: MSR_IA32_THERM_STATUS: 0x88570000 (18 C +/- 1) -cpu3: MSR_IA32_THERM_STATUS: 0x884e0000 (27 C +/- 1) - ... +cpu0: MSR_IA32_TEMPERATURE_TARGET: 0x00641400 (100 C) +cpu0: MSR_IA32_PACKAGE_THERM_STATUS: 0x88340800 (48 C) +cpu0: MSR_IA32_THERM_STATUS: 0x88340000 (48 C +/- 1) +cpu1: MSR_IA32_THERM_STATUS: 0x88440000 (32 C +/- 1) +cpu2: MSR_IA32_THERM_STATUS: 0x88450000 (31 C +/- 1) +cpu3: MSR_IA32_THERM_STATUS: 0x88490000 (27 C +/- 1) + Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp PkgWatt CorWatt GFXWatt + - - 493 12.64 3898 3498 0 12.64 0.00 0.00 74.72 47 47 21.62 13.74 0.00 + 0 0 4 0.11 3894 3498 0 99.89 0.00 0.00 0.00 47 47 21.62 13.74 0.00 + 0 4 3897 99.98 3898 3498 0 0.02 + 1 1 7 0.17 3887 3498 0 0.04 0.00 0.00 99.79 32 + 1 5 0 0.00 3885 3498 0 0.21 + 2 2 29 0.76 3895 3498 0 0.10 0.01 0.01 99.13 32 + 2 6 2 0.06 3896 3498 0 0.80 + 3 3 1 0.02 3832 3498 0 0.03 0.00 0.00 99.95 28 + 3 7 0 0.00 3879 3498 0 0.04 +^C + .fi The \fBmax efficiency\fP frequency, a.k.a. Low Frequency Mode, is the frequency available at the minimum package voltage. 
The \fBTSC frequency\fP is the base @@ -147,6 +166,9 @@ should be sustainable on all CPUs indefinitely, given nominal power and cooling. The remaining rows show what maximum turbo frequency is possible depending on the number of idle cores. Note that not all information is available on all processors. +.PP +The --debug option adds additional columns to the measurement ouput, including CPU idle power-state residency processor temperature sensor readinds. +See the field definitions above. .SH FORK EXAMPLE If turbostat is invoked with a command, it will fork that command and output the statistics gathered when the command exits. @@ -154,27 +176,23 @@ eg. Here a cycle soaker is run on 1 CPU (see %c0) for a few seconds until ^C while the other CPUs are mostly idle: .nf -root@ivy: turbostat cat /dev/zero > /dev/null +root@hsw: turbostat cat /dev/zero > /dev/null ^C - Core CPU Avg_MHz %Busy Bzy_MHz TSC_MHz SMI CPU%c1 CPU%c3 CPU%c6 CPU%c7 CoreTmp PkgTmp Pkg%pc2 Pkg%pc3 Pkg%pc6 Pkg%pc7 PkgWatt CorWatt GFXWatt - - - 496 12.75 3886 3492 0 13.16 0.04 74.04 0.00 36 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 - 0 0 22 0.57 3830 3492 0 0.83 0.02 98.59 0.00 27 36 0.00 0.00 0.00 0.00 23.15 17.65 0.00 - 0 4 9 0.24 3829 3492 0 1.15 - 1 1 4 0.09 3783 3492 0 99.91 0.00 0.00 0.00 36 - 1 5 3880 99.82 3888 3492 0 0.18 - 2 2 17 0.44 3813 3492 0 0.77 0.04 98.75 0.00 28 - 2 6 12 0.32 3823 3492 0 0.89 - 3 3 16 0.43 3844 3492 0 0.63 0.11 98.84 0.00 30 - 3 7 4 0.11 3827 3492 0 0.94 -30.372243 sec + CPU Avg_MHz %Busy Bzy_MHz TSC_MHz + - 482 12.51 3854 3498 + 0 0 0.01 1960 3498 + 4 0 0.00 2128 3498 + 1 0 0.00 3003 3498 + 5 3854 99.98 3855 3498 + 2 0 0.01 3504 3498 + 6 3 0.08 3884 3498 + 3 0 0.00 2553 3498 + 7 0 0.00 2126 3498 +10.783983 sec .fi -Above the cycle soaker drives cpu5 up its 3.8 GHz turbo limit -while the other processors are generally in various states of idle. - -Note that cpu1 and cpu5 are HT siblings within core1. -As cpu5 is very busy, it prevents its sibling, cpu1, -from entering a c-state deeper than c1. +Above the cycle soaker drives cpu5 up its 3.9 GHz turbo limit. +The first row shows the average MHz and %Busy across all the processors in the system. Note that the Avg_MHz column reflects the total number of cycles executed divided by the measurement interval. 
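All of the default columns described above come from three per-CPU deltas (APERF, MPERF, TSC) taken over the measurement interval. A standalone sketch of the arithmetic, with synthetic deltas chosen to land close to the sample output rather than values read from the MSRs:

    #include <stdio.h>

    int main(void)
    {
        double interval = 5.0;                     /* seconds between samples */
        double units = 1000000.0;                  /* Hz per MHz */
        unsigned long long aperf = 2440000000ULL;  /* delta of MSR_IA32_APERF */
        unsigned long long mperf = 2190000000ULL;  /* delta of MSR_IA32_MPERF */
        unsigned long long tsc = 17490000000ULL;   /* delta of the TSC */

        printf("Avg_MHz %.0f\n", aperf / units / interval);               /* cycles executed / time */
        printf("%%Busy  %.2f\n", 100.0 * mperf / tsc);                    /* share of time in C0 */
        printf("Bzy_MHz %.0f\n", tsc / units * aperf / mperf / interval); /* clock rate while in C0 */
        printf("TSC_MHz %.0f\n", tsc / units / interval);
        return 0;
    }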
If the %Busy column is 100%, diff --git a/tools/power/x86/turbostat/turbostat.c b/tools/power/x86/turbostat/turbostat.c index 2d089cac8580..323b65edfc97 100644 --- a/tools/power/x86/turbostat/turbostat.c +++ b/tools/power/x86/turbostat/turbostat.c @@ -52,11 +52,13 @@ unsigned int skip_c0; unsigned int skip_c1; unsigned int do_nhm_cstates; unsigned int do_snb_cstates; +unsigned int do_knl_cstates; unsigned int do_pc2; unsigned int do_pc3; unsigned int do_pc6; unsigned int do_pc7; unsigned int do_c8_c9_c10; +unsigned int do_skl_residency; unsigned int do_slm_cstates; unsigned int use_c1_residency_msr; unsigned int has_aperf; @@ -65,8 +67,6 @@ unsigned int units = 1000000; /* MHz etc */ unsigned int genuine_intel; unsigned int has_invariant_tsc; unsigned int do_nhm_platform_info; -unsigned int do_nhm_turbo_ratio_limit; -unsigned int do_ivt_turbo_ratio_limit; unsigned int extra_msr_offset32; unsigned int extra_msr_offset64; unsigned int extra_delta_offset32; @@ -84,11 +84,15 @@ unsigned int do_dts; unsigned int do_ptm; unsigned int tcc_activation_temp; unsigned int tcc_activation_temp_override; -double rapl_power_units, rapl_energy_units, rapl_time_units; +double rapl_power_units, rapl_time_units; +double rapl_dram_energy_units, rapl_energy_units; double rapl_joule_counter_range; unsigned int do_core_perf_limit_reasons; unsigned int do_gfx_perf_limit_reasons; unsigned int do_ring_perf_limit_reasons; +unsigned int crystal_hz; +unsigned long long tsc_hz; +int base_cpu; #define RAPL_PKG (1 << 0) /* 0x610 MSR_PKG_POWER_LIMIT */ @@ -101,18 +105,18 @@ unsigned int do_ring_perf_limit_reasons; #define RAPL_DRAM (1 << 3) /* 0x618 MSR_DRAM_POWER_LIMIT */ /* 0x619 MSR_DRAM_ENERGY_STATUS */ - /* 0x61c MSR_DRAM_POWER_INFO */ #define RAPL_DRAM_PERF_STATUS (1 << 4) /* 0x61b MSR_DRAM_PERF_STATUS */ +#define RAPL_DRAM_POWER_INFO (1 << 5) + /* 0x61c MSR_DRAM_POWER_INFO */ -#define RAPL_CORES (1 << 5) +#define RAPL_CORES (1 << 6) /* 0x638 MSR_PP0_POWER_LIMIT */ /* 0x639 MSR_PP0_ENERGY_STATUS */ -#define RAPL_CORE_POLICY (1 << 6) +#define RAPL_CORE_POLICY (1 << 7) /* 0x63a MSR_PP0_POLICY */ - -#define RAPL_GFX (1 << 7) +#define RAPL_GFX (1 << 8) /* 0x640 MSR_PP1_POWER_LIMIT */ /* 0x641 MSR_PP1_ENERGY_STATUS */ /* 0x642 MSR_PP1_POLICY */ @@ -159,6 +163,10 @@ struct pkg_data { unsigned long long pc8; unsigned long long pc9; unsigned long long pc10; + unsigned long long pkg_wtd_core_c0; + unsigned long long pkg_any_core_c0; + unsigned long long pkg_any_gfxe_c0; + unsigned long long pkg_both_core_gfxe_c0; unsigned int package_id; unsigned int energy_pkg; /* MSR_PKG_ENERGY_STATUS */ unsigned int energy_dram; /* MSR_DRAM_ENERGY_STATUS */ @@ -292,8 +300,7 @@ void print_header(void) if (has_aperf) outp += sprintf(outp, " Bzy_MHz"); outp += sprintf(outp, " TSC_MHz"); - if (do_smi) - outp += sprintf(outp, " SMI"); + if (extra_delta_offset32) outp += sprintf(outp, " count 0x%03X", extra_delta_offset32); if (extra_delta_offset64) @@ -302,9 +309,16 @@ void print_header(void) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset32); if (extra_msr_offset64) outp += sprintf(outp, " MSR 0x%03X", extra_msr_offset64); + + if (!debug) + goto done; + + if (do_smi) + outp += sprintf(outp, " SMI"); + if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c1"); - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, " CPU%%c3"); if (do_nhm_cstates) outp += sprintf(outp, " CPU%%c6"); @@ -316,6 +330,13 @@ void print_header(void) if (do_ptm) outp += sprintf(outp, " PkgTmp"); + if 
(do_skl_residency) { + outp += sprintf(outp, " Totl%%C0"); + outp += sprintf(outp, " Any%%C0"); + outp += sprintf(outp, " GFX%%C0"); + outp += sprintf(outp, " CPUGFX%%"); + } + if (do_pc2) outp += sprintf(outp, " Pkg%%pc2"); if (do_pc3) @@ -359,6 +380,7 @@ void print_header(void) outp += sprintf(outp, " time"); } + done: outp += sprintf(outp, "\n"); } @@ -396,6 +418,12 @@ int dump_counters(struct thread_data *t, struct core_data *c, if (p) { outp += sprintf(outp, "package: %d\n", p->package_id); + + outp += sprintf(outp, "Weighted cores: %016llX\n", p->pkg_wtd_core_c0); + outp += sprintf(outp, "Any cores: %016llX\n", p->pkg_any_core_c0); + outp += sprintf(outp, "Any GFX: %016llX\n", p->pkg_any_gfxe_c0); + outp += sprintf(outp, "CPU + GFX: %016llX\n", p->pkg_both_core_gfxe_c0); + outp += sprintf(outp, "pc2: %016llX\n", p->pc2); if (do_pc3) outp += sprintf(outp, "pc3: %016llX\n", p->pc3); @@ -487,10 +515,6 @@ int format_counters(struct thread_data *t, struct core_data *c, /* TSC_MHz */ outp += sprintf(outp, "%8.0f", 1.0 * t->tsc/units/interval_float); - /* SMI */ - if (do_smi) - outp += sprintf(outp, "%8d", t->smi_count); - /* delta */ if (extra_delta_offset32) outp += sprintf(outp, " %11llu", t->extra_delta32); @@ -506,6 +530,13 @@ int format_counters(struct thread_data *t, struct core_data *c, if (extra_msr_offset64) outp += sprintf(outp, " 0x%016llx", t->extra_msr64); + if (!debug) + goto done; + + /* SMI */ + if (do_smi) + outp += sprintf(outp, "%8d", t->smi_count); + if (do_nhm_cstates) { if (!skip_c1) outp += sprintf(outp, "%8.2f", 100.0 * t->c1/t->tsc); @@ -517,7 +548,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) goto done; - if (do_nhm_cstates && !do_slm_cstates) + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c3/t->tsc); if (do_nhm_cstates) outp += sprintf(outp, "%8.2f", 100.0 * c->c6/t->tsc); @@ -531,9 +562,18 @@ int format_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) goto done; + /* PkgTmp */ if (do_ptm) outp += sprintf(outp, "%8d", p->pkg_temp_c); + /* Totl%C0, Any%C0 GFX%C0 CPUGFX% */ + if (do_skl_residency) { + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_wtd_core_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_core_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_any_gfxe_c0/t->tsc); + outp += sprintf(outp, "%8.2f", 100.0 * p->pkg_both_core_gfxe_c0/t->tsc); + } + if (do_pc2) outp += sprintf(outp, "%8.2f", 100.0 * p->pc2/t->tsc); if (do_pc3) @@ -565,7 +605,7 @@ int format_counters(struct thread_data *t, struct core_data *c, if (do_rapl & RAPL_GFX) outp += sprintf(outp, fmt8, p->energy_gfx * rapl_energy_units / interval_float); if (do_rapl & RAPL_DRAM) - outp += sprintf(outp, fmt8, p->energy_dram * rapl_energy_units / interval_float); + outp += sprintf(outp, fmt8, p->energy_dram * rapl_dram_energy_units / interval_float); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if (do_rapl & RAPL_DRAM_PERF_STATUS) @@ -582,7 +622,7 @@ int format_counters(struct thread_data *t, struct core_data *c, p->energy_gfx * rapl_energy_units); if (do_rapl & RAPL_DRAM) outp += sprintf(outp, fmt8, - p->energy_dram * rapl_energy_units); + p->energy_dram * rapl_dram_energy_units); if (do_rapl & RAPL_PKG_PERF_STATUS) outp += sprintf(outp, fmt8, 100.0 * p->rapl_pkg_perf_status * rapl_time_units / interval_float); if 
(do_rapl & RAPL_DRAM_PERF_STATUS) @@ -636,6 +676,13 @@ void format_all_counters(struct thread_data *t, struct core_data *c, struct pkg_ void delta_package(struct pkg_data *new, struct pkg_data *old) { + + if (do_skl_residency) { + old->pkg_wtd_core_c0 = new->pkg_wtd_core_c0 - old->pkg_wtd_core_c0; + old->pkg_any_core_c0 = new->pkg_any_core_c0 - old->pkg_any_core_c0; + old->pkg_any_gfxe_c0 = new->pkg_any_gfxe_c0 - old->pkg_any_gfxe_c0; + old->pkg_both_core_gfxe_c0 = new->pkg_both_core_gfxe_c0 - old->pkg_both_core_gfxe_c0; + } old->pc2 = new->pc2 - old->pc2; if (do_pc3) old->pc3 = new->pc3 - old->pc3; @@ -782,6 +829,11 @@ void clear_counters(struct thread_data *t, struct core_data *c, struct pkg_data c->c7 = 0; c->core_temp_c = 0; + p->pkg_wtd_core_c0 = 0; + p->pkg_any_core_c0 = 0; + p->pkg_any_gfxe_c0 = 0; + p->pkg_both_core_gfxe_c0 = 0; + p->pc2 = 0; if (do_pc3) p->pc3 = 0; @@ -826,6 +878,13 @@ int sum_counters(struct thread_data *t, struct core_data *c, if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + if (do_skl_residency) { + average.packages.pkg_wtd_core_c0 += p->pkg_wtd_core_c0; + average.packages.pkg_any_core_c0 += p->pkg_any_core_c0; + average.packages.pkg_any_gfxe_c0 += p->pkg_any_gfxe_c0; + average.packages.pkg_both_core_gfxe_c0 += p->pkg_both_core_gfxe_c0; + } + average.packages.pc2 += p->pc2; if (do_pc3) average.packages.pc3 += p->pc3; @@ -873,6 +932,13 @@ void compute_average(struct thread_data *t, struct core_data *c, average.cores.c6 /= topo.num_cores; average.cores.c7 /= topo.num_cores; + if (do_skl_residency) { + average.packages.pkg_wtd_core_c0 /= topo.num_packages; + average.packages.pkg_any_core_c0 /= topo.num_packages; + average.packages.pkg_any_gfxe_c0 /= topo.num_packages; + average.packages.pkg_both_core_gfxe_c0 /= topo.num_packages; + } + average.packages.pc2 /= topo.num_packages; if (do_pc3) average.packages.pc3 /= topo.num_packages; @@ -954,14 +1020,17 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_THREAD_IN_CORE)) return 0; - if (do_nhm_cstates && !do_slm_cstates) { + if (do_nhm_cstates && !do_slm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C3_RESIDENCY, &c->c3)) return -6; } - if (do_nhm_cstates) { + if (do_nhm_cstates && !do_knl_cstates) { if (get_msr(cpu, MSR_CORE_C6_RESIDENCY, &c->c6)) return -7; + } else if (do_knl_cstates) { + if (get_msr(cpu, MSR_KNL_CORE_C6_RESIDENCY, &c->c6)) + return -7; } if (do_snb_cstates) @@ -979,6 +1048,16 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (!(t->flags & CPU_IS_FIRST_CORE_IN_PACKAGE)) return 0; + if (do_skl_residency) { + if (get_msr(cpu, MSR_PKG_WEIGHTED_CORE_C0_RES, &p->pkg_wtd_core_c0)) + return -10; + if (get_msr(cpu, MSR_PKG_ANY_CORE_C0_RES, &p->pkg_any_core_c0)) + return -11; + if (get_msr(cpu, MSR_PKG_ANY_GFXE_C0_RES, &p->pkg_any_gfxe_c0)) + return -12; + if (get_msr(cpu, MSR_PKG_BOTH_CORE_GFXE_C0_RES, &p->pkg_both_core_gfxe_c0)) + return -13; + } if (do_pc3) if (get_msr(cpu, MSR_PKG_C3_RESIDENCY, &p->pc3)) return -9; @@ -1055,49 +1134,77 @@ int get_counters(struct thread_data *t, struct core_data *c, struct pkg_data *p) #define PCL_6R 9 /* PC6 Retention */ #define PCL__7 10 /* PC7 */ #define PCL_7S 11 /* PC7 Shrink */ -#define PCLUNL 12 /* Unlimited */ +#define PCL__8 12 /* PC8 */ +#define PCL__9 13 /* PC9 */ +#define PCLUNL 14 /* Unlimited */ int pkg_cstate_limit = PCLUKN; char *pkg_cstate_limit_strings[] = { "reserved", "unknown", "pc0", "pc1", "pc2", - "pc3", "pc4", "pc6", "pc6n", 
"pc6r", "pc7", "pc7s", "unlimited"}; + "pc3", "pc4", "pc6", "pc6n", "pc6r", "pc7", "pc7s", "pc8", "pc9", "unlimited"}; -int nhm_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL}; -int snb_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL}; -int hsw_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCLRSV, PCLUNL}; -int slv_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7}; -int amt_pkg_cstate_limits[8] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7}; -int phi_pkg_cstate_limits[8] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL}; +int nhm_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__3, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int snb_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCL__7, PCL_7S, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int hsw_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL__3, PCL__6, PCL__7, PCL_7S, PCL__8, PCL__9, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int slv_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCLRSV, PCLRSV, PCL__4, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int amt_pkg_cstate_limits[16] = {PCL__0, PCL__1, PCL__2, PCLRSV, PCLRSV, PCLRSV, PCL__6, PCL__7, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; +int phi_pkg_cstate_limits[16] = {PCL__0, PCL__2, PCL_6N, PCL_6R, PCLRSV, PCLRSV, PCLRSV, PCLUNL, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV, PCLRSV}; -void print_verbose_header(void) +static void +dump_nhm_platform_info(void) { unsigned long long msr; unsigned int ratio; - if (!do_nhm_platform_info) - return; - - get_msr(0, MSR_NHM_PLATFORM_INFO, &msr); + get_msr(base_cpu, MSR_NHM_PLATFORM_INFO, &msr); fprintf(stderr, "cpu0: MSR_NHM_PLATFORM_INFO: 0x%08llx\n", msr); ratio = (msr >> 40) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency\n", + fprintf(stderr, "%d * %.0f = %.0f MHz max efficiency frequency\n", ratio, bclk, ratio * bclk); ratio = (msr >> 8) & 0xFF; - fprintf(stderr, "%d * %.0f = %.0f MHz TSC frequency\n", + fprintf(stderr, "%d * %.0f = %.0f MHz base frequency\n", ratio, bclk, ratio * bclk); - get_msr(0, MSR_IA32_POWER_CTL, &msr); + get_msr(base_cpu, MSR_IA32_POWER_CTL, &msr); fprintf(stderr, "cpu0: MSR_IA32_POWER_CTL: 0x%08llx (C1E auto-promotion: %sabled)\n", msr, msr & 0x2 ? 
"EN" : "DIS"); - if (!do_ivt_turbo_ratio_limit) - goto print_nhm_turbo_ratio_limits; + return; +} + +static void +dump_hsw_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; + + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT2, &msr); + + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT2: 0x%08llx\n", msr); + + ratio = (msr >> 8) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 18 active cores\n", + ratio, bclk, ratio * bclk); + + ratio = (msr >> 0) & 0xFF; + if (ratio) + fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 17 active cores\n", + ratio, bclk, ratio * bclk); + return; +} + +static void +dump_ivt_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; - get_msr(0, MSR_IVT_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT1, &msr); - fprintf(stderr, "cpu0: MSR_IVT_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT1: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1138,30 +1245,18 @@ void print_verbose_header(void) if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 9 active cores\n", ratio, bclk, ratio * bclk); + return; +} -print_nhm_turbo_ratio_limits: - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - -#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) -#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) - - fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); - - fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", - (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", - (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", - (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", - (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", - (msr & (1 << 15)) ? "" : "UN", - (unsigned int)msr & 7, - pkg_cstate_limit_strings[pkg_cstate_limit]); - - if (!do_nhm_turbo_ratio_limit) - return; +static void +dump_nhm_turbo_ratio_limits(void) +{ + unsigned long long msr; + unsigned int ratio; - get_msr(0, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + get_msr(base_cpu, MSR_TURBO_RATIO_LIMIT, &msr); - fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); + fprintf(stderr, "cpu0: MSR_TURBO_RATIO_LIMIT: 0x%08llx\n", msr); ratio = (msr >> 56) & 0xFF; if (ratio) @@ -1202,7 +1297,91 @@ print_nhm_turbo_ratio_limits: if (ratio) fprintf(stderr, "%d * %.0f = %.0f MHz max turbo 1 active cores\n", ratio, bclk, ratio * bclk); + return; +} + +static void +dump_knl_turbo_ratio_limits(void) +{ + int cores; + unsigned int ratio; + unsigned long long msr; + int delta_cores; + int delta_ratio; + int i; + + get_msr(base_cpu, MSR_NHM_TURBO_RATIO_LIMIT, &msr); + + fprintf(stderr, "cpu0: MSR_NHM_TURBO_RATIO_LIMIT: 0x%08llx\n", + msr); + + /** + * Turbo encoding in KNL is as follows: + * [7:0] -- Base value of number of active cores of bucket 1. + * [15:8] -- Base value of freq ratio of bucket 1. + * [20:16] -- +ve delta of number of active cores of bucket 2. + * i.e. active cores of bucket 2 = + * active cores of bucket 1 + delta + * [23:21] -- Negative delta of freq ratio of bucket 2. + * i.e. freq ratio of bucket 2 = + * freq ratio of bucket 1 - delta + * [28:24]-- +ve delta of number of active cores of bucket 3. + * [31:29]-- -ve delta of freq ratio of bucket 3. + * [36:32]-- +ve delta of number of active cores of bucket 4. + * [39:37]-- -ve delta of freq ratio of bucket 4. + * [44:40]-- +ve delta of number of active cores of bucket 5. + * [47:45]-- -ve delta of freq ratio of bucket 5. + * [52:48]-- +ve delta of number of active cores of bucket 6. 
+ * [55:53]-- -ve delta of freq ratio of bucket 6. + * [60:56]-- +ve delta of number of active cores of bucket 7. + * [63:61]-- -ve delta of freq ratio of bucket 7. + */ + cores = msr & 0xFF; + ratio = (msr >> 8) && 0xFF; + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + + for (i = 16; i < 64; i = i + 8) { + delta_cores = (msr >> i) & 0x1F; + delta_ratio = (msr >> (i + 5)) && 0x7; + if (!delta_cores || !delta_ratio) + return; + cores = cores + delta_cores; + ratio = ratio - delta_ratio; + + /** -ve ratios will make successive ratio calculations + * negative. Hence return instead of carrying on. + */ + if (ratio > 0) + fprintf(stderr, + "%d * %.0f = %.0f MHz max turbo %d active cores\n", + ratio, bclk, ratio * bclk, cores); + } +} + +static void +dump_nhm_cst_cfg(void) +{ + unsigned long long msr; + + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + +#define SNB_C1_AUTO_UNDEMOTE (1UL << 27) +#define SNB_C3_AUTO_UNDEMOTE (1UL << 28) + + fprintf(stderr, "cpu0: MSR_NHM_SNB_PKG_CST_CFG_CTL: 0x%08llx", msr); + fprintf(stderr, " (%s%s%s%s%slocked: pkg-cstate-limit=%d: %s)\n", + (msr & SNB_C3_AUTO_UNDEMOTE) ? "UNdemote-C3, " : "", + (msr & SNB_C1_AUTO_UNDEMOTE) ? "UNdemote-C1, " : "", + (msr & NHM_C3_AUTO_DEMOTE) ? "demote-C3, " : "", + (msr & NHM_C1_AUTO_DEMOTE) ? "demote-C1, " : "", + (msr & (1 << 15)) ? "" : "UN", + (unsigned int)msr & 7, + pkg_cstate_limit_strings[pkg_cstate_limit]); + return; } void free_all_buffers(void) @@ -1268,12 +1447,41 @@ int parse_int_file(const char *fmt, ...) } /* - * cpu_is_first_sibling_in_core(cpu) - * return 1 if given CPU is 1st HT sibling in the core + * get_cpu_position_in_core(cpu) + * return the position of the CPU among its HT siblings in the core + * return -1 if the sibling is not in list */ -int cpu_is_first_sibling_in_core(int cpu) +int get_cpu_position_in_core(int cpu) { - return cpu == parse_int_file("/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); + char path[64]; + FILE *filep; + int this_cpu; + char character; + int i; + + sprintf(path, + "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", + cpu); + filep = fopen(path, "r"); + if (filep == NULL) { + perror(path); + exit(1); + } + + for (i = 0; i < topo.num_threads_per_core; i++) { + fscanf(filep, "%d", &this_cpu); + if (this_cpu == cpu) { + fclose(filep); + return i; + } + + /* Account for no separator after last thread*/ + if (i != (topo.num_threads_per_core - 1)) + fscanf(filep, "%c", &character); + } + + fclose(filep); + return -1; } /* @@ -1299,25 +1507,31 @@ int get_num_ht_siblings(int cpu) { char path[80]; FILE *filep; - int sib1, sib2; - int matches; + int sib1; + int matches = 0; char character; + char str[100]; + char *ch; sprintf(path, "/sys/devices/system/cpu/cpu%d/topology/thread_siblings_list", cpu); filep = fopen_or_die(path, "r"); + /* * file format: - * if a pair of number with a character between: 2 siblings (eg. 1-2, or 1,4) - * otherwinse 1 sibling (self). 
+ * A ',' separated or '-' separated set of numbers + * (eg 1-2 or 1,3,4,5) */ - matches = fscanf(filep, "%d%c%d\n", &sib1, &character, &sib2); + fscanf(filep, "%d%c\n", &sib1, &character); + fseek(filep, 0, SEEK_SET); + fgets(str, 100, filep); + ch = strchr(str, character); + while (ch != NULL) { + matches++; + ch = strchr(ch+1, character); + } fclose(filep); - - if (matches == 3) - return 2; - else - return 1; + return matches+1; } /* @@ -1481,9 +1695,12 @@ restart: void check_dev_msr() { struct stat sb; + char pathname[32]; - if (stat("/dev/cpu/0/msr", &sb)) - err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (stat(pathname, &sb)) + if (system("/sbin/modprobe msr > /dev/null 2>&1")) + err(-5, "no /dev/cpu/0/msr, Try \"# modprobe msr\" "); } void check_permissions() @@ -1494,6 +1711,7 @@ void check_permissions() cap_user_data_t cap_data = &cap_data_data; extern int capget(cap_user_header_t hdrp, cap_user_data_t datap); int do_exit = 0; + char pathname[32]; /* check for CAP_SYS_RAWIO */ cap_header->pid = getpid(); @@ -1508,7 +1726,8 @@ void check_permissions() } /* test file permissions */ - if (euidaccess("/dev/cpu/0/msr", R_OK)) { + sprintf(pathname, "/dev/cpu/%d/msr", base_cpu); + if (euidaccess(pathname, R_OK)) { do_exit++; warn("/dev/cpu/0/msr open failed, try chown or chmod +r /dev/cpu/*/msr"); } @@ -1573,6 +1792,8 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ pkg_cstate_limits = hsw_pkg_cstate_limits; break; case 0x37: /* BYT */ @@ -1588,9 +1809,9 @@ int probe_nhm_msrs(unsigned int family, unsigned int model) default: return 0; } - get_msr(0, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); + get_msr(base_cpu, MSR_NHM_SNB_PKG_CST_CFG_CTL, &msr); - pkg_cstate_limit = pkg_cstate_limits[msr & 0x7]; + pkg_cstate_limit = pkg_cstate_limits[msr & 0xF]; return 1; } @@ -1615,11 +1836,66 @@ int has_ivt_turbo_ratio_limit(unsigned int family, unsigned int model) switch (model) { case 0x3E: /* IVB Xeon */ + case 0x3F: /* HSW Xeon */ return 1; default: return 0; } } +int has_hsw_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x3F: /* HSW Xeon */ + return 1; + default: + return 0; + } +} + +int has_knl_turbo_ratio_limit(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + if (family != 6) + return 0; + + switch (model) { + case 0x57: /* Knights Landing */ + return 1; + default: + return 0; + } +} +static void +dump_cstate_pstate_config_info(family, model) +{ + if (!do_nhm_platform_info) + return; + + dump_nhm_platform_info(); + + if (has_hsw_turbo_ratio_limit(family, model)) + dump_hsw_turbo_ratio_limits(); + + if (has_ivt_turbo_ratio_limit(family, model)) + dump_ivt_turbo_ratio_limits(); + + if (has_nhm_turbo_ratio_limit(family, model)) + dump_nhm_turbo_ratio_limits(); + + if (has_knl_turbo_ratio_limit(family, model)) + dump_knl_turbo_ratio_limits(); + + dump_nhm_cst_cfg(); +} + /* * print_epb() @@ -1648,7 +1924,7 @@ int print_epb(struct thread_data *t, struct core_data *c, struct pkg_data *p) if (get_msr(cpu, MSR_IA32_ENERGY_PERF_BIAS, &msr)) return 0; - switch (msr & 0x7) { + switch (msr & 0xF) { case ENERGY_PERF_BIAS_PERFORMANCE: epb_string = "performance"; break; @@ -1690,35 +1966,35 @@ int print_perf_limit(struct thread_data *t, struct core_data *c, struct pkg_data get_msr(cpu, 
MSR_CORE_PERF_LIMIT_REASONS, &msr); fprintf(stderr, "cpu%d: MSR_CORE_PERF_LIMIT_REASONS, 0x%08llx", cpu, msr); fprintf(stderr, " (Active: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)", - (msr & 1 << 0) ? "PROCHOT, " : "", - (msr & 1 << 1) ? "ThermStatus, " : "", - (msr & 1 << 2) ? "bit2, " : "", - (msr & 1 << 4) ? "Graphics, " : "", - (msr & 1 << 5) ? "Auto-HWP, " : "", - (msr & 1 << 6) ? "VR-Therm, " : "", - (msr & 1 << 8) ? "Amps, " : "", - (msr & 1 << 9) ? "CorePwr, " : "", - (msr & 1 << 10) ? "PkgPwrL1, " : "", - (msr & 1 << 11) ? "PkgPwrL2, " : "", - (msr & 1 << 12) ? "MultiCoreTurbo, " : "", - (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 15) ? "bit15, " : "", (msr & 1 << 14) ? "bit14, " : "", - (msr & 1 << 15) ? "bit15, " : ""); + (msr & 1 << 13) ? "Transitions, " : "", + (msr & 1 << 12) ? "MultiCoreTurbo, " : "", + (msr & 1 << 11) ? "PkgPwrL2, " : "", + (msr & 1 << 10) ? "PkgPwrL1, " : "", + (msr & 1 << 9) ? "CorePwr, " : "", + (msr & 1 << 8) ? "Amps, " : "", + (msr & 1 << 6) ? "VR-Therm, " : "", + (msr & 1 << 5) ? "Auto-HWP, " : "", + (msr & 1 << 4) ? "Graphics, " : "", + (msr & 1 << 2) ? "bit2, " : "", + (msr & 1 << 1) ? "ThermStatus, " : "", + (msr & 1 << 0) ? "PROCHOT, " : ""); fprintf(stderr, " (Logged: %s%s%s%s%s%s%s%s%s%s%s%s%s%s)\n", - (msr & 1 << 16) ? "PROCHOT, " : "", - (msr & 1 << 17) ? "ThermStatus, " : "", - (msr & 1 << 18) ? "bit18, " : "", - (msr & 1 << 20) ? "Graphics, " : "", - (msr & 1 << 21) ? "Auto-HWP, " : "", - (msr & 1 << 22) ? "VR-Therm, " : "", - (msr & 1 << 24) ? "Amps, " : "", - (msr & 1 << 25) ? "CorePwr, " : "", - (msr & 1 << 26) ? "PkgPwrL1, " : "", - (msr & 1 << 27) ? "PkgPwrL2, " : "", - (msr & 1 << 28) ? "MultiCoreTurbo, " : "", - (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 31) ? "bit31, " : "", (msr & 1 << 30) ? "bit30, " : "", - (msr & 1 << 31) ? "bit31, " : ""); + (msr & 1 << 29) ? "Transitions, " : "", + (msr & 1 << 28) ? "MultiCoreTurbo, " : "", + (msr & 1 << 27) ? "PkgPwrL2, " : "", + (msr & 1 << 26) ? "PkgPwrL1, " : "", + (msr & 1 << 25) ? "CorePwr, " : "", + (msr & 1 << 24) ? "Amps, " : "", + (msr & 1 << 22) ? "VR-Therm, " : "", + (msr & 1 << 21) ? "Auto-HWP, " : "", + (msr & 1 << 20) ? "Graphics, " : "", + (msr & 1 << 18) ? "bit18, " : "", + (msr & 1 << 17) ? "ThermStatus, " : "", + (msr & 1 << 16) ? "PROCHOT, " : ""); } if (do_gfx_perf_limit_reasons) { @@ -1772,7 +2048,7 @@ double get_tdp(model) unsigned long long msr; if (do_rapl & RAPL_PKG_POWER_INFO) - if (!get_msr(0, MSR_PKG_POWER_INFO, &msr)) + if (!get_msr(base_cpu, MSR_PKG_POWER_INFO, &msr)) return ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units; switch (model) { @@ -1784,6 +2060,26 @@ double get_tdp(model) } } +/* + * rapl_dram_energy_units_probe() + * Energy units are either hard-coded, or come from RAPL Energy Unit MSR. 
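rapl_probe() in the following hunks derives the Watt/Joule/second units from MSR_RAPL_POWER_UNIT, while the new rapl_dram_energy_units_probe() hard-codes 15.3 uJ for the server parts it lists. A worked decode of the 0x000a0e03 sample value shown in the man-page example above (the zero-value fallbacks in rapl_probe() are left out of this sketch):

    #include <stdio.h>

    int main(void)
    {
        unsigned long long msr = 0x000a0e03ULL;  /* sample value, not a live MSR read */

        double power_units  = 1.0 / (1 << (msr & 0xF));        /* 1/2^3  = 0.125000 W */
        double energy_units = 1.0 / (1 << (msr >> 8 & 0x1F));  /* 1/2^14 = 0.000061 J */
        double time_units   = 1.0 / (1 << (msr >> 16 & 0xF));  /* 1/2^10 = 0.000977 s */

        /* HSX/BDX/BDX-DE/KNL report DRAM energy in fixed 15.3 uJ steps instead */
        double dram_energy_units = 15.3 / 1000000;

        printf("%f Watts, %f Joules, %f sec (DRAM: %.7f Joules)\n",
               power_units, energy_units, time_units, dram_energy_units);
        return 0;
    }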
+ */ +static double +rapl_dram_energy_units_probe(int model, double rapl_energy_units) +{ + /* only called for genuine_intel, family 6 */ + + switch (model) { + case 0x3F: /* HSX */ + case 0x4F: /* BDX */ + case 0x56: /* BDX-DE */ + case 0x57: /* KNL */ + return (rapl_dram_energy_units = 15.3 / 1000000); + default: + return (rapl_energy_units); + } +} + /* * rapl_probe() @@ -1812,14 +2108,19 @@ void rapl_probe(unsigned int family, unsigned int model) case 0x47: /* BDW */ do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_GFX | RAPL_PKG_POWER_INFO; break; + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + break; case 0x3F: /* HSX */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ - do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; + case 0x57: /* KNL */ + do_rapl = RAPL_PKG | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_DRAM_PERF_STATUS | RAPL_PKG_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x2D: case 0x3E: - do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; + do_rapl = RAPL_PKG | RAPL_CORES | RAPL_CORE_POLICY | RAPL_DRAM | RAPL_DRAM_POWER_INFO | RAPL_PKG_PERF_STATUS | RAPL_DRAM_PERF_STATUS | RAPL_PKG_POWER_INFO; break; case 0x37: /* BYT */ case 0x4D: /* AVN */ @@ -1830,7 +2131,7 @@ void rapl_probe(unsigned int family, unsigned int model) } /* units on package 0, verify later other packages match */ - if (get_msr(0, MSR_RAPL_POWER_UNIT, &msr)) + if (get_msr(base_cpu, MSR_RAPL_POWER_UNIT, &msr)) return; rapl_power_units = 1.0 / (1 << (msr & 0xF)); @@ -1839,6 +2140,8 @@ void rapl_probe(unsigned int family, unsigned int model) else rapl_energy_units = 1.0 / (1 << (msr >> 8 & 0x1F)); + rapl_dram_energy_units = rapl_dram_energy_units_probe(model, rapl_energy_units); + time_unit = msr >> 16 & 0xF; if (time_unit == 0) time_unit = 0xA; @@ -2009,19 +2312,18 @@ int print_rapl(struct thread_data *t, struct core_data *c, struct pkg_data *p) ((msr >> 48) & 1) ? 
"EN" : "DIS"); } - if (do_rapl & RAPL_DRAM) { + if (do_rapl & RAPL_DRAM_POWER_INFO) { if (get_msr(cpu, MSR_DRAM_POWER_INFO, &msr)) return -6; - fprintf(stderr, "cpu%d: MSR_DRAM_POWER_INFO,: 0x%08llx (%.0f W TDP, RAPL %.0f - %.0f W, %f sec.)\n", cpu, msr, ((msr >> 0) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 16) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 32) & RAPL_POWER_GRANULARITY) * rapl_power_units, ((msr >> 48) & RAPL_TIME_GRANULARITY) * rapl_time_units); - - + } + if (do_rapl & RAPL_DRAM) { if (get_msr(cpu, MSR_DRAM_POWER_LIMIT, &msr)) return -9; fprintf(stderr, "cpu%d: MSR_DRAM_POWER_LIMIT: 0x%08llx (%slocked)\n", @@ -2090,6 +2392,8 @@ int has_snb_msrs(unsigned int family, unsigned int model) case 0x47: /* BDW */ case 0x4F: /* BDX */ case 0x56: /* BDX-DE */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ return 1; } return 0; @@ -2110,12 +2414,36 @@ int has_hsw_msrs(unsigned int family, unsigned int model) switch (model) { case 0x45: /* HSW */ case 0x3D: /* BDW */ + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + return 1; + } + return 0; +} + +/* + * SKL adds support for additional MSRS: + * + * MSR_PKG_WEIGHTED_CORE_C0_RES 0x00000658 + * MSR_PKG_ANY_CORE_C0_RES 0x00000659 + * MSR_PKG_ANY_GFXE_C0_RES 0x0000065A + * MSR_PKG_BOTH_CORE_GFXE_C0_RES 0x0000065B + */ +int has_skl_msrs(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + + switch (model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ return 1; } return 0; } + int is_slm(unsigned int family, unsigned int model) { if (!genuine_intel) @@ -2128,6 +2456,17 @@ int is_slm(unsigned int family, unsigned int model) return 0; } +int is_knl(unsigned int family, unsigned int model) +{ + if (!genuine_intel) + return 0; + switch (model) { + case 0x57: /* KNL */ + return 1; + } + return 0; +} + #define SLM_BCLK_FREQS 5 double slm_freq_table[SLM_BCLK_FREQS] = { 83.3, 100.0, 133.3, 116.7, 80.0}; @@ -2137,7 +2476,7 @@ double slm_bclk(void) unsigned int i; double freq; - if (get_msr(0, MSR_FSB_FREQ, &msr)) + if (get_msr(base_cpu, MSR_FSB_FREQ, &msr)) fprintf(stderr, "SLM BCLK: unknown\n"); i = msr & 0xf; @@ -2205,7 +2544,7 @@ int set_temperature_target(struct thread_data *t, struct core_data *c, struct pk if (!do_nhm_platform_info) goto guess; - if (get_msr(0, MSR_IA32_TEMPERATURE_TARGET, &msr)) + if (get_msr(base_cpu, MSR_IA32_TEMPERATURE_TARGET, &msr)) goto guess; target_c_local = (msr >> 16) & 0xFF; @@ -2228,7 +2567,7 @@ guess: return 0; } -void check_cpuid() +void process_cpuid() { unsigned int eax, ebx, ecx, edx, max_level; unsigned int fms, family, model, stepping; @@ -2294,6 +2633,41 @@ void check_cpuid() do_ptm ? "" : "No ", has_epb ? 
"" : "No "); + if (max_level > 0x15) { + unsigned int eax_crystal; + unsigned int ebx_tsc; + + /* + * CPUID 15H TSC/Crystal ratio, possibly Crystal Hz + */ + eax_crystal = ebx_tsc = crystal_hz = edx = 0; + __get_cpuid(0x15, &eax_crystal, &ebx_tsc, &crystal_hz, &edx); + + if (ebx_tsc != 0) { + + if (debug && (ebx != 0)) + fprintf(stderr, "CPUID(0x15): eax_crystal: %d ebx_tsc: %d ecx_crystal_hz: %d\n", + eax_crystal, ebx_tsc, crystal_hz); + + if (crystal_hz == 0) + switch(model) { + case 0x4E: /* SKL */ + case 0x5E: /* SKL */ + crystal_hz = 24000000; /* 24 MHz */ + break; + default: + crystal_hz = 0; + } + + if (crystal_hz) { + tsc_hz = (unsigned long long) crystal_hz * ebx_tsc / eax_crystal; + if (debug) + fprintf(stderr, "TSC: %lld MHz (%d Hz * %d / %d / 1000000)\n", + tsc_hz / 1000000, crystal_hz, ebx_tsc, eax_crystal); + } + } + } + do_nhm_platform_info = do_nhm_cstates = do_smi = probe_nhm_msrs(family, model); do_snb_cstates = has_snb_msrs(family, model); do_pc2 = do_snb_cstates && (pkg_cstate_limit >= PCL__2); @@ -2301,18 +2675,20 @@ void check_cpuid() do_pc6 = (pkg_cstate_limit >= PCL__6); do_pc7 = do_snb_cstates && (pkg_cstate_limit >= PCL__7); do_c8_c9_c10 = has_hsw_msrs(family, model); + do_skl_residency = has_skl_msrs(family, model); do_slm_cstates = is_slm(family, model); + do_knl_cstates = is_knl(family, model); bclk = discover_bclk(family, model); - do_nhm_turbo_ratio_limit = do_nhm_platform_info && has_nhm_turbo_ratio_limit(family, model); - do_ivt_turbo_ratio_limit = has_ivt_turbo_ratio_limit(family, model); rapl_probe(family, model); perf_limit_reasons_probe(family, model); + if (debug) + dump_cstate_pstate_config_info(); + return; } - void help() { fprintf(stderr, @@ -2428,14 +2804,14 @@ void topology_probe() if (debug > 1) fprintf(stderr, "max_core_id %d, sizing for %d cores per package\n", max_core_id, topo.num_cores_per_pkg); - if (!summary_only && topo.num_cores_per_pkg > 1) + if (debug && !summary_only && topo.num_cores_per_pkg > 1) show_core = 1; topo.num_packages = max_package_id + 1; if (debug > 1) fprintf(stderr, "max_package_id %d, sizing for %d packages\n", max_package_id, topo.num_packages); - if (!summary_only && topo.num_packages > 1) + if (debug && !summary_only && topo.num_packages > 1) show_pkg = 1; topo.num_threads_per_core = max_siblings; @@ -2516,13 +2892,9 @@ int initialize_counters(int cpu_id) my_package_id = get_physical_package_id(cpu_id); my_core_id = get_core_id(cpu_id); - - if (cpu_is_first_sibling_in_core(cpu_id)) { - my_thread_id = 0; + my_thread_id = get_cpu_position_in_core(cpu_id); + if (!my_thread_id) topo.num_cores++; - } else { - my_thread_id = 1; - } init_counter(EVEN_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); init_counter(ODD_COUNTERS, my_thread_id, my_core_id, my_package_id, cpu_id); @@ -2546,16 +2918,24 @@ void setup_all_buffers(void) for_all_proc_cpus(initialize_counters); } +void set_base_cpu(void) +{ + base_cpu = sched_getcpu(); + if (base_cpu < 0) + err(-ENODEV, "No valid cpus found"); + + if (debug > 1) + fprintf(stderr, "base_cpu = %d\n", base_cpu); +} + void turbostat_init() { + setup_all_buffers(); + set_base_cpu(); check_dev_msr(); check_permissions(); - check_cpuid(); - - setup_all_buffers(); + process_cpuid(); - if (debug) - print_verbose_header(); if (debug) for_all_cpus(print_epb, ODD_COUNTERS); @@ -2634,7 +3014,7 @@ int get_and_dump_counters(void) } void print_version() { - fprintf(stderr, "turbostat version 4.1 10-Feb, 2015" + fprintf(stderr, "turbostat version 4.7 27-May, 2015" " - Len Brown 
<lenb@kernel.org>\n"); } diff --git a/tools/testing/nvdimm/Kbuild b/tools/testing/nvdimm/Kbuild new file mode 100644 index 000000000000..f56914c7929b --- /dev/null +++ b/tools/testing/nvdimm/Kbuild @@ -0,0 +1,43 @@ +ldflags-y += --wrap=ioremap_wt +ldflags-y += --wrap=ioremap_wc +ldflags-y += --wrap=devm_ioremap_nocache +ldflags-y += --wrap=ioremap_cache +ldflags-y += --wrap=ioremap_nocache +ldflags-y += --wrap=iounmap +ldflags-y += --wrap=__request_region +ldflags-y += --wrap=__release_region + +DRIVERS := ../../../drivers +NVDIMM_SRC := $(DRIVERS)/nvdimm +ACPI_SRC := $(DRIVERS)/acpi + +obj-$(CONFIG_LIBNVDIMM) += libnvdimm.o +obj-$(CONFIG_BLK_DEV_PMEM) += nd_pmem.o +obj-$(CONFIG_ND_BTT) += nd_btt.o +obj-$(CONFIG_ND_BLK) += nd_blk.o +obj-$(CONFIG_ACPI_NFIT) += nfit.o + +nfit-y := $(ACPI_SRC)/nfit.o +nfit-y += config_check.o + +nd_pmem-y := $(NVDIMM_SRC)/pmem.o +nd_pmem-y += config_check.o + +nd_btt-y := $(NVDIMM_SRC)/btt.o +nd_btt-y += config_check.o + +nd_blk-y := $(NVDIMM_SRC)/blk.o +nd_blk-y += config_check.o + +libnvdimm-y := $(NVDIMM_SRC)/core.o +libnvdimm-y += $(NVDIMM_SRC)/bus.o +libnvdimm-y += $(NVDIMM_SRC)/dimm_devs.o +libnvdimm-y += $(NVDIMM_SRC)/dimm.o +libnvdimm-y += $(NVDIMM_SRC)/region_devs.o +libnvdimm-y += $(NVDIMM_SRC)/region.o +libnvdimm-y += $(NVDIMM_SRC)/namespace_devs.o +libnvdimm-y += $(NVDIMM_SRC)/label.o +libnvdimm-$(CONFIG_BTT) += $(NVDIMM_SRC)/btt_devs.o +libnvdimm-y += config_check.o + +obj-m += test/ diff --git a/tools/testing/nvdimm/Makefile b/tools/testing/nvdimm/Makefile new file mode 100644 index 000000000000..3dfe024b4e7e --- /dev/null +++ b/tools/testing/nvdimm/Makefile @@ -0,0 +1,7 @@ +KDIR ?= ../../../ + +default: + $(MAKE) -C $(KDIR) M=$$PWD + +install: default + $(MAKE) -C $(KDIR) M=$$PWD modules_install diff --git a/tools/testing/nvdimm/config_check.c b/tools/testing/nvdimm/config_check.c new file mode 100644 index 000000000000..f2c7615554eb --- /dev/null +++ b/tools/testing/nvdimm/config_check.c @@ -0,0 +1,15 @@ +#include <linux/kconfig.h> +#include <linux/bug.h> + +void check(void) +{ + /* + * These kconfig symbols must be set to "m" for nfit_test to + * load and operate. + */ + BUILD_BUG_ON(!IS_MODULE(CONFIG_LIBNVDIMM)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_BLK_DEV_PMEM)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BTT)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ND_BLK)); + BUILD_BUG_ON(!IS_MODULE(CONFIG_ACPI_NFIT)); +} diff --git a/tools/testing/nvdimm/test/Kbuild b/tools/testing/nvdimm/test/Kbuild new file mode 100644 index 000000000000..9241064970fe --- /dev/null +++ b/tools/testing/nvdimm/test/Kbuild @@ -0,0 +1,8 @@ +ccflags-y := -I$(src)/../../../../drivers/nvdimm/ +ccflags-y += -I$(src)/../../../../drivers/acpi/ + +obj-m += nfit_test.o +obj-m += nfit_test_iomap.o + +nfit_test-y := nfit.o +nfit_test_iomap-y := iomap.o diff --git a/tools/testing/nvdimm/test/iomap.c b/tools/testing/nvdimm/test/iomap.c new file mode 100644 index 000000000000..64bfaa50831c --- /dev/null +++ b/tools/testing/nvdimm/test/iomap.c @@ -0,0 +1,178 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#include <linux/rculist.h> +#include <linux/export.h> +#include <linux/ioport.h> +#include <linux/module.h> +#include <linux/types.h> +#include <linux/io.h> +#include "nfit_test.h" + +static LIST_HEAD(iomap_head); + +static struct iomap_ops { + nfit_test_lookup_fn nfit_test_lookup; + struct list_head list; +} iomap_ops = { + .list = LIST_HEAD_INIT(iomap_ops.list), +}; + +void nfit_test_setup(nfit_test_lookup_fn lookup) +{ + iomap_ops.nfit_test_lookup = lookup; + list_add_rcu(&iomap_ops.list, &iomap_head); +} +EXPORT_SYMBOL(nfit_test_setup); + +void nfit_test_teardown(void) +{ + list_del_rcu(&iomap_ops.list); + synchronize_rcu(); +} +EXPORT_SYMBOL(nfit_test_teardown); + +static struct nfit_test_resource *get_nfit_res(resource_size_t resource) +{ + struct iomap_ops *ops; + + ops = list_first_or_null_rcu(&iomap_head, typeof(*ops), list); + if (ops) + return ops->nfit_test_lookup(resource); + return NULL; +} + +void __iomem *__nfit_test_ioremap(resource_size_t offset, unsigned long size, + void __iomem *(*fallback_fn)(resource_size_t, unsigned long)) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return fallback_fn(offset, size); +} + +void __iomem *__wrap_devm_ioremap_nocache(struct device *dev, + resource_size_t offset, unsigned long size) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res(offset); + rcu_read_unlock(); + if (nfit_res) + return (void __iomem *) nfit_res->buf + offset + - nfit_res->res->start; + return devm_ioremap_nocache(dev, offset, size); +} +EXPORT_SYMBOL(__wrap_devm_ioremap_nocache); + +void __iomem *__wrap_ioremap_cache(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_cache); +} +EXPORT_SYMBOL(__wrap_ioremap_cache); + +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_nocache); +} +EXPORT_SYMBOL(__wrap_ioremap_nocache); + +void __iomem *__wrap_ioremap_wt(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wt); +} +EXPORT_SYMBOL(__wrap_ioremap_wt); + +void __iomem *__wrap_ioremap_wc(resource_size_t offset, unsigned long size) +{ + return __nfit_test_ioremap(offset, size, ioremap_wc); +} +EXPORT_SYMBOL(__wrap_ioremap_wc); + +void __wrap_iounmap(volatile void __iomem *addr) +{ + struct nfit_test_resource *nfit_res; + + rcu_read_lock(); + nfit_res = get_nfit_res((unsigned long) addr); + rcu_read_unlock(); + if (nfit_res) + return; + return iounmap(addr); +} +EXPORT_SYMBOL(__wrap_iounmap); + +struct resource *__wrap___request_region(struct resource *parent, + resource_size_t start, resource_size_t n, const char *name, + int flags) +{ + struct nfit_test_resource *nfit_res; + + if (parent == &iomem_resource) { + rcu_read_lock(); + nfit_res = get_nfit_res(start); + rcu_read_unlock(); + if (nfit_res) { + struct resource *res = nfit_res->res + 1; + + if (start + n > nfit_res->res->start + + resource_size(nfit_res->res)) { + pr_debug("%s: start: %llx n: %llx overflow: %pr\n", + __func__, start, n, + nfit_res->res); + return NULL; + } + + res->start = start; + res->end = start + n - 1; + res->name = name; + res->flags = resource_type(parent); + res->flags |= IORESOURCE_BUSY | flags; + pr_debug("%s: %pr\n", __func__, res); + return res; + } + } + return __request_region(parent, start, n, name, flags); 
+} +EXPORT_SYMBOL(__wrap___request_region); + +void __wrap___release_region(struct resource *parent, resource_size_t start, + resource_size_t n) +{ + struct nfit_test_resource *nfit_res; + + if (parent == &iomem_resource) { + rcu_read_lock(); + nfit_res = get_nfit_res(start); + rcu_read_unlock(); + if (nfit_res) { + struct resource *res = nfit_res->res + 1; + + if (start != res->start || resource_size(res) != n) + pr_info("%s: start: %llx n: %llx mismatch: %pr\n", + __func__, start, n, res); + else + memset(res, 0, sizeof(*res)); + return; + } + } + __release_region(parent, start, n); +} +EXPORT_SYMBOL(__wrap___release_region); + +MODULE_LICENSE("GPL v2"); diff --git a/tools/testing/nvdimm/test/nfit.c b/tools/testing/nvdimm/test/nfit.c new file mode 100644 index 000000000000..d0bdae40ccc9 --- /dev/null +++ b/tools/testing/nvdimm/test/nfit.c @@ -0,0 +1,1162 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include <linux/platform_device.h> +#include <linux/dma-mapping.h> +#include <linux/libnvdimm.h> +#include <linux/vmalloc.h> +#include <linux/device.h> +#include <linux/module.h> +#include <linux/ndctl.h> +#include <linux/sizes.h> +#include <linux/slab.h> +#include <nfit.h> +#include <nd.h> +#include "nfit_test.h" + +/* + * Generate an NFIT table to describe the following topology: + * + * BUS0: Interleaved PMEM regions, and aliasing with BLK regions + * + * (a) (b) DIMM BLK-REGION + * +----------+--------------+----------+---------+ + * +------+ | blk2.0 | pm0.0 | blk2.1 | pm1.0 | 0 region2 + * | imc0 +--+- - - - - region0 - - - -+----------+ + + * +--+---+ | blk3.0 | pm0.0 | blk3.1 | pm1.0 | 1 region3 + * | +----------+--------------v----------v v + * +--+---+ | | + * | cpu0 | region1 + * +--+---+ | | + * | +-------------------------^----------^ ^ + * +--+---+ | blk4.0 | pm1.0 | 2 region4 + * | imc1 +--+-------------------------+----------+ + + * +------+ | blk5.0 | pm1.0 | 3 region5 + * +-------------------------+----------+-+-------+ + * + * *) In this layout we have four dimms and two memory controllers in one + * socket. Each unique interface (BLK or PMEM) to DPA space + * is identified by a region device with a dynamically assigned id. + * + * *) The first portion of dimm0 and dimm1 are interleaved as REGION0. + * A single PMEM namespace "pm0.0" is created using half of the + * REGION0 SPA-range. REGION0 spans dimm0 and dimm1. PMEM namespaces + * allocate from the bottom of a region. The unallocated + * portion of REGION0 aliases with REGION2 and REGION3. That + * unallocated capacity is reclaimed as BLK namespaces ("blk2.0" and + * "blk3.0") starting at the base of each DIMM to offset (a) in those + * DIMMs. "pm0.0", "blk2.0" and "blk3.0" are free-form readable + * names that can be assigned to a namespace. + * + * *) In the last portion of dimm0 and dimm1 we have an interleaved + * SPA range, REGION1, that spans those two dimms as well as dimm2 + * and dimm3.
Some of REGION1 is allocated to a PMEM namespace named + * "pm1.0"; the rest is reclaimed in 4 BLK namespaces (for each + * dimm in the interleave set), "blk2.1", "blk3.1", "blk4.0", and + * "blk5.0". + * + * *) The portions of dimm2 and dimm3 that do not participate in the + * REGION1 interleaved SPA range (i.e. the DPA addresses below offset + * (b)) are also included in the "blk4.0" and "blk5.0" namespaces. + * Note that BLK namespaces need not be contiguous in DPA-space, and + * can consume aliased capacity from multiple interleave sets. + * + * BUS1: Legacy NVDIMM (single contiguous range) + * + * region2 + * +---------------------+ + * |---------------------| + * || pm2.0 || + * |---------------------| + * +---------------------+ + * + * *) An NFIT table may describe a simple system-physical-address range + * with no BLK aliasing. This type of region may optionally + * reference an NVDIMM. + */ +enum { + NUM_PM = 2, + NUM_DCR = 4, + NUM_BDW = NUM_DCR, + NUM_SPA = NUM_PM + NUM_DCR + NUM_BDW, + NUM_MEM = NUM_DCR + NUM_BDW + 2 /* spa0 iset */ + 4 /* spa1 iset */, + DIMM_SIZE = SZ_32M, + LABEL_SIZE = SZ_128K, + SPA0_SIZE = DIMM_SIZE, + SPA1_SIZE = DIMM_SIZE*2, + SPA2_SIZE = DIMM_SIZE, + BDW_SIZE = 64 << 8, + DCR_SIZE = 12, + NUM_NFITS = 2, /* permit testing multiple NFITs per system */ +}; + +struct nfit_test_dcr { + __le64 bdw_addr; + __le32 bdw_status; + __u8 aperature[BDW_SIZE]; +}; + +#define NFIT_DIMM_HANDLE(node, socket, imc, chan, dimm) \ + (((node & 0xfff) << 16) | ((socket & 0xf) << 12) \ + | ((imc & 0xf) << 8) | ((chan & 0xf) << 4) | (dimm & 0xf)) + +static u32 handle[NUM_DCR] = { + [0] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 0), + [1] = NFIT_DIMM_HANDLE(0, 0, 0, 0, 1), + [2] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 0), + [3] = NFIT_DIMM_HANDLE(0, 0, 1, 0, 1), +}; + +struct nfit_test { + struct acpi_nfit_desc acpi_desc; + struct platform_device pdev; + struct list_head resources; + void *nfit_buf; + dma_addr_t nfit_dma; + size_t nfit_size; + int num_dcr; + int num_pm; + void **dimm; + dma_addr_t *dimm_dma; + void **flush; + dma_addr_t *flush_dma; + void **label; + dma_addr_t *label_dma; + void **spa_set; + dma_addr_t *spa_set_dma; + struct nfit_test_dcr **dcr; + dma_addr_t *dcr_dma; + int (*alloc)(struct nfit_test *t); + void (*setup)(struct nfit_test *t); +}; + +static struct nfit_test *to_nfit_test(struct device *dev) +{ + struct platform_device *pdev = to_platform_device(dev); + + return container_of(pdev, struct nfit_test, pdev); +} + +static int nfit_test_ctl(struct nvdimm_bus_descriptor *nd_desc, + struct nvdimm *nvdimm, unsigned int cmd, void *buf, + unsigned int buf_len) +{ + struct acpi_nfit_desc *acpi_desc = to_acpi_desc(nd_desc); + struct nfit_test *t = container_of(acpi_desc, typeof(*t), acpi_desc); + struct nfit_mem *nfit_mem = nvdimm_provider_data(nvdimm); + int i, rc; + + if (!nfit_mem || !test_bit(cmd, &nfit_mem->dsm_mask)) + return -ENOTTY; + + /* lookup label space for the given dimm */ + for (i = 0; i < ARRAY_SIZE(handle); i++) + if (__to_nfit_memdev(nfit_mem)->device_handle == handle[i]) + break; + if (i >= ARRAY_SIZE(handle)) + return -ENXIO; + + switch (cmd) { + case ND_CMD_GET_CONFIG_SIZE: { + struct nd_cmd_get_config_size *nd_cmd = buf; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + nd_cmd->status = 0; + nd_cmd->config_size = LABEL_SIZE; + nd_cmd->max_xfer = SZ_4K; + rc = 0; + break; + } + case ND_CMD_GET_CONFIG_DATA: { + struct nd_cmd_get_config_data_hdr *nd_cmd = buf; + unsigned int len, offset = nd_cmd->in_offset; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; +
if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) > buf_len) + return -EINVAL; + + nd_cmd->status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(nd_cmd->out_buf, t->label[i] + offset, len); + rc = buf_len - sizeof(*nd_cmd) - len; + break; + } + case ND_CMD_SET_CONFIG_DATA: { + struct nd_cmd_set_config_hdr *nd_cmd = buf; + unsigned int len, offset = nd_cmd->in_offset; + u32 *status; + + if (buf_len < sizeof(*nd_cmd)) + return -EINVAL; + if (offset >= LABEL_SIZE) + return -EINVAL; + if (nd_cmd->in_length + sizeof(*nd_cmd) + 4 > buf_len) + return -EINVAL; + + status = buf + nd_cmd->in_length + sizeof(*nd_cmd); + *status = 0; + len = min(nd_cmd->in_length, LABEL_SIZE - offset); + memcpy(t->label[i] + offset, nd_cmd->in_buf, len); + rc = buf_len - sizeof(*nd_cmd) - (len + 4); + break; + } + default: + return -ENOTTY; + } + + return rc; +} + +static DEFINE_SPINLOCK(nfit_test_lock); +static struct nfit_test *instances[NUM_NFITS]; + +static void release_nfit_res(void *data) +{ + struct nfit_test_resource *nfit_res = data; + struct resource *res = nfit_res->res; + + spin_lock(&nfit_test_lock); + list_del(&nfit_res->list); + spin_unlock(&nfit_test_lock); + + if (is_vmalloc_addr(nfit_res->buf)) + vfree(nfit_res->buf); + else + dma_free_coherent(nfit_res->dev, resource_size(res), + nfit_res->buf, res->start); + kfree(res); + kfree(nfit_res); +} + +static void *__test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma, + void *buf) +{ + struct device *dev = &t->pdev.dev; + struct resource *res = kzalloc(sizeof(*res) * 2, GFP_KERNEL); + struct nfit_test_resource *nfit_res = kzalloc(sizeof(*nfit_res), + GFP_KERNEL); + int rc; + + if (!res || !buf || !nfit_res) + goto err; + rc = devm_add_action(dev, release_nfit_res, nfit_res); + if (rc) + goto err; + INIT_LIST_HEAD(&nfit_res->list); + memset(buf, 0, size); + nfit_res->dev = dev; + nfit_res->buf = buf; + nfit_res->res = res; + res->start = *dma; + res->end = *dma + size - 1; + res->name = "NFIT"; + spin_lock(&nfit_test_lock); + list_add(&nfit_res->list, &t->resources); + spin_unlock(&nfit_test_lock); + + return nfit_res->buf; + err: + if (buf && !is_vmalloc_addr(buf)) + dma_free_coherent(dev, size, buf, *dma); + else if (buf) + vfree(buf); + kfree(res); + kfree(nfit_res); + return NULL; +} + +static void *test_alloc(struct nfit_test *t, size_t size, dma_addr_t *dma) +{ + void *buf = vmalloc(size); + + *dma = (unsigned long) buf; + return __test_alloc(t, size, dma, buf); +} + +static void *test_alloc_coherent(struct nfit_test *t, size_t size, + dma_addr_t *dma) +{ + struct device *dev = &t->pdev.dev; + void *buf = dma_alloc_coherent(dev, size, dma, GFP_KERNEL); + + return __test_alloc(t, size, dma, buf); +} + +static struct nfit_test_resource *nfit_test_lookup(resource_size_t addr) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(instances); i++) { + struct nfit_test_resource *n, *nfit_res = NULL; + struct nfit_test *t = instances[i]; + + if (!t) + continue; + spin_lock(&nfit_test_lock); + list_for_each_entry(n, &t->resources, list) { + if (addr >= n->res->start && (addr < n->res->start + + resource_size(n->res))) { + nfit_res = n; + break; + } else if (addr >= (unsigned long) n->buf + && (addr < (unsigned long) n->buf + + resource_size(n->res))) { + nfit_res = n; + break; + } + } + spin_unlock(&nfit_test_lock); + if (nfit_res) + return nfit_res; + } + + return NULL; +} + +static int nfit_test0_alloc(struct nfit_test *t) +{ + size_t nfit_size = sizeof(struct acpi_table_nfit) + + sizeof(struct 
acpi_nfit_system_address) * NUM_SPA + + sizeof(struct acpi_nfit_memory_map) * NUM_MEM + + sizeof(struct acpi_nfit_control_region) * NUM_DCR + + sizeof(struct acpi_nfit_data_region) * NUM_BDW + + sizeof(struct acpi_nfit_flush_address) * NUM_DCR; + int i; + + t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); + if (!t->nfit_buf) + return -ENOMEM; + t->nfit_size = nfit_size; + + t->spa_set[0] = test_alloc_coherent(t, SPA0_SIZE, &t->spa_set_dma[0]); + if (!t->spa_set[0]) + return -ENOMEM; + + t->spa_set[1] = test_alloc_coherent(t, SPA1_SIZE, &t->spa_set_dma[1]); + if (!t->spa_set[1]) + return -ENOMEM; + + for (i = 0; i < NUM_DCR; i++) { + t->dimm[i] = test_alloc(t, DIMM_SIZE, &t->dimm_dma[i]); + if (!t->dimm[i]) + return -ENOMEM; + + t->label[i] = test_alloc(t, LABEL_SIZE, &t->label_dma[i]); + if (!t->label[i]) + return -ENOMEM; + sprintf(t->label[i], "label%d", i); + + t->flush[i] = test_alloc(t, 8, &t->flush_dma[i]); + if (!t->flush[i]) + return -ENOMEM; + } + + for (i = 0; i < NUM_DCR; i++) { + t->dcr[i] = test_alloc(t, LABEL_SIZE, &t->dcr_dma[i]); + if (!t->dcr[i]) + return -ENOMEM; + } + + return 0; +} + +static int nfit_test1_alloc(struct nfit_test *t) +{ + size_t nfit_size = sizeof(struct acpi_table_nfit) + + sizeof(struct acpi_nfit_system_address) + + sizeof(struct acpi_nfit_memory_map) + + sizeof(struct acpi_nfit_control_region); + + t->nfit_buf = test_alloc(t, nfit_size, &t->nfit_dma); + if (!t->nfit_buf) + return -ENOMEM; + t->nfit_size = nfit_size; + + t->spa_set[0] = test_alloc_coherent(t, SPA2_SIZE, &t->spa_set_dma[0]); + if (!t->spa_set[0]) + return -ENOMEM; + + return 0; +} + +static void nfit_test_init_header(struct acpi_table_nfit *nfit, size_t size) +{ + memcpy(nfit->header.signature, ACPI_SIG_NFIT, 4); + nfit->header.length = size; + nfit->header.revision = 1; + memcpy(nfit->header.oem_id, "LIBND", 6); + memcpy(nfit->header.oem_table_id, "TEST", 5); + nfit->header.oem_revision = 1; + memcpy(nfit->header.asl_compiler_id, "TST", 4); + nfit->header.asl_compiler_revision = 1; +} + +static void nfit_test0_setup(struct nfit_test *t) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct acpi_nfit_memory_map *memdev; + void *nfit_buf = t->nfit_buf; + size_t size = t->nfit_size; + struct acpi_nfit_system_address *spa; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_data_region *bdw; + struct acpi_nfit_flush_address *flush; + unsigned int offset; + + nfit_test_init_header(nfit_buf, size); + + /* + * spa0 (interleave first half of dimm0 and dimm1, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + sizeof(struct acpi_table_nfit); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 0+1; + spa->address = t->spa_set_dma[0]; + spa->length = SPA0_SIZE; + + /* + * spa1 (interleave last half of the 4 DIMMS, note storage + * does not actually alias the related block-data-window + * regions) + */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa); + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 1+1; + spa->address = t->spa_set_dma[1]; + spa->length = SPA1_SIZE; + + /* spa2 (dcr0) dimm0 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 2; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); 
+ memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 2+1; + spa->address = t->dcr_dma[0]; + spa->length = DCR_SIZE; + + /* spa3 (dcr1) dimm1 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 3; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 3+1; + spa->address = t->dcr_dma[1]; + spa->length = DCR_SIZE; + + /* spa4 (dcr2) dimm2 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 4; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 4+1; + spa->address = t->dcr_dma[2]; + spa->length = DCR_SIZE; + + /* spa5 (dcr3) dimm3 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 5; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_DCR), 16); + spa->range_index = 5+1; + spa->address = t->dcr_dma[3]; + spa->length = DCR_SIZE; + + /* spa6 (bdw for dcr0) dimm0 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 6; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 6+1; + spa->address = t->dimm_dma[0]; + spa->length = DIMM_SIZE; + + /* spa7 (bdw for dcr1) dimm1 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 7; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 7+1; + spa->address = t->dimm_dma[1]; + spa->length = DIMM_SIZE; + + /* spa8 (bdw for dcr2) dimm2 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 8; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 8+1; + spa->address = t->dimm_dma[2]; + spa->length = DIMM_SIZE; + + /* spa9 (bdw for dcr3) dimm3 */ + spa = nfit_buf + sizeof(struct acpi_table_nfit) + sizeof(*spa) * 9; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_BDW), 16); + spa->range_index = 9+1; + spa->address = t->dimm_dma[3]; + spa->length = DIMM_SIZE; + + offset = sizeof(struct acpi_table_nfit) + sizeof(*spa) * 10; + /* mem-region0 (spa0, dimm0) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 0+1; + memdev->region_size = SPA0_SIZE/2; + memdev->region_offset = t->spa_set_dma[0]; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 2; + + /* mem-region1 (spa0, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map); + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 1+1; + memdev->region_size = SPA0_SIZE/2; + memdev->region_offset = t->spa_set_dma[0] + SPA0_SIZE/2; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways 
= 2; + + /* mem-region2 (spa1, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 2; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 1; + memdev->range_index = 1+1; + memdev->region_index = 0+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1]; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region3 (spa1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 3; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 1; + memdev->range_index = 1+1; + memdev->region_index = 1+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region4 (spa1, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 4; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 1+1; + memdev->region_index = 2+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + 2*SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region5 (spa1, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 5; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 1+1; + memdev->region_index = 3+1; + memdev->region_size = SPA1_SIZE/4; + memdev->region_offset = t->spa_set_dma[1] + 3*SPA1_SIZE/4; + memdev->address = SPA0_SIZE/2; + memdev->interleave_index = 0; + memdev->interleave_ways = 4; + + /* mem-region6 (spa/dcr0, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 6; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 2+1; + memdev->region_index = 0+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region7 (spa/dcr1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 7; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 3+1; + memdev->region_index = 1+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region8 (spa/dcr2, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 8; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 4+1; + memdev->region_index = 2+1; + memdev->region_size = 0; + 
memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region9 (spa/dcr3, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 9; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 5+1; + memdev->region_index = 3+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region10 (spa/bdw0, dimm0) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 10; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[0]; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 6+1; + memdev->region_index = 0+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region11 (spa/bdw1, dimm1) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 11; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[1]; + memdev->physical_id = 1; + memdev->region_id = 0; + memdev->range_index = 7+1; + memdev->region_index = 1+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region12 (spa/bdw2, dimm2) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 12; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[2]; + memdev->physical_id = 2; + memdev->region_id = 0; + memdev->range_index = 8+1; + memdev->region_index = 2+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + /* mem-region13 (spa/dcr3, dimm3) */ + memdev = nfit_buf + offset + sizeof(struct acpi_nfit_memory_map) * 13; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = handle[3]; + memdev->physical_id = 3; + memdev->region_id = 0; + memdev->range_index = 9+1; + memdev->region_index = 3+1; + memdev->region_size = 0; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + + offset = offset + sizeof(struct acpi_nfit_memory_map) * 14; + /* dcr-descriptor0 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 0+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[0]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor1 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region); + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 1+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[1]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + 
dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor2 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 2; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 2+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[2]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + /* dcr-descriptor3 */ + dcr = nfit_buf + offset + sizeof(struct acpi_nfit_control_region) * 3; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 3+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~handle[3]; + dcr->windows = 1; + dcr->window_size = DCR_SIZE; + dcr->command_offset = 0; + dcr->command_size = 8; + dcr->status_offset = 8; + dcr->status_size = 4; + + offset = offset + sizeof(struct acpi_nfit_control_region) * 4; + /* bdw0 (spa/dcr0, dimm0) */ + bdw = nfit_buf + offset; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 0+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw1 (spa/dcr1, dimm1) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region); + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 1+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw2 (spa/dcr2, dimm2) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 2; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 2+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + /* bdw3 (spa/dcr3, dimm3) */ + bdw = nfit_buf + offset + sizeof(struct acpi_nfit_data_region) * 3; + bdw->header.type = ACPI_NFIT_TYPE_DATA_REGION; + bdw->header.length = sizeof(struct acpi_nfit_data_region); + bdw->region_index = 3+1; + bdw->windows = 1; + bdw->offset = 0; + bdw->size = BDW_SIZE; + bdw->capacity = DIMM_SIZE; + bdw->start_address = 0; + + offset = offset + sizeof(struct acpi_nfit_data_region) * 4; + /* flush0 (dimm0) */ + flush = nfit_buf + offset; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[0]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[0]; + + /* flush1 (dimm1) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 1; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[1]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[1]; + + /* flush2 (dimm2) */ + flush = nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 2; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[2]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[2]; + + /* flush3 (dimm3) */ + flush = 
nfit_buf + offset + sizeof(struct acpi_nfit_flush_address) * 3; + flush->header.type = ACPI_NFIT_TYPE_FLUSH_ADDRESS; + flush->header.length = sizeof(struct acpi_nfit_flush_address); + flush->device_handle = handle[3]; + flush->hint_count = 1; + flush->hint_address[0] = t->flush_dma[3]; + + acpi_desc = &t->acpi_desc; + set_bit(ND_CMD_GET_CONFIG_SIZE, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_GET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + set_bit(ND_CMD_SET_CONFIG_DATA, &acpi_desc->dimm_dsm_force_en); + nd_desc = &acpi_desc->nd_desc; + nd_desc->ndctl = nfit_test_ctl; +} + +static void nfit_test1_setup(struct nfit_test *t) +{ + size_t size = t->nfit_size, offset; + void *nfit_buf = t->nfit_buf; + struct acpi_nfit_memory_map *memdev; + struct acpi_nfit_control_region *dcr; + struct acpi_nfit_system_address *spa; + + nfit_test_init_header(nfit_buf, size); + + offset = sizeof(struct acpi_table_nfit); + /* spa0 (flat range with no bdw aliasing) */ + spa = nfit_buf + offset; + spa->header.type = ACPI_NFIT_TYPE_SYSTEM_ADDRESS; + spa->header.length = sizeof(*spa); + memcpy(spa->range_guid, to_nfit_uuid(NFIT_SPA_PM), 16); + spa->range_index = 0+1; + spa->address = t->spa_set_dma[0]; + spa->length = SPA2_SIZE; + + offset += sizeof(*spa); + /* mem-region0 (spa0, dimm0) */ + memdev = nfit_buf + offset; + memdev->header.type = ACPI_NFIT_TYPE_MEMORY_MAP; + memdev->header.length = sizeof(*memdev); + memdev->device_handle = 0; + memdev->physical_id = 0; + memdev->region_id = 0; + memdev->range_index = 0+1; + memdev->region_index = 0+1; + memdev->region_size = SPA2_SIZE; + memdev->region_offset = 0; + memdev->address = 0; + memdev->interleave_index = 0; + memdev->interleave_ways = 1; + memdev->flags = ACPI_NFIT_MEM_SAVE_FAILED | ACPI_NFIT_MEM_RESTORE_FAILED + | ACPI_NFIT_MEM_FLUSH_FAILED | ACPI_NFIT_MEM_HEALTH_OBSERVED + | ACPI_NFIT_MEM_ARMED; + + offset += sizeof(*memdev); + /* dcr-descriptor0 */ + dcr = nfit_buf + offset; + dcr->header.type = ACPI_NFIT_TYPE_CONTROL_REGION; + dcr->header.length = sizeof(struct acpi_nfit_control_region); + dcr->region_index = 0+1; + dcr->vendor_id = 0xabcd; + dcr->device_id = 0; + dcr->revision_id = 1; + dcr->serial_number = ~0; + dcr->code = 0x201; + dcr->windows = 0; + dcr->window_size = 0; + dcr->command_offset = 0; + dcr->command_size = 0; + dcr->status_offset = 0; + dcr->status_size = 0; +} + +static int nfit_test_blk_do_io(struct nd_blk_region *ndbr, resource_size_t dpa, + void *iobuf, u64 len, int rw) +{ + struct nfit_blk *nfit_blk = ndbr->blk_provider_data; + struct nfit_blk_mmio *mmio = &nfit_blk->mmio[BDW]; + struct nd_region *nd_region = &ndbr->nd_region; + unsigned int lane; + + lane = nd_region_acquire_lane(nd_region); + if (rw) + memcpy(mmio->base + dpa, iobuf, len); + else + memcpy(iobuf, mmio->base + dpa, len); + nd_region_release_lane(nd_region, lane); + + return 0; +} + +static int nfit_test_probe(struct platform_device *pdev) +{ + struct nvdimm_bus_descriptor *nd_desc; + struct acpi_nfit_desc *acpi_desc; + struct device *dev = &pdev->dev; + struct nfit_test *nfit_test; + int rc; + + nfit_test = to_nfit_test(&pdev->dev); + + /* common alloc */ + if (nfit_test->num_dcr) { + int num = nfit_test->num_dcr; + + nfit_test->dimm = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->dimm_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); + nfit_test->flush = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->flush_dma = devm_kcalloc(dev, num, sizeof(dma_addr_t), + GFP_KERNEL); + nfit_test->label = 
devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->label_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + nfit_test->dcr = devm_kcalloc(dev, num, + sizeof(struct nfit_test_dcr *), GFP_KERNEL); + nfit_test->dcr_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + if (nfit_test->dimm && nfit_test->dimm_dma && nfit_test->label + && nfit_test->label_dma && nfit_test->dcr + && nfit_test->dcr_dma && nfit_test->flush + && nfit_test->flush_dma) + /* pass */; + else + return -ENOMEM; + } + + if (nfit_test->num_pm) { + int num = nfit_test->num_pm; + + nfit_test->spa_set = devm_kcalloc(dev, num, sizeof(void *), + GFP_KERNEL); + nfit_test->spa_set_dma = devm_kcalloc(dev, num, + sizeof(dma_addr_t), GFP_KERNEL); + if (nfit_test->spa_set && nfit_test->spa_set_dma) + /* pass */; + else + return -ENOMEM; + } + + /* per-nfit specific alloc */ + if (nfit_test->alloc(nfit_test)) + return -ENOMEM; + + nfit_test->setup(nfit_test); + acpi_desc = &nfit_test->acpi_desc; + acpi_desc->dev = &pdev->dev; + acpi_desc->nfit = nfit_test->nfit_buf; + acpi_desc->blk_do_io = nfit_test_blk_do_io; + nd_desc = &acpi_desc->nd_desc; + nd_desc->attr_groups = acpi_nfit_attribute_groups; + acpi_desc->nvdimm_bus = nvdimm_bus_register(&pdev->dev, nd_desc); + if (!acpi_desc->nvdimm_bus) + return -ENXIO; + + rc = acpi_nfit_init(acpi_desc, nfit_test->nfit_size); + if (rc) { + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + return rc; + } + + return 0; +} + +static int nfit_test_remove(struct platform_device *pdev) +{ + struct nfit_test *nfit_test = to_nfit_test(&pdev->dev); + struct acpi_nfit_desc *acpi_desc = &nfit_test->acpi_desc; + + nvdimm_bus_unregister(acpi_desc->nvdimm_bus); + + return 0; +} + +static void nfit_test_release(struct device *dev) +{ + struct nfit_test *nfit_test = to_nfit_test(dev); + + kfree(nfit_test); +} + +static const struct platform_device_id nfit_test_id[] = { + { KBUILD_MODNAME }, + { }, +}; + +static struct platform_driver nfit_test_driver = { + .probe = nfit_test_probe, + .remove = nfit_test_remove, + .driver = { + .name = KBUILD_MODNAME, + }, + .id_table = nfit_test_id, +}; + +#ifdef CONFIG_CMA_SIZE_MBYTES +#define CMA_SIZE_MBYTES CONFIG_CMA_SIZE_MBYTES +#else +#define CMA_SIZE_MBYTES 0 +#endif + +static __init int nfit_test_init(void) +{ + int rc, i; + + nfit_test_setup(nfit_test_lookup); + + for (i = 0; i < NUM_NFITS; i++) { + struct nfit_test *nfit_test; + struct platform_device *pdev; + static int once; + + nfit_test = kzalloc(sizeof(*nfit_test), GFP_KERNEL); + if (!nfit_test) { + rc = -ENOMEM; + goto err_register; + } + INIT_LIST_HEAD(&nfit_test->resources); + switch (i) { + case 0: + nfit_test->num_pm = NUM_PM; + nfit_test->num_dcr = NUM_DCR; + nfit_test->alloc = nfit_test0_alloc; + nfit_test->setup = nfit_test0_setup; + break; + case 1: + nfit_test->num_pm = 1; + nfit_test->alloc = nfit_test1_alloc; + nfit_test->setup = nfit_test1_setup; + break; + default: + rc = -EINVAL; + goto err_register; + } + pdev = &nfit_test->pdev; + pdev->name = KBUILD_MODNAME; + pdev->id = i; + pdev->dev.release = nfit_test_release; + rc = platform_device_register(pdev); + if (rc) { + put_device(&pdev->dev); + goto err_register; + } + + rc = dma_coerce_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)); + if (rc) + goto err_register; + + instances[i] = nfit_test; + + if (!once++) { + dma_addr_t dma; + void *buf; + + buf = dma_alloc_coherent(&pdev->dev, SZ_128M, &dma, + GFP_KERNEL); + if (!buf) { + rc = -ENOMEM; + dev_warn(&pdev->dev, "need 128M of free cma\n"); + goto 
err_register; + } + dma_free_coherent(&pdev->dev, SZ_128M, buf, dma); + } + } + + rc = platform_driver_register(&nfit_test_driver); + if (rc) + goto err_register; + return 0; + + err_register: + for (i = 0; i < NUM_NFITS; i++) + if (instances[i]) + platform_device_unregister(&instances[i]->pdev); + nfit_test_teardown(); + return rc; +} + +static __exit void nfit_test_exit(void) +{ + int i; + + platform_driver_unregister(&nfit_test_driver); + for (i = 0; i < NUM_NFITS; i++) + platform_device_unregister(&instances[i]->pdev); + nfit_test_teardown(); +} + +module_init(nfit_test_init); +module_exit(nfit_test_exit); +MODULE_LICENSE("GPL v2"); +MODULE_AUTHOR("Intel Corporation"); diff --git a/tools/testing/nvdimm/test/nfit_test.h b/tools/testing/nvdimm/test/nfit_test.h new file mode 100644 index 000000000000..96c5e16d7db9 --- /dev/null +++ b/tools/testing/nvdimm/test/nfit_test.h @@ -0,0 +1,29 @@ +/* + * Copyright(c) 2013-2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of version 2 of the GNU General Public License as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + */ +#ifndef __NFIT_TEST_H__ +#define __NFIT_TEST_H__ + +struct nfit_test_resource { + struct list_head list; + struct resource *res; + struct device *dev; + void *buf; +}; + +typedef struct nfit_test_resource *(*nfit_test_lookup_fn)(resource_size_t); +void __iomem *__wrap_ioremap_nocache(resource_size_t offset, + unsigned long size); +void __wrap_iounmap(volatile void __iomem *addr); +void nfit_test_setup(nfit_test_lookup_fn lookup); +void nfit_test_teardown(void); +#endif diff --git a/tools/testing/selftests/Makefile b/tools/testing/selftests/Makefile index 0db571340edb..24ae9e829e9a 100644 --- a/tools/testing/selftests/Makefile +++ b/tools/testing/selftests/Makefile @@ -4,6 +4,7 @@ TARGETS += efivarfs TARGETS += exec TARGETS += firmware TARGETS += ftrace +TARGETS += futex TARGETS += kcmp TARGETS += memfd TARGETS += memory-hotplug @@ -12,12 +13,18 @@ TARGETS += mqueue TARGETS += net TARGETS += powerpc TARGETS += ptrace +TARGETS += seccomp TARGETS += size TARGETS += sysctl +ifneq (1, $(quicktest)) TARGETS += timers +endif TARGETS += user TARGETS += vm +TARGETS += x86 #Please keep the TARGETS list alphabetically sorted +# Run "make quicktest=1 run_tests" or +# "make quicktest=1 kselftest" from top level Makefile TARGETS_HOTPLUG = cpu-hotplug TARGETS_HOTPLUG += memory-hotplug @@ -26,7 +33,7 @@ TARGETS_HOTPLUG += memory-hotplug # Makefile to avoid test build failures when test # Makefile doesn't have explicit build rules.
ifeq (1,$(MAKELEVEL)) -undefine LDFLAGS +override LDFLAGS = override MAKEFLAGS = endif @@ -55,7 +62,40 @@ clean_hotplug: make -C $$TARGET clean; \ done; +INSTALL_PATH ?= install +INSTALL_PATH := $(abspath $(INSTALL_PATH)) +ALL_SCRIPT := $(INSTALL_PATH)/run_kselftest.sh + +install: +ifdef INSTALL_PATH + @# Ask all targets to install their files + mkdir -p $(INSTALL_PATH) + for TARGET in $(TARGETS); do \ + mkdir -p $(INSTALL_PATH)/$$TARGET ; \ + make -C $$TARGET INSTALL_PATH=$(INSTALL_PATH)/$$TARGET install; \ + done; + + @# Ask all targets to emit their test scripts + echo "#!/bin/bash" > $(ALL_SCRIPT) + echo "cd \$$(dirname \$$0)" >> $(ALL_SCRIPT) + echo "ROOT=\$$PWD" >> $(ALL_SCRIPT) + + for TARGET in $(TARGETS); do \ + echo "echo ; echo Running tests in $$TARGET" >> $(ALL_SCRIPT); \ + echo "echo ========================================" >> $(ALL_SCRIPT); \ + echo "cd $$TARGET" >> $(ALL_SCRIPT); \ + make -s --no-print-directory -C $$TARGET emit_tests >> $(ALL_SCRIPT); \ + echo "cd \$$ROOT" >> $(ALL_SCRIPT); \ + done; + + chmod u+x $(ALL_SCRIPT) +else + $(error Error: set INSTALL_PATH to use install) +endif + clean: for TARGET in $(TARGETS); do \ make -C $$TARGET clean; \ done; + +.PHONY: install diff --git a/tools/testing/selftests/breakpoints/Makefile b/tools/testing/selftests/breakpoints/Makefile index e18b42b254af..182235640209 100644 --- a/tools/testing/selftests/breakpoints/Makefile +++ b/tools/testing/selftests/breakpoints/Makefile @@ -16,8 +16,9 @@ else echo "Not an x86 target, can't build breakpoints selftests" endif -run_tests: - @./breakpoint_test || echo "breakpoints selftests: [FAIL]" +TEST_PROGS := breakpoint_test + +include ../lib.mk clean: rm -fr breakpoint_test diff --git a/tools/testing/selftests/cpu-hotplug/Makefile b/tools/testing/selftests/cpu-hotplug/Makefile index e9c28d8dc84b..fe1f99101c5d 100644 --- a/tools/testing/selftests/cpu-hotplug/Makefile +++ b/tools/testing/selftests/cpu-hotplug/Makefile @@ -1,9 +1,10 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh || echo "cpu-hotplug selftests: [FAIL]" +TEST_PROGS := cpu-on-off-test.sh + +include ../lib.mk run_full_test: - @/bin/bash ./on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" + @/bin/bash ./cpu-on-off-test.sh -a || echo "cpu-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/cpu-hotplug/on-off-test.sh b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh index 98b1d6565f2c..98b1d6565f2c 100644..100755 --- a/tools/testing/selftests/cpu-hotplug/on-off-test.sh +++ b/tools/testing/selftests/cpu-hotplug/cpu-on-off-test.sh diff --git a/tools/testing/selftests/efivarfs/Makefile b/tools/testing/selftests/efivarfs/Makefile index 29e8c6bc81b0..736c3ddfc787 100644 --- a/tools/testing/selftests/efivarfs/Makefile +++ b/tools/testing/selftests/efivarfs/Makefile @@ -1,12 +1,13 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall test_objs = open-unlink create-read all: $(test_objs) -run_tests: all - @/bin/bash ./efivarfs.sh || echo "efivarfs selftests: [FAIL]" +TEST_PROGS := efivarfs.sh +TEST_FILES := $(test_objs) + +include ../lib.mk clean: rm -f $(test_objs) diff --git a/tools/testing/selftests/efivarfs/efivarfs.sh b/tools/testing/selftests/efivarfs/efivarfs.sh index 77edcdcc016b..77edcdcc016b 100644..100755 --- a/tools/testing/selftests/efivarfs/efivarfs.sh +++ b/tools/testing/selftests/efivarfs/efivarfs.sh diff --git a/tools/testing/selftests/exec/Makefile b/tools/testing/selftests/exec/Makefile index 66dfc2ce1788..6b76bfdc847e 100644 --- a/tools/testing/selftests/exec/Makefile +++ 
b/tools/testing/selftests/exec/Makefile @@ -1,7 +1,6 @@ -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall BINARIES = execveat -DEPS = execveat.symlink execveat.denatured script subdir +DEPS = execveat.symlink execveat.denatured script all: $(BINARIES) $(DEPS) subdir: @@ -18,8 +17,12 @@ execveat.denatured: execveat %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - ./execveat +TEST_PROGS := execveat +TEST_FILES := $(DEPS) + +include ../lib.mk + +override EMIT_TESTS := echo "mkdir -p subdir; (./execveat && echo \"selftests: execveat [PASS]\") || echo \"selftests: execveat [FAIL]\"" clean: rm -rf $(BINARIES) $(DEPS) subdir.moved execveat.moved xxxxx* diff --git a/tools/testing/selftests/firmware/Makefile b/tools/testing/selftests/firmware/Makefile index e23cce0bbc3a..9bf82234855b 100644 --- a/tools/testing/selftests/firmware/Makefile +++ b/tools/testing/selftests/firmware/Makefile @@ -3,25 +3,9 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -fw_filesystem: - @if /bin/sh ./fw_filesystem.sh ; then \ - echo "fw_filesystem: ok"; \ - else \ - echo "fw_filesystem: [FAIL]"; \ - exit 1; \ - fi +TEST_PROGS := fw_filesystem.sh fw_userhelper.sh -fw_userhelper: - @if /bin/sh ./fw_userhelper.sh ; then \ - echo "fw_userhelper: ok"; \ - else \ - echo "fw_userhelper: [FAIL]"; \ - exit 1; \ - fi - -run_tests: all fw_filesystem fw_userhelper +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all clean run_tests fw_filesystem fw_userhelper diff --git a/tools/testing/selftests/firmware/fw_filesystem.sh b/tools/testing/selftests/firmware/fw_filesystem.sh index 3fc6c10c2479..3fc6c10c2479 100644..100755 --- a/tools/testing/selftests/firmware/fw_filesystem.sh +++ b/tools/testing/selftests/firmware/fw_filesystem.sh diff --git a/tools/testing/selftests/firmware/fw_userhelper.sh b/tools/testing/selftests/firmware/fw_userhelper.sh index 6efbade12139..6efbade12139 100644..100755 --- a/tools/testing/selftests/firmware/fw_userhelper.sh +++ b/tools/testing/selftests/firmware/fw_userhelper.sh diff --git a/tools/testing/selftests/ftrace/Makefile b/tools/testing/selftests/ftrace/Makefile index 76cc9f156267..0acbeca47225 100644 --- a/tools/testing/selftests/ftrace/Makefile +++ b/tools/testing/selftests/ftrace/Makefile @@ -1,7 +1,9 @@ all: -run_tests: - @/bin/sh ./ftracetest || echo "ftrace selftests: [FAIL]" +TEST_PROGS := ftracetest +TEST_DIRS := test.d/ + +include ../lib.mk clean: rm -rf logs/* diff --git a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc index fd9c49a13612..aa51f6c17359 100644 --- a/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc +++ b/tools/testing/selftests/ftrace/test.d/00basic/basic4.tc @@ -2,4 +2,4 @@ # description: Basic event tracing check test -f available_events -a -f set_event -a -d events # check scheduler events are available -grep -q sched available_events && exit 0 || exit -1
\ No newline at end of file +grep -q sched available_events && exit 0 || exit $FAIL diff --git a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc index 668616d9bb03..87eb9d6dd4ca 100644 --- a/tools/testing/selftests/ftrace/test.d/event/event-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/event-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:sched_switch' > set_event -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -eq 0 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/sched_switch/enable -usleep 1 + +yield count=`cat trace | grep sched_switch | wc -l` if [ $count -ne 0 ]; then diff --git a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc index 655c415b6e7f..ced27ef0638f 100644 --- a/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/subsystem-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f set_event -o ! -d events/sched ]; then @@ -21,7 +25,8 @@ reset_tracer do_reset echo 'sched:*' > set_event -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -31,7 +36,8 @@ fi do_reset echo 1 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -lt 3 ]; then @@ -41,7 +47,8 @@ fi do_reset echo 0 > events/sched/enable -usleep 1 + +yield count=`cat trace | grep -v ^# | awk '{ print $5 }' | sort -u | wc -l` if [ $count -ne 0 ]; then diff --git a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc index 480845774007..0bb5df3c00d4 100644 --- a/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc +++ b/tools/testing/selftests/ftrace/test.d/event/toplevel-enable.tc @@ -9,7 +9,11 @@ do_reset() { fail() { #msg do_reset echo $1 - exit -1 + exit $FAIL +} + +yield() { + ping localhost -c 1 || sleep .001 || usleep 1 || sleep 1 } if [ ! -f available_events -o ! -f set_event -o ! 
-d events ]; then @@ -21,6 +25,9 @@ reset_tracer do_reset echo '*:*' > set_event + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -29,6 +36,9 @@ fi do_reset echo 1 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -eq 0 ]; then fail "none of events are recorded" @@ -37,6 +47,9 @@ fi do_reset echo 0 > events/enable + +yield + count=`cat trace | grep -v ^# | wc -l` if [ $count -ne 0 ]; then fail "any of events should not be recorded" diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc index c15e018e0220..15c2dba06ea2 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter-stack.tc @@ -16,7 +16,9 @@ fi do_reset() { reset_tracer - echo 0 > /proc/sys/kernel/stack_tracer_enabled + if [ -e /proc/sys/kernel/stack_tracer_enabled ]; then + echo 0 > /proc/sys/kernel/stack_tracer_enabled + fi enable_tracing clear_trace echo > set_ftrace_filter @@ -25,7 +27,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc index 6af5f6360b18..0ab2189613ef 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/fgraph-filter.tc @@ -17,7 +17,7 @@ do_reset() { fail() { # msg do_reset echo $1 - exit -1 + exit $FAIL } disable_tracing diff --git a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc index 2e719cb1fc4d..7808336d6f50 100644 --- a/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc +++ b/tools/testing/selftests/ftrace/test.d/ftrace/func_profiler.tc @@ -31,7 +31,7 @@ fail() { # mesg reset_tracer echo > set_ftrace_filter echo $1 - exit -1 + exit $FAIL } echo "Testing function tracer with profiler:" diff --git a/tools/testing/selftests/futex/Makefile b/tools/testing/selftests/futex/Makefile new file mode 100644 index 000000000000..6a1752956283 --- /dev/null +++ b/tools/testing/selftests/futex/Makefile @@ -0,0 +1,29 @@ +SUBDIRS := functional + +TEST_PROGS := run.sh + +.PHONY: all clean +all: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done + +include ../lib.mk + +override define RUN_TESTS + ./run.sh +endef + +override define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) + + @for SUBDIR in $(SUBDIRS); do \ + $(MAKE) -C $$SUBDIR INSTALL_PATH=$(INSTALL_PATH)/$$SUBDIR install; \ + done; +endef + +override define EMIT_TESTS + echo "./run.sh" +endef + +clean: + for DIR in $(SUBDIRS); do $(MAKE) -C $$DIR $@ ; done diff --git a/tools/testing/selftests/futex/README b/tools/testing/selftests/futex/README new file mode 100644 index 000000000000..3224a049b196 --- /dev/null +++ b/tools/testing/selftests/futex/README @@ -0,0 +1,62 @@ +Futex Test +========== +Futex Test is intended to thoroughly test the Linux kernel futex system call +API. + +Functional tests shall test the documented behavior of the futex operation +code under test. This includes checking for proper behavior under normal use, +odd corner cases, regression tests, and abject abuse and misuse. + +Futextest will also provide example implementation of mutual exclusion +primitives. 
These can be used as is in user applications or can serve as +examples for system libraries. These will likely be added to either a new lib/ +directory or purely as header files under include/, I'm leaning toward the +latter. + +Quick Start +----------- +# make +# ./run.sh + +Design and Implementation Goals +------------------------------- +o Tests should be as self contained as is practical so as to facilitate sharing + the individual tests on mailing list discussions and bug reports. +o The build system shall remain as simple as possible, avoiding any archive or + shared object building and linking. +o Where possible, any helper functions or other package-wide code shall be + implemented in header files, avoiding the need to compile intermediate object + files. +o External dependencies shall remain as minimal as possible. Currently gcc + and glibc are the only dependencies. +o Tests return 0 for success and < 0 for failure. + +Output Formatting +----------------- +Test output shall be easily parsable by both human and machine. Title and +results are printed to stdout, while intermediate ERROR or FAIL messages are +sent to stderr. Tests shall support the -c option to print PASS, FAIL, and +ERROR strings in color for easy visual parsing. Output shall conform to the +following format: + +test_name: Description of the test + Arguments: arg1=val1 #units specified for clarity where appropriate + ERROR: Description of unexpected error + FAIL: Reason for test failure + # FIXME: Perhaps an " INFO: informational message" option would be + # useful here. Using -v to toggle them on and off, as with -c. + # there may be multiple ERROR or FAIL messages +Result: (PASS|FAIL|ERROR) + +Naming +------ +o FIXME: decide on a sane test naming scheme. Currently the tests are named + based on the primary futex operation they test. Eventually this will become a + problem as we intend to write multiple tests which collide in this namespace. + Perhaps something like "wait-wake-1" "wait-wake-2" is adequate, leaving the + detailed description in the test source and the output. + +Coding Style +------------ +o The Futex Test project adheres to the coding standards set forth by the Linux + kernel as defined in the Linux source Documentation/CodingStyle.
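The conventions above are backed by the logging.h and futextest.h helpers added later in this series. Purely as an illustrative sketch, and not one of the tests added by this patch, a functional test that follows the output format and return-code rules could look like the following; the wake scenario and its messages are invented for the example:

#include <getopt.h>
#include <libgen.h>
#include <stdio.h>
#include <stdlib.h>
#include "futextest.h"
#include "logging.h"

futex_t f1 = FUTEX_INITIALIZER;

int main(int argc, char *argv[])
{
	int c, res, ret = RET_PASS;

	while ((c = getopt(argc, argv, "cv:")) != -1) {
		switch (c) {
		case 'c':
			log_color(1);		/* -c: colored PASS/FAIL/ERROR strings */
			break;
		case 'v':
			log_verbosity(atoi(optarg));
			break;
		default:
			exit(1);
		}
	}

	/* Title and arguments go to stdout, per the format above. */
	printf("%s: Wake an uncontended futex (example only)\n", basename(argv[0]));

	info("Calling futex_wake on f1: %u @ %p\n", f1, &f1);
	res = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG);
	if (res != 0) {
		fail("futex_wake woke %d waiters, expected 0\n", res);
		ret = RET_FAIL;	/* intermediate FAIL message went to stderr */
	}

	print_result(ret);	/* prints "Result: PASS|FAIL|ERROR" */
	return ret;
}

Run with -c for colored results and -v 2 for INFO output, matching the option conventions described above.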
diff --git a/tools/testing/selftests/futex/functional/.gitignore b/tools/testing/selftests/futex/functional/.gitignore new file mode 100644 index 000000000000..a09f57061902 --- /dev/null +++ b/tools/testing/selftests/futex/functional/.gitignore @@ -0,0 +1,7 @@ +futex_requeue_pi +futex_requeue_pi_mismatched_ops +futex_requeue_pi_signal_restart +futex_wait_private_mapped_file +futex_wait_timeout +futex_wait_uninitialized_heap +futex_wait_wouldblock diff --git a/tools/testing/selftests/futex/functional/Makefile b/tools/testing/selftests/futex/functional/Makefile new file mode 100644 index 000000000000..9d6b75ef7b5d --- /dev/null +++ b/tools/testing/selftests/futex/functional/Makefile @@ -0,0 +1,25 @@ +INCLUDES := -I../include -I../../ +CFLAGS := $(CFLAGS) -g -O2 -Wall -D_GNU_SOURCE -pthread $(INCLUDES) +LDFLAGS := $(LDFLAGS) -pthread -lrt + +HEADERS := ../include/futextest.h +TARGETS := \ + futex_wait_timeout \ + futex_wait_wouldblock \ + futex_requeue_pi \ + futex_requeue_pi_signal_restart \ + futex_requeue_pi_mismatched_ops \ + futex_wait_uninitialized_heap \ + futex_wait_private_mapped_file + +TEST_PROGS := $(TARGETS) run.sh + +.PHONY: all clean +all: $(TARGETS) + +$(TARGETS): $(HEADERS) + +include ../../lib.mk + +clean: + rm -f $(TARGETS) diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi.c b/tools/testing/selftests/futex/functional/futex_requeue_pi.c new file mode 100644 index 000000000000..3da06ad23996 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi.c @@ -0,0 +1,409 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * This test exercises the futex syscall op codes needed for requeuing + * priority inheritance aware POSIX condition variables and mutexes.
+ * + * AUTHORS + * Sripathi Kodi <sripathik@in.ibm.com> + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2008-Jan-13: Initial version by Sripathi Kodi <sripathik@in.ibm.com> + * 2009-Nov-6: futex test adaptation by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <limits.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <signal.h> +#include <string.h> +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define MAX_WAKE_ITERS 1000 +#define THREAD_MAX 10 +#define SIGNAL_PERIOD_US 100 + +atomic_t waiters_blocked = ATOMIC_INITIALIZER; +atomic_t waiters_woken = ATOMIC_INITIALIZER; + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +futex_t wake_complete = FUTEX_INITIALIZER; + +/* Test option defaults */ +static long timeout_ns; +static int broadcast; +static int owner; +static int locked; + +struct thread_arg { + long id; + struct timespec *timeout; + int lock; + int ret; +}; +#define THREAD_ARG_INITIALIZER { 0, NULL, 0, 0 } + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -b Broadcast wakeup (all waiters)\n"); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -l Lock the pi futex across requeue\n"); + printf(" -o Use a third party pi futex owner during requeue (cancels -l)\n"); + printf(" -t N Timeout in nanoseconds (default: 0)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + int ret; + struct sched_param schedp; + pthread_attr_t attr; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + + +void *waiterfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + futex_t old_val; + + info("Waiter %ld: running\n", args->id); + /* Each thread sleeps for a different amount of time + * This is to avoid races, because we don't lock the + * external mutex here */ + usleep(1000 * (long)args->id); + + old_val = f1; + atomic_inc(&waiters_blocked); + info("Calling futex_wait_requeue_pi: %p (%u) -> %p\n", + &f1, f1, &f2); + args->ret = futex_wait_requeue_pi(&f1, old_val, &f2, args->timeout, + FUTEX_PRIVATE_FLAG); + + info("waiter %ld woke with %d %s\n", args->id, args->ret, + args->ret < 0 ? 
strerror(errno) : ""); + atomic_inc(&waiters_woken); + if (args->ret < 0) { + if (args->timeout && errno == ETIMEDOUT) + args->ret = 0; + else { + args->ret = RET_ERROR; + error("futex_wait_requeue_pi\n", errno); + } + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + info("Waiter %ld: exiting with %d\n", args->id, args->ret); + pthread_exit((void *)&args->ret); +} + +void *broadcast_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int nr_requeue = INT_MAX; + int task_count = 0; + futex_t old_val; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + info("Waker: Calling broadcast\n"); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + continue_requeue: + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) { + args->ret = RET_ERROR; + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + } else if (++i < MAX_WAKE_ITERS) { + task_count += args->ret; + if (task_count < THREAD_MAX - waiters_woken.val) + goto continue_requeue; + } else { + error("max broadcast iterations (%d) reached with %d/%d tasks woken or requeued\n", + 0, MAX_WAKE_ITERS, task_count, THREAD_MAX); + args->ret = RET_ERROR; + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->lock) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + if (args->ret > 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + pthread_exit((void *)&args->ret); +} + +void *signal_wakerfn(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + unsigned int old_val; + int nr_requeue = 0; + int task_count = 0; + int nr_wake = 1; + int i = 0; + + info("Waker: waiting for waiters to block\n"); + while (waiters_blocked.val < THREAD_MAX) + usleep(1000); + usleep(1000); + + while (task_count < THREAD_MAX && waiters_woken.val < THREAD_MAX) { + info("task_count: %d, waiters_woken: %d\n", + task_count, waiters_woken.val); + if (args->lock) { + info("Calling FUTEX_LOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_lock_pi(&f2, NULL, 0, FUTEX_PRIVATE_FLAG); + } + info("Waker: Calling signal\n"); + /* cond_signal */ + old_val = f1; + args->ret = futex_cmp_requeue_pi(&f1, old_val, &f2, + nr_wake, nr_requeue, + FUTEX_PRIVATE_FLAG); + if (args->ret < 0) + args->ret = -errno; + info("futex: %x\n", f2); + if (args->lock) { + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", + f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + } + info("futex: %x\n", f2); + if (args->ret < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + args->ret = RET_ERROR; + break; + } + + task_count += args->ret; + usleep(SIGNAL_PERIOD_US); + i++; + /* we have to loop at least THREAD_MAX times */ + if (i > MAX_WAKE_ITERS + THREAD_MAX) { + error("max signaling iterations (%d) reached, giving up on pending waiters.\n", + 0, MAX_WAKE_ITERS + THREAD_MAX); + args->ret = RET_ERROR; + break; + } + } + + futex_wake(&wake_complete, 1, FUTEX_PRIVATE_FLAG); + + if (args->ret >= 0) + args->ret = task_count; + + info("Waker: exiting with %d\n", args->ret); + info("Waker: waiters_woken: %d\n", waiters_woken.val); + pthread_exit((void *)&args->ret); +} + +void *third_party_blocker(void *arg) +{ + struct thread_arg *args = (struct thread_arg *)arg; + int ret2 = 0; + + args->ret = futex_lock_pi(&f2, NULL, 
0, FUTEX_PRIVATE_FLAG); + if (args->ret) + goto out; + args->ret = futex_wait(&wake_complete, wake_complete, NULL, + FUTEX_PRIVATE_FLAG); + ret2 = futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + + out: + if (args->ret || ret2) { + error("third_party_blocker() futex error", 0); + args->ret = RET_ERROR; + } + + pthread_exit((void *)&args->ret); +} + +int unit_test(int broadcast, long lock, int third_party_owner, long timeout_ns) +{ + void *(*wakerfn)(void *) = signal_wakerfn; + struct thread_arg blocker_arg = THREAD_ARG_INITIALIZER; + struct thread_arg waker_arg = THREAD_ARG_INITIALIZER; + pthread_t waiter[THREAD_MAX], waker, blocker; + struct timespec ts, *tsp = NULL; + struct thread_arg args[THREAD_MAX]; + int *waiter_ret; + int i, ret = RET_PASS; + + if (timeout_ns) { + time_t secs; + + info("timeout_ns = %ld\n", timeout_ns); + ret = clock_gettime(CLOCK_MONOTONIC, &ts); + secs = (ts.tv_nsec + timeout_ns) / 1000000000; + ts.tv_nsec = ((int64_t)ts.tv_nsec + timeout_ns) % 1000000000; + ts.tv_sec += secs; + info("ts.tv_sec = %ld\n", ts.tv_sec); + info("ts.tv_nsec = %ld\n", ts.tv_nsec); + tsp = &ts; + } + + if (broadcast) + wakerfn = broadcast_wakerfn; + + if (third_party_owner) { + if (create_rt_thread(&blocker, third_party_blocker, + (void *)&blocker_arg, SCHED_FIFO, 1)) { + error("Creating third party blocker thread failed\n", + errno); + ret = RET_ERROR; + goto out; + } + } + + atomic_set(&waiters_woken, 0); + for (i = 0; i < THREAD_MAX; i++) { + args[i].id = i; + args[i].timeout = tsp; + info("Starting thread %d\n", i); + if (create_rt_thread(&waiter[i], waiterfn, (void *)&args[i], + SCHED_FIFO, 1)) { + error("Creating waiting thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + } + waker_arg.lock = lock; + if (create_rt_thread(&waker, wakerfn, (void *)&waker_arg, + SCHED_FIFO, 1)) { + error("Creating waker thread failed\n", errno); + ret = RET_ERROR; + goto out; + } + + /* Wait for threads to finish */ + /* Store the first error or failure encountered in waiter_ret */ + waiter_ret = &args[0].ret; + for (i = 0; i < THREAD_MAX; i++) + pthread_join(waiter[i], + *waiter_ret ? 
NULL : (void **)&waiter_ret); + + if (third_party_owner) + pthread_join(blocker, NULL); + pthread_join(waker, NULL); + +out: + if (!ret) { + if (*waiter_ret) + ret = *waiter_ret; + else if (waker_arg.ret < 0) + ret = waker_arg.ret; + else if (blocker_arg.ret) + ret = blocker_arg.ret; + } + + return ret; +} + +int main(int argc, char *argv[]) +{ + int c, ret; + + while ((c = getopt(argc, argv, "bchlot:v:")) != -1) { + switch (c) { + case 'b': + broadcast = 1; + break; + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'l': + locked = 1; + break; + case 'o': + owner = 1; + locked = 0; + break; + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test requeue functionality\n", basename(argv[0])); + printf("\tArguments: broadcast=%d locked=%d owner=%d timeout=%ldns\n", + broadcast, locked, owner, timeout_ns); + + /* + * FIXME: unit_test is obsolete now that we parse options and the + * various style of runs are done by run.sh - simplify the code and move + * unit_test into main() + */ + ret = unit_test(broadcast, locked, owner, timeout_ns); + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c new file mode 100644 index 000000000000..d5e4f2c4da2a --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_mismatched_ops.c @@ -0,0 +1,135 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * 1. Block a thread using FUTEX_WAIT + * 2. Attempt to use FUTEX_CMP_REQUEUE_PI on the futex from 1. + * 3. The kernel must detect the mismatch and return -EINVAL. 
+ * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +int child_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *blocking_child(void *arg) +{ + child_ret = futex_wait(&f1, f1, NULL, FUTEX_PRIVATE_FLAG); + if (child_ret < 0) { + child_ret = -errno; + error("futex_wait\n", errno); + } + return (void *)&child_ret; +} + +int main(int argc, char *argv[]) +{ + int ret = RET_PASS; + pthread_t child; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Detect mismatched requeue_pi operations\n", + basename(argv[0])); + + if (pthread_create(&child, NULL, blocking_child, NULL)) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + /* Allow the child to block in the kernel. */ + sleep(1); + + /* + * The kernel should detect the waiter did not setup the + * q->requeue_pi_key and return -EINVAL. If it does not, + * it likely gave the lock to the child, which is now hung + * in the kernel. + */ + ret = futex_cmp_requeue_pi(&f1, f1, &f2, 1, 0, FUTEX_PRIVATE_FLAG); + if (ret < 0) { + if (errno == EINVAL) { + /* + * The kernel correctly detected the mismatched + * requeue_pi target and aborted. Wake the child with + * FUTEX_WAKE. + */ + ret = futex_wake(&f1, 1, FUTEX_PRIVATE_FLAG); + if (ret == 1) { + ret = RET_PASS; + } else if (ret < 0) { + error("futex_wake\n", errno); + ret = RET_ERROR; + } else { + error("futex_wake did not wake the child\n", 0); + ret = RET_ERROR; + } + } else { + error("futex_cmp_requeue_pi\n", errno); + ret = RET_ERROR; + } + } else if (ret > 0) { + fail("futex_cmp_requeue_pi failed to detect the mismatch\n"); + ret = RET_FAIL; + } else { + error("futex_cmp_requeue_pi found no waiters\n", 0); + ret = RET_ERROR; + } + + pthread_join(child, NULL); + + if (!ret) + ret = child_ret; + + out: + /* If the kernel crashes, we shouldn't return at all. */ + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c new file mode 100644 index 000000000000..3d7dc6afc3f8 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_requeue_pi_signal_restart.c @@ -0,0 +1,223 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2006-2008 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * DESCRIPTION + * This test exercises the futex_wait_requeue_pi() signal handling both + * before and after the requeue. The first should be restarted by the + * kernel. The latter should return EWOULDBLOCK to the waiter. + * + * AUTHORS + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2008-May-5: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <limits.h> +#include <pthread.h> +#include <signal.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include "atomic.h" +#include "futextest.h" +#include "logging.h" + +#define DELAY_US 100 + +futex_t f1 = FUTEX_INITIALIZER; +futex_t f2 = FUTEX_INITIALIZER; +atomic_t requeued = ATOMIC_INITIALIZER; + +int waiter_ret = 0; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int create_rt_thread(pthread_t *pth, void*(*func)(void *), void *arg, + int policy, int prio) +{ + struct sched_param schedp; + pthread_attr_t attr; + int ret; + + pthread_attr_init(&attr); + memset(&schedp, 0, sizeof(schedp)); + + ret = pthread_attr_setinheritsched(&attr, PTHREAD_EXPLICIT_SCHED); + if (ret) { + error("pthread_attr_setinheritsched\n", ret); + return -1; + } + + ret = pthread_attr_setschedpolicy(&attr, policy); + if (ret) { + error("pthread_attr_setschedpolicy\n", ret); + return -1; + } + + schedp.sched_priority = prio; + ret = pthread_attr_setschedparam(&attr, &schedp); + if (ret) { + error("pthread_attr_setschedparam\n", ret); + return -1; + } + + ret = pthread_create(pth, &attr, func, arg); + if (ret) { + error("pthread_create\n", ret); + return -1; + } + return 0; +} + +void handle_signal(int signo) +{ + info("signal received %s requeue\n", + requeued.val ? 
"after" : "prior to"); +} + +void *waiterfn(void *arg) +{ + unsigned int old_val; + int res; + + waiter_ret = RET_PASS; + + info("Waiter running\n"); + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + old_val = f1; + res = futex_wait_requeue_pi(&f1, old_val, &(f2), NULL, + FUTEX_PRIVATE_FLAG); + if (!requeued.val || errno != EWOULDBLOCK) { + fail("unexpected return from futex_wait_requeue_pi: %d (%s)\n", + res, strerror(errno)); + info("w2:futex: %x\n", f2); + if (!res) + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + waiter_ret = RET_FAIL; + } + + info("Waiter exiting with %d\n", waiter_ret); + pthread_exit(NULL); +} + + +int main(int argc, char *argv[]) +{ + unsigned int old_val; + struct sigaction sa; + pthread_t waiter; + int c, res, ret = RET_PASS; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test signal handling during requeue_pi\n", + basename(argv[0])); + printf("\tArguments: <none>\n"); + + sa.sa_handler = handle_signal; + sigemptyset(&sa.sa_mask); + sa.sa_flags = 0; + if (sigaction(SIGUSR1, &sa, NULL)) { + error("sigaction\n", errno); + exit(1); + } + + info("m1:f2: %x\n", f2); + info("Creating waiter\n"); + res = create_rt_thread(&waiter, waiterfn, NULL, SCHED_FIFO, 1); + if (res) { + error("Creating waiting thread failed", res); + ret = RET_ERROR; + goto out; + } + + info("Calling FUTEX_LOCK_PI on f2=%x @ %p\n", f2, &f2); + info("m2:f2: %x\n", f2); + futex_lock_pi(&f2, 0, 0, FUTEX_PRIVATE_FLAG); + info("m3:f2: %x\n", f2); + + while (1) { + /* + * signal the waiter before requeue, waiter should automatically + * restart futex_wait_requeue_pi() in the kernel. Wait for the + * waiter to block on f1 again. + */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + usleep(DELAY_US); + + info("Requeueing waiter via FUTEX_CMP_REQUEUE_PI\n"); + old_val = f1; + res = futex_cmp_requeue_pi(&f1, old_val, &(f2), 1, 0, + FUTEX_PRIVATE_FLAG); + /* + * If res is non-zero, we either requeued the waiter or hit an + * error, break out and handle it. If it is zero, then the + * signal may have hit before the the waiter was blocked on f1. + * Try again. + */ + if (res > 0) { + atomic_set(&requeued, 1); + break; + } else if (res < 0) { + error("FUTEX_CMP_REQUEUE_PI failed\n", errno); + ret = RET_ERROR; + break; + } + } + info("m4:f2: %x\n", f2); + + /* + * Signal the waiter after requeue, waiter should return from + * futex_wait_requeue_pi() with EWOULDBLOCK. Join the thread here so the + * futex_unlock_pi() can't happen before the signal wakeup is detected + * in the kernel. 
+ */ + info("Issuing SIGUSR1 to waiter\n"); + pthread_kill(waiter, SIGUSR1); + info("Waiting for waiter to return\n"); + pthread_join(waiter, NULL); + + info("Calling FUTEX_UNLOCK_PI on mutex=%x @ %p\n", f2, &f2); + futex_unlock_pi(&f2, FUTEX_PRIVATE_FLAG); + info("m5:f2: %x\n", f2); + + out: + if (ret == RET_PASS && waiter_ret) + ret = waiter_ret; + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c new file mode 100644 index 000000000000..5f687f247454 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_private_mapped_file.c @@ -0,0 +1,125 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Internally, Futex has two handling mode, anon and file. The private file + * mapping is special. At first it behave as file, but after write anything + * it behave as anon. This test is intent to test such case. + * + * AUTHOR + * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + *****************************************************************************/ + +#include <stdio.h> +#include <stdlib.h> +#include <syscall.h> +#include <unistd.h> +#include <errno.h> +#include <linux/futex.h> +#include <pthread.h> +#include <libgen.h> +#include <signal.h> + +#include "logging.h" +#include "futextest.h" + +#define PAGE_SZ 4096 + +char pad[PAGE_SZ] = {1}; +futex_t val = 1; +char pad2[PAGE_SZ] = {1}; + +#define WAKE_WAIT_US 3000000 +struct timespec wait_timeout = { .tv_sec = 5, .tv_nsec = 0}; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *thr_futex_wait(void *arg) +{ + int ret; + + info("futex wait\n"); + ret = futex_wait(&val, 1, &wait_timeout, 0); + if (ret && errno != EWOULDBLOCK && errno != ETIMEDOUT) { + error("futex error.\n", errno); + print_result(RET_ERROR); + exit(RET_ERROR); + } + + if (ret && errno == ETIMEDOUT) + fail("waiter timedout\n"); + + info("futex_wait: ret = %d, errno = %d\n", ret, errno); + + return NULL; +} + +int main(int argc, char **argv) +{ + pthread_t thr; + int ret = RET_PASS; + int res; + int c; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the futex value of private file mappings in FUTEX_WAIT\n", + basename(argv[0])); + + ret = pthread_create(&thr, NULL, thr_futex_wait, NULL); + if (ret < 0) { + fprintf(stderr, "pthread_create error\n"); + ret = RET_ERROR; + goto out; + } + + info("wait a while\n"); + usleep(WAKE_WAIT_US); + val = 2; + res = futex_wake(&val, 1, 0); + info("futex_wake %d\n", res); + if (res != 1) { + fail("FUTEX_WAKE didn't find the waiting thread.\n"); + 
ret = RET_FAIL; + } + + info("join\n"); + pthread_join(thr, NULL); + + out: + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_timeout.c b/tools/testing/selftests/futex/functional/futex_wait_timeout.c new file mode 100644 index 000000000000..ab428ca894de --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_timeout.c @@ -0,0 +1,86 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Block on a futex and wait for timeout. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +static long timeout_ns = 100000; /* 100us default timeout */ + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -t N Timeout in nanoseconds (default: 100,000)\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + futex_t f1 = FUTEX_INITIALIZER; + struct timespec to; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 't': + timeout_ns = atoi(optarg); + break; + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Block on a futex and wait for timeout\n", + basename(argv[0])); + printf("\tArguments: timeout=%ldns\n", timeout_ns); + + /* initialize timeout */ + to.tv_sec = 0; + to.tv_nsec = timeout_ns; + + info("Calling futex_wait on f1: %u @ %p\n", f1, &f1); + res = futex_wait(&f1, f1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != ETIMEDOUT) { + fail("futex_wait returned %d\n", ret < 0 ? errno : ret); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c new file mode 100644 index 000000000000..fe7aee96844b --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_uninitialized_heap.c @@ -0,0 +1,124 @@ +/****************************************************************************** + * + * Copyright FUJITSU LIMITED 2010 + * Copyright KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Wait on uninitialized heap. It shold be zero and FUTEX_WAIT should + * return immediately. This test is intent to test zero page handling in + * futex. 
+ * + * AUTHOR + * KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + * HISTORY + * 2010-Jan-6: Initial version by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> + * + *****************************************************************************/ + +#include <pthread.h> +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <syscall.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <unistd.h> +#include <errno.h> +#include <linux/futex.h> +#include <libgen.h> + +#include "logging.h" +#include "futextest.h" + +#define WAIT_US 5000000 + +static int child_blocked = 1; +static int child_ret; +void *buf; + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +void *wait_thread(void *arg) +{ + int res; + + child_ret = RET_PASS; + res = futex_wait(buf, 1, NULL, 0); + child_blocked = 0; + + if (res != 0 && errno != EWOULDBLOCK) { + error("futex failure\n", errno); + child_ret = RET_ERROR; + } + pthread_exit(NULL); +} + +int main(int argc, char **argv) +{ + int c, ret = RET_PASS; + long page_size; + pthread_t thr; + + while ((c = getopt(argc, argv, "chv:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + page_size = sysconf(_SC_PAGESIZE); + + buf = mmap(NULL, page_size, PROT_READ|PROT_WRITE, + MAP_PRIVATE|MAP_ANONYMOUS, 0, 0); + if (buf == (void *)-1) { + error("mmap\n", errno); + exit(1); + } + + printf("%s: Test the uninitialized futex value in FUTEX_WAIT\n", + basename(argv[0])); + + + ret = pthread_create(&thr, NULL, wait_thread, NULL); + if (ret) { + error("pthread_create\n", errno); + ret = RET_ERROR; + goto out; + } + + info("waiting %dus for child to return\n", WAIT_US); + usleep(WAIT_US); + + ret = child_ret; + if (child_blocked) { + fail("child blocked in kernel\n"); + ret = RET_FAIL; + } + + out: + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c new file mode 100644 index 000000000000..b6b027448825 --- /dev/null +++ b/tools/testing/selftests/futex/functional/futex_wait_wouldblock.c @@ -0,0 +1,79 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Test if FUTEX_WAIT op returns -EWOULDBLOCK if the futex value differs + * from the expected one. 
+ * + * AUTHOR + * Gowrishankar <gowrishankar.m@in.ibm.com> + * + * HISTORY + * 2009-Nov-14: Initial version by Gowrishankar <gowrishankar.m@in.ibm.com> + * + *****************************************************************************/ + +#include <errno.h> +#include <getopt.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <time.h> +#include "futextest.h" +#include "logging.h" + +#define timeout_ns 100000 + +void usage(char *prog) +{ + printf("Usage: %s\n", prog); + printf(" -c Use color\n"); + printf(" -h Display this help message\n"); + printf(" -v L Verbosity level: %d=QUIET %d=CRITICAL %d=INFO\n", + VQUIET, VCRITICAL, VINFO); +} + +int main(int argc, char *argv[]) +{ + struct timespec to = {.tv_sec = 0, .tv_nsec = timeout_ns}; + futex_t f1 = FUTEX_INITIALIZER; + int res, ret = RET_PASS; + int c; + + while ((c = getopt(argc, argv, "cht:v:")) != -1) { + switch (c) { + case 'c': + log_color(1); + break; + case 'h': + usage(basename(argv[0])); + exit(0); + case 'v': + log_verbosity(atoi(optarg)); + break; + default: + usage(basename(argv[0])); + exit(1); + } + } + + printf("%s: Test the unexpected futex value in FUTEX_WAIT\n", + basename(argv[0])); + + info("Calling futex_wait on f1: %u @ %p with val=%u\n", f1, &f1, f1+1); + res = futex_wait(&f1, f1+1, &to, FUTEX_PRIVATE_FLAG); + if (!res || errno != EWOULDBLOCK) { + fail("futex_wait returned: %d %s\n", + res ? errno : res, res ? strerror(errno) : ""); + ret = RET_FAIL; + } + + print_result(ret); + return ret; +} diff --git a/tools/testing/selftests/futex/functional/run.sh b/tools/testing/selftests/futex/functional/run.sh new file mode 100755 index 000000000000..e87dbe2a0b0d --- /dev/null +++ b/tools/testing/selftests/futex/functional/run.sh @@ -0,0 +1,79 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run tests in the current directory. +# +# AUTHOR +# Darren Hart <dvhart@linux.intel.com> +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com> +# 2010-Jan-6: Add futex_wait_uninitialized_heap and futex_wait_private_mapped_file +# by KOSAKI Motohiro <kosaki.motohiro@jp.fujitsu.com> +# +############################################################################### + +# Test for a color capable console +if [ -z "$USE_COLOR" ]; then + tput setf 7 + if [ $? 
-eq 0 ]; then + USE_COLOR=1 + tput sgr0 + fi +fi +if [ "$USE_COLOR" -eq 1 ]; then + COLOR="-c" +fi + + +echo +# requeue pi testing +# without timeouts +./futex_requeue_pi $COLOR +./futex_requeue_pi $COLOR -b +./futex_requeue_pi $COLOR -b -l +./futex_requeue_pi $COLOR -b -o +./futex_requeue_pi $COLOR -l +./futex_requeue_pi $COLOR -o +# with timeouts +./futex_requeue_pi $COLOR -b -l -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -l -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +./futex_requeue_pi $COLOR -b -t 5000 +./futex_requeue_pi $COLOR -t 5000 +./futex_requeue_pi $COLOR -b -t 500000 +./futex_requeue_pi $COLOR -t 500000 +./futex_requeue_pi $COLOR -b -o -t 5000 +./futex_requeue_pi $COLOR -l -t 5000 +./futex_requeue_pi $COLOR -b -o -t 500000 +./futex_requeue_pi $COLOR -l -t 500000 +# with long timeout +./futex_requeue_pi $COLOR -b -l -t 2000000000 +./futex_requeue_pi $COLOR -l -t 2000000000 + + +echo +./futex_requeue_pi_mismatched_ops $COLOR + +echo +./futex_requeue_pi_signal_restart $COLOR + +echo +./futex_wait_timeout $COLOR + +echo +./futex_wait_wouldblock $COLOR + +echo +./futex_wait_uninitialized_heap $COLOR +./futex_wait_private_mapped_file $COLOR diff --git a/tools/testing/selftests/futex/include/atomic.h b/tools/testing/selftests/futex/include/atomic.h new file mode 100644 index 000000000000..f861da3e31ab --- /dev/null +++ b/tools/testing/selftests/futex/include/atomic.h @@ -0,0 +1,83 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * GCC atomic builtin wrappers + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-17: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#ifndef _ATOMIC_H +#define _ATOMIC_H + +typedef struct { + volatile int val; +} atomic_t; + +#define ATOMIC_INITIALIZER { 0 } + +/** + * atomic_cmpxchg() - Atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Return the old value of addr->val. + */ +static inline int +atomic_cmpxchg(atomic_t *addr, int oldval, int newval) +{ + return __sync_val_compare_and_swap(&addr->val, oldval, newval); +} + +/** + * atomic_inc() - Atomic incrememnt + * @addr: Address of the variable to increment + * + * Return the new value of addr->val. + */ +static inline int +atomic_inc(atomic_t *addr) +{ + return __sync_add_and_fetch(&addr->val, 1); +} + +/** + * atomic_dec() - Atomic decrement + * @addr: Address of the variable to decrement + * + * Return the new value of addr-val. + */ +static inline int +atomic_dec(atomic_t *addr) +{ + return __sync_sub_and_fetch(&addr->val, 1); +} + +/** + * atomic_set() - Atomic set + * @addr: Address of the variable to set + * @newval: New value for the atomic_t + * + * Return the new value of addr->val. 
+ */ +static inline int +atomic_set(atomic_t *addr, int newval) +{ + addr->val = newval; + return newval; +} + +#endif diff --git a/tools/testing/selftests/futex/include/futextest.h b/tools/testing/selftests/futex/include/futextest.h new file mode 100644 index 000000000000..b98c3aba7102 --- /dev/null +++ b/tools/testing/selftests/futex/include/futextest.h @@ -0,0 +1,266 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#ifndef _FUTEXTEST_H +#define _FUTEXTEST_H + +#include <unistd.h> +#include <sys/syscall.h> +#include <sys/types.h> +#include <linux/futex.h> + +typedef volatile u_int32_t futex_t; +#define FUTEX_INITIALIZER 0 + +/* Define the newer op codes if the system header file is not up to date. */ +#ifndef FUTEX_WAIT_BITSET +#define FUTEX_WAIT_BITSET 9 +#endif +#ifndef FUTEX_WAKE_BITSET +#define FUTEX_WAKE_BITSET 10 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI +#define FUTEX_WAIT_REQUEUE_PI 11 +#endif +#ifndef FUTEX_CMP_REQUEUE_PI +#define FUTEX_CMP_REQUEUE_PI 12 +#endif +#ifndef FUTEX_WAIT_REQUEUE_PI_PRIVATE +#define FUTEX_WAIT_REQUEUE_PI_PRIVATE (FUTEX_WAIT_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif +#ifndef FUTEX_CMP_REQUEUE_PI_PRIVATE +#define FUTEX_CMP_REQUEUE_PI_PRIVATE (FUTEX_CMP_REQUEUE_PI | \ + FUTEX_PRIVATE_FLAG) +#endif + +/** + * futex() - SYS_futex syscall wrapper + * @uaddr: address of first futex + * @op: futex op code + * @val: typically expected value of uaddr, but varies by op + * @timeout: typically an absolute struct timespec (except where noted + * otherwise). Overloaded by some ops + * @uaddr2: address of second futex for some ops + * @val3: varies by op + * @opflags: flags to be bitwise OR'd with op, such as FUTEX_PRIVATE_FLAG + * + * futex() is used by all the following futex op wrappers. It can also be + * used for misuse and abuse testing. Generally, the specific op wrappers + * should be used instead. It is a macro instead of a static inline function as + * some of the types are overloaded (timeout is used for nr_requeue for + * example). + * + * These argument descriptions are the defaults for all + * like-named arguments in the following wrappers except where noted below.
+ */ +#define futex(uaddr, op, val, timeout, uaddr2, val3, opflags) \ + syscall(SYS_futex, uaddr, op | opflags, val, timeout, uaddr2, val3) + +/** + * futex_wait() - block on uaddr with optional timeout + * @timeout: relative timeout + */ +static inline int +futex_wait(futex_t *uaddr, futex_t val, struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT, val, timeout, NULL, 0, opflags); +} + +/** + * futex_wake() - wake one or more tasks blocked on uaddr + * @nr_wake: wake up to this many tasks + */ +static inline int +futex_wake(futex_t *uaddr, int nr_wake, int opflags) +{ + return futex(uaddr, FUTEX_WAKE, nr_wake, NULL, NULL, 0, opflags); +} + +/** + * futex_wait_bitset() - block on uaddr with bitset + * @bitset: bitset to be used with futex_wake_bitset + */ +static inline int +futex_wait_bitset(futex_t *uaddr, futex_t val, struct timespec *timeout, + u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_BITSET, val, timeout, NULL, bitset, + opflags); +} + +/** + * futex_wake_bitset() - wake one or more tasks blocked on uaddr with bitset + * @bitset: bitset to compare with that used in futex_wait_bitset + */ +static inline int +futex_wake_bitset(futex_t *uaddr, int nr_wake, u_int32_t bitset, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_BITSET, nr_wake, NULL, NULL, bitset, + opflags); +} + +/** + * futex_lock_pi() - block on uaddr as a PI mutex + * @detect: whether (1) or not (0) to perform deadlock detection + */ +static inline int +futex_lock_pi(futex_t *uaddr, struct timespec *timeout, int detect, + int opflags) +{ + return futex(uaddr, FUTEX_LOCK_PI, detect, timeout, NULL, 0, opflags); +} + +/** + * futex_unlock_pi() - release uaddr as a PI mutex, waking the top waiter + */ +static inline int +futex_unlock_pi(futex_t *uaddr, int opflags) +{ + return futex(uaddr, FUTEX_UNLOCK_PI, 0, NULL, NULL, 0, opflags); +} + +/** + * futex_wake_op() - FIXME: COME UP WITH A GOOD ONE LINE DESCRIPTION + */ +static inline int +futex_wake_op(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_wake2, + int wake_op, int opflags) +{ + return futex(uaddr, FUTEX_WAKE_OP, nr_wake, nr_wake2, uaddr2, wake_op, + opflags); +} + +/** + * futex_requeue() - requeue without expected value comparison, deprecated + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + * + * Due to its inherently racy implementation, futex_requeue() is deprecated in + * favor of futex_cmp_requeue(). + */ +static inline int +futex_requeue(futex_t *uaddr, futex_t *uaddr2, int nr_wake, int nr_requeue, + int opflags) +{ + return futex(uaddr, FUTEX_REQUEUE, nr_wake, nr_requeue, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue() - requeue tasks from uaddr to uaddr2 + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_wait_requeue_pi() - block on uaddr and prepare to requeue to uaddr2 + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * + * This is the first half of the requeue_pi mechanism. It shall always be + * paired with futex_cmp_requeue_pi(). 
+ */ +static inline int +futex_wait_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, + struct timespec *timeout, int opflags) +{ + return futex(uaddr, FUTEX_WAIT_REQUEUE_PI, val, timeout, uaddr2, 0, + opflags); +} + +/** + * futex_cmp_requeue_pi() - requeue tasks from uaddr to uaddr2 (PI aware) + * @uaddr: non-PI futex source + * @uaddr2: PI futex target + * @nr_wake: wake up to this many tasks + * @nr_requeue: requeue up to this many tasks + */ +static inline int +futex_cmp_requeue_pi(futex_t *uaddr, futex_t val, futex_t *uaddr2, int nr_wake, + int nr_requeue, int opflags) +{ + return futex(uaddr, FUTEX_CMP_REQUEUE_PI, nr_wake, nr_requeue, uaddr2, + val, opflags); +} + +/** + * futex_cmpxchg() - atomic compare and exchange + * @uaddr: The address of the futex to be modified + * @oldval: The expected value of the futex + * @newval: The new value to try and assign the futex + * + * Implement cmpxchg using gcc atomic builtins. + * http://gcc.gnu.org/onlinedocs/gcc-4.1.0/gcc/Atomic-Builtins.html + * + * Return the old futex value. + */ +static inline u_int32_t +futex_cmpxchg(futex_t *uaddr, u_int32_t oldval, u_int32_t newval) +{ + return __sync_val_compare_and_swap(uaddr, oldval, newval); +} + +/** + * futex_dec() - atomic decrement of the futex value + * @uaddr: The address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_dec(futex_t *uaddr) +{ + return __sync_sub_and_fetch(uaddr, 1); +} + +/** + * futex_inc() - atomic increment of the futex value + * @uaddr: the address of the futex to be modified + * + * Return the new futex value. + */ +static inline u_int32_t +futex_inc(futex_t *uaddr) +{ + return __sync_add_and_fetch(uaddr, 1); +} + +/** + * futex_set() - atomic decrement of the futex value + * @uaddr: the address of the futex to be modified + * @newval: New value for the atomic_t + * + * Return the new futex value. + */ +static inline u_int32_t +futex_set(futex_t *uaddr, u_int32_t newval) +{ + *uaddr = newval; + return newval; +} + +#endif diff --git a/tools/testing/selftests/futex/include/logging.h b/tools/testing/selftests/futex/include/logging.h new file mode 100644 index 000000000000..014aa01197af --- /dev/null +++ b/tools/testing/selftests/futex/include/logging.h @@ -0,0 +1,153 @@ +/****************************************************************************** + * + * Copyright © International Business Machines Corp., 2009 + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * DESCRIPTION + * Glibc independent futex library for testing kernel functionality. + * + * AUTHOR + * Darren Hart <dvhart@linux.intel.com> + * + * HISTORY + * 2009-Nov-6: Initial version by Darren Hart <dvhart@linux.intel.com> + * + *****************************************************************************/ + +#ifndef _LOGGING_H +#define _LOGGING_H + +#include <string.h> +#include <unistd.h> +#include <linux/futex.h> +#include "kselftest.h" + +/* + * Define PASS, ERROR, and FAIL strings with and without color escape + * sequences, default to no color. 
+ */ +#define ESC 0x1B, '[' +#define BRIGHT '1' +#define GREEN '3', '2' +#define YELLOW '3', '3' +#define RED '3', '1' +#define ESCEND 'm' +#define BRIGHT_GREEN ESC, BRIGHT, ';', GREEN, ESCEND +#define BRIGHT_YELLOW ESC, BRIGHT, ';', YELLOW, ESCEND +#define BRIGHT_RED ESC, BRIGHT, ';', RED, ESCEND +#define RESET_COLOR ESC, '0', 'm' +static const char PASS_COLOR[] = {BRIGHT_GREEN, ' ', 'P', 'A', 'S', 'S', + RESET_COLOR, 0}; +static const char ERROR_COLOR[] = {BRIGHT_YELLOW, 'E', 'R', 'R', 'O', 'R', + RESET_COLOR, 0}; +static const char FAIL_COLOR[] = {BRIGHT_RED, ' ', 'F', 'A', 'I', 'L', + RESET_COLOR, 0}; +static const char INFO_NORMAL[] = " INFO"; +static const char PASS_NORMAL[] = " PASS"; +static const char ERROR_NORMAL[] = "ERROR"; +static const char FAIL_NORMAL[] = " FAIL"; +const char *INFO = INFO_NORMAL; +const char *PASS = PASS_NORMAL; +const char *ERROR = ERROR_NORMAL; +const char *FAIL = FAIL_NORMAL; + +/* Verbosity setting for INFO messages */ +#define VQUIET 0 +#define VCRITICAL 1 +#define VINFO 2 +#define VMAX VINFO +int _verbose = VCRITICAL; + +/* Functional test return codes */ +#define RET_PASS 0 +#define RET_ERROR -1 +#define RET_FAIL -2 + +/** + * log_color() - Use colored output for PASS, ERROR, and FAIL strings + * @use_color: use color (1) or not (0) + */ +void log_color(int use_color) +{ + if (use_color) { + PASS = PASS_COLOR; + ERROR = ERROR_COLOR; + FAIL = FAIL_COLOR; + } else { + PASS = PASS_NORMAL; + ERROR = ERROR_NORMAL; + FAIL = FAIL_NORMAL; + } +} + +/** + * log_verbosity() - Set verbosity of test output + * @verbose: Enable (1) verbose output or not (0) + * + * Currently setting verbose=1 will enable INFO messages and 0 will disable + * them. FAIL and ERROR messages are always displayed. + */ +void log_verbosity(int level) +{ + if (level > VMAX) + level = VMAX; + else if (level < 0) + level = 0; + _verbose = level; +} + +/** + * print_result() - Print standard PASS | ERROR | FAIL results + * @ret: the return value to be considered: 0 | RET_ERROR | RET_FAIL + * + * print_result() is primarily intended for functional tests. + */ +void print_result(int ret) +{ + const char *result = "Unknown return code"; + + switch (ret) { + case RET_PASS: + ksft_inc_pass_cnt(); + result = PASS; + break; + case RET_ERROR: + result = ERROR; + break; + case RET_FAIL: + ksft_inc_fail_cnt(); + result = FAIL; + break; + } + printf("Result: %s\n", result); +} + +/* log level macros */ +#define info(message, vargs...) \ +do { \ + if (_verbose >= VINFO) \ + fprintf(stderr, "\t%s: "message, INFO, ##vargs); \ +} while (0) + +#define error(message, err, args...) \ +do { \ + if (_verbose >= VCRITICAL) {\ + if (err) \ + fprintf(stderr, "\t%s: %s: "message, \ + ERROR, strerror(err), ##args); \ + else \ + fprintf(stderr, "\t%s: "message, ERROR, ##args); \ + } \ +} while (0) + +#define fail(message, args...) 
\ +do { \ + if (_verbose >= VCRITICAL) \ + fprintf(stderr, "\t%s: "message, FAIL, ##args); \ +} while (0) + +#endif diff --git a/tools/testing/selftests/futex/run.sh b/tools/testing/selftests/futex/run.sh new file mode 100755 index 000000000000..4126312ad64e --- /dev/null +++ b/tools/testing/selftests/futex/run.sh @@ -0,0 +1,33 @@ +#!/bin/sh + +############################################################################### +# +# Copyright © International Business Machines Corp., 2009 +# +# This program is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 2 of the License, or +# (at your option) any later version. +# +# DESCRIPTION +# Run all tests under the functional, performance, and stress directories. +# Format and summarize the results. +# +# AUTHOR +# Darren Hart <dvhart@linux.intel.com> +# +# HISTORY +# 2009-Nov-9: Initial version by Darren Hart <dvhart@linux.intel.com> +# +############################################################################### + +# Test for a color capable shell and pass the result to the subdir scripts +USE_COLOR=0 +tput setf 7 +if [ $? -eq 0 ]; then + USE_COLOR=1 + tput sgr0 +fi +export USE_COLOR + +(cd functional; ./run.sh) diff --git a/tools/testing/selftests/gen_kselftest_tar.sh b/tools/testing/selftests/gen_kselftest_tar.sh new file mode 100755 index 000000000000..17d5bd0c0936 --- /dev/null +++ b/tools/testing/selftests/gen_kselftest_tar.sh @@ -0,0 +1,55 @@ +#!/bin/bash +# +# gen_kselftest_tar +# Generate kselftest tarball +# Author: Shuah Khan <shuahkh@osg.samsung.com> +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +# main +main() +{ + if [ "$#" -eq 0 ]; then + echo "$0: Generating default compression gzip" + copts="cvzf" + ext=".tar.gz" + else + case "$1" in + tar) + copts="cvf" + ext=".tar" + ;; + targz) + copts="cvzf" + ext=".tar.gz" + ;; + tarbz2) + copts="cvjf" + ext=".tar.bz2" + ;; + tarxz) + copts="cvJf" + ext=".tar.xz" + ;; + *) + echo "Unknown tarball format $1" + exit 1 + ;; + esac + fi + + install_dir=./kselftest + +# Run install using INSTALL_KSFT_PATH override to generate install +# directory +./kselftest_install.sh +tar $copts kselftest${ext} $install_dir +echo "Kselftest archive kselftest${ext} created!" 
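Taken together, logging.h gives each functional futex test a uniform front end: log_color() and log_verbosity() are normally wired up to command-line options, the info()/error()/fail() macros handle per-message formatting, and print_result() turns RET_PASS/RET_ERROR/RET_FAIL into the final PASS/ERROR/FAIL line while bumping the kselftest counters. A minimal sketch of how a test might use it (illustrative only, not one of the tests added here; it assumes kselftest.h is on the include path, as it is for the futex tests):

#include <unistd.h>
#include "logging.h"

int main(int argc, char *argv[])
{
	int ret = RET_PASS;

	log_verbosity(VINFO);		/* show info() output */
	log_color(1);			/* colored PASS/ERROR/FAIL strings */

	info("starting demo test\n");
	if (sysconf(_SC_PAGESIZE) < 0) {
		fail("could not read the page size\n");
		ret = RET_FAIL;
	}

	print_result(ret);
	return ret;
}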
+ +# clean up install directory +rm -rf kselftest +} + +main "$@" diff --git a/tools/testing/selftests/ipc/Makefile b/tools/testing/selftests/ipc/Makefile index 74bbefdeaf4c..25d2e702c68a 100644 --- a/tools/testing/selftests/ipc/Makefile +++ b/tools/testing/selftests/ipc/Makefile @@ -12,14 +12,11 @@ endif CFLAGS += -I../../../../usr/include/ all: -ifeq ($(ARCH),x86) - gcc $(CFLAGS) msgque.c -o msgque_test -else - echo "Not an x86 target, can't build msgque selftest" -endif + $(CC) $(CFLAGS) msgque.c -o msgque_test + +TEST_PROGS := msgque_test -run_tests: all - ./msgque_test +include ../lib.mk clean: rm -fr ./msgque_test diff --git a/tools/testing/selftests/kcmp/Makefile b/tools/testing/selftests/kcmp/Makefile index ff0eefdc6ceb..2ae7450a9a89 100644 --- a/tools/testing/selftests/kcmp/Makefile +++ b/tools/testing/selftests/kcmp/Makefile @@ -1,10 +1,10 @@ -CC := $(CROSS_COMPILE)$(CC) CFLAGS += -I../../../../usr/include/ all: kcmp_test -run_tests: all - @./kcmp_test || echo "kcmp_test: [FAIL]" +TEST_PROGS := kcmp_test + +include ../lib.mk clean: $(RM) kcmp_test kcmp-test-file diff --git a/tools/testing/selftests/kselftest.h b/tools/testing/selftests/kselftest.h index 572c8888167a..ef1c80d67ac7 100644 --- a/tools/testing/selftests/kselftest.h +++ b/tools/testing/selftests/kselftest.h @@ -13,6 +13,13 @@ #include <stdlib.h> #include <unistd.h> +/* define kselftest exit codes */ +#define KSFT_PASS 0 +#define KSFT_FAIL 1 +#define KSFT_XFAIL 2 +#define KSFT_XPASS 3 +#define KSFT_SKIP 4 + /* counters */ struct ksft_count { unsigned int ksft_pass; @@ -40,23 +47,23 @@ static inline void ksft_print_cnts(void) static inline int ksft_exit_pass(void) { - exit(0); + exit(KSFT_PASS); } static inline int ksft_exit_fail(void) { - exit(1); + exit(KSFT_FAIL); } static inline int ksft_exit_xfail(void) { - exit(2); + exit(KSFT_XFAIL); } static inline int ksft_exit_xpass(void) { - exit(3); + exit(KSFT_XPASS); } static inline int ksft_exit_skip(void) { - exit(4); + exit(KSFT_SKIP); } #endif /* __KSELFTEST_H */ diff --git a/tools/testing/selftests/kselftest_install.sh b/tools/testing/selftests/kselftest_install.sh new file mode 100755 index 000000000000..1555fbdb08da --- /dev/null +++ b/tools/testing/selftests/kselftest_install.sh @@ -0,0 +1,37 @@ +#!/bin/bash +# +# Kselftest Install +# Install kselftest tests +# Author: Shuah Khan <shuahkh@osg.samsung.com> +# Copyright (C) 2015 Samsung Electronics Co., Ltd. + +# This software may be freely redistributed under the terms of the GNU +# General Public License (GPLv2). + +install_loc=`pwd` + +main() +{ + if [ $(basename $install_loc) != "selftests" ]; then + echo "$0: Please run it in selftests directory ..." + exit 1; + fi + if [ "$#" -eq 0 ]; then + echo "$0: Installing in default location - $install_loc ..." + elif [ ! -d "$1" ]; then + echo "$0: $1 doesn't exist!!" + exit 1; + else + install_loc=$1 + echo "$0: Installing in specified location - $install_loc ..." + fi + + install_dir=$install_loc/kselftest + +# Create install directory + mkdir -p $install_dir +# Build tests + INSTALL_PATH=$install_dir make install +} + +main "$@" diff --git a/tools/testing/selftests/lib.mk b/tools/testing/selftests/lib.mk new file mode 100644 index 000000000000..ee412bab7ed4 --- /dev/null +++ b/tools/testing/selftests/lib.mk @@ -0,0 +1,38 @@ +# This mimics the top-level Makefile. We do it explicitly here so that this +# Makefile can operate with or without the kbuild infrastructure. 
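The named KSFT_* codes replace the bare exit numbers so that ksft_exit_pass(), ksft_exit_fail(), ksft_exit_skip() and friends read naturally and wrapper scripts can match on them. A sketch of the intended usage in a test (illustrative; the include is resolved via the -I flags the selftest Makefiles already set, and the counter helpers are the ones declared in kselftest.h):

#include <unistd.h>
#include "kselftest.h"

int main(void)
{
	if (geteuid() != 0) {
		/* Environment cannot support the test: report SKIP (exit code 4). */
		return ksft_exit_skip();
	}

	ksft_inc_pass_cnt();		/* record a passing sub-test */
	ksft_print_cnts();		/* print the pass/fail/xfail/xpass summary */
	return ksft_exit_pass();	/* exit(KSFT_PASS) */
}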
+CC := $(CROSS_COMPILE)gcc + +define RUN_TESTS + @for TEST in $(TEST_PROGS); do \ + (./$$TEST && echo "selftests: $$TEST [PASS]") || echo "selftests: $$TEST [FAIL]"; \ + done; +endef + +run_tests: all + $(RUN_TESTS) + +define INSTALL_RULE + mkdir -p $(INSTALL_PATH) + @for TEST_DIR in $(TEST_DIRS); do\ + cp -r $$TEST_DIR $(INSTALL_PATH); \ + done; + install -t $(INSTALL_PATH) $(TEST_PROGS) $(TEST_PROGS_EXTENDED) $(TEST_FILES) +endef + +install: all +ifdef INSTALL_PATH + $(INSTALL_RULE) +else + $(error Error: set INSTALL_PATH to use install) +endif + +define EMIT_TESTS + @for TEST in $(TEST_PROGS); do \ + echo "(./$$TEST && echo \"selftests: $$TEST [PASS]\") || echo \"selftests: $$TEST [FAIL]\""; \ + done; +endef + +emit_tests: + $(EMIT_TESTS) + +.PHONY: run_tests all clean install emit_tests diff --git a/tools/testing/selftests/memfd/Makefile b/tools/testing/selftests/memfd/Makefile index b80cd10d53ba..3e7eb7972511 100644 --- a/tools/testing/selftests/memfd/Makefile +++ b/tools/testing/selftests/memfd/Makefile @@ -1,17 +1,19 @@ +CC = $(CROSS_COMPILE)gcc CFLAGS += -D_FILE_OFFSET_BITS=64 CFLAGS += -I../../../../include/uapi/ CFLAGS += -I../../../../include/ +CFLAGS += -I../../../../usr/include/ all: - gcc $(CFLAGS) memfd_test.c -o memfd_test + $(CC) $(CFLAGS) memfd_test.c -o memfd_test -run_tests: all - gcc $(CFLAGS) memfd_test.c -o memfd_test - @./memfd_test || echo "memfd_test: [FAIL]" +TEST_PROGS := memfd_test + +include ../lib.mk build_fuse: - gcc $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt - gcc $(CFLAGS) fuse_test.c -o fuse_test + $(CC) $(CFLAGS) fuse_mnt.c `pkg-config fuse --cflags --libs` -o fuse_mnt + $(CC) $(CFLAGS) fuse_test.c -o fuse_test run_fuse: build_fuse @./run_fuse_test.sh || echo "fuse_test: [FAIL]" diff --git a/tools/testing/selftests/memory-hotplug/Makefile b/tools/testing/selftests/memory-hotplug/Makefile index d46b8d489cd2..afb2624c7048 100644 --- a/tools/testing/selftests/memory-hotplug/Makefile +++ b/tools/testing/selftests/memory-hotplug/Makefile @@ -1,9 +1,12 @@ all: -run_tests: - @/bin/bash ./on-off-test.sh -r 2 || echo "memory-hotplug selftests: [FAIL]" +include ../lib.mk + +TEST_PROGS := mem-on-off-test.sh +override RUN_TESTS := ./mem-on-off-test.sh -r 2 || echo "selftests: memory-hotplug [FAIL]" +override EMIT_TESTS := echo "$(RUN_TESTS)" run_full_test: - @/bin/bash ./on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" + @/bin/bash ./mem-on-off-test.sh || echo "memory-hotplug selftests: [FAIL]" clean: diff --git a/tools/testing/selftests/memory-hotplug/on-off-test.sh b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh index 6cddde0b96f8..6cddde0b96f8 100644..100755 --- a/tools/testing/selftests/memory-hotplug/on-off-test.sh +++ b/tools/testing/selftests/memory-hotplug/mem-on-off-test.sh diff --git a/tools/testing/selftests/mount/.gitignore b/tools/testing/selftests/mount/.gitignore new file mode 100644 index 000000000000..856ad4107eb3 --- /dev/null +++ b/tools/testing/selftests/mount/.gitignore @@ -0,0 +1 @@ +unprivileged-remount-test diff --git a/tools/testing/selftests/mount/Makefile b/tools/testing/selftests/mount/Makefile index 337d853c2b72..5e35c9c50b72 100644 --- a/tools/testing/selftests/mount/Makefile +++ b/tools/testing/selftests/mount/Makefile @@ -1,17 +1,21 @@ # Makefile for mount selftests. 
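memfd_test (built above) exercises the memfd_create(2) interface, which is why the Makefile now adds usr/include to the include path for the sanitized uapi headers. For readers unfamiliar with the call, here is a minimal standalone sketch; it is not taken from memfd_test.c, and it assumes a kernel and headers new enough to provide __NR_memfd_create and <linux/memfd.h> (there was no glibc wrapper at the time, so the raw syscall is used):

#define _GNU_SOURCE
#include <stdio.h>
#include <unistd.h>
#include <sys/syscall.h>
#include <linux/memfd.h>

int main(void)
{
	int fd = syscall(__NR_memfd_create, "memfd-demo",
			 MFD_CLOEXEC | MFD_ALLOW_SEALING);

	if (fd < 0) {
		perror("memfd_create");
		return 1;
	}

	/* The descriptor behaves like an unlinked tmpfs-backed file. */
	if (ftruncate(fd, 4096) < 0) {
		perror("ftruncate");
		return 1;
	}

	close(fd);
	return 0;
}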
- +CFLAGS = -Wall \ + -O2 all: unprivileged-remount-test unprivileged-remount-test: unprivileged-remount-test.c - gcc -Wall -O2 unprivileged-remount-test.c -o unprivileged-remount-test + $(CC) $(CFLAGS) unprivileged-remount-test.c -o unprivileged-remount-test -# Allow specific tests to be selected. -test_unprivileged_remount: unprivileged-remount-test - @if [ -f /proc/self/uid_map ] ; then ./unprivileged-remount-test ; fi +include ../lib.mk -run_tests: all test_unprivileged_remount +TEST_PROGS := unprivileged-remount-test +override RUN_TESTS := if [ -f /proc/self/uid_map ] ; \ + then \ + ./unprivileged-remount-test ; \ + else \ + echo "WARN: No /proc/self/uid_map exist, test skipped." ; \ + fi +override EMIT_TESTS := echo "$(RUN_TESTS)" clean: rm -f unprivileged-remount-test - -.PHONY: all test_unprivileged_remount diff --git a/tools/testing/selftests/mqueue/Makefile b/tools/testing/selftests/mqueue/Makefile index 8056e2e68fa4..0e3b41eb85cd 100644 --- a/tools/testing/selftests/mqueue/Makefile +++ b/tools/testing/selftests/mqueue/Makefile @@ -1,10 +1,22 @@ +CFLAGS = -O2 + all: - gcc -O2 mq_open_tests.c -o mq_open_tests -lrt - gcc -O2 -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt + $(CC) $(CFLAGS) mq_open_tests.c -o mq_open_tests -lrt + $(CC) $(CFLAGS) -o mq_perf_tests mq_perf_tests.c -lrt -lpthread -lpopt + +include ../lib.mk + +override define RUN_TESTS + @./mq_open_tests /test1 || echo "selftests: mq_open_tests [FAIL]" + @./mq_perf_tests || echo "selftests: mq_perf_tests [FAIL]" +endef + +TEST_PROGS := mq_open_tests mq_perf_tests -run_tests: - @./mq_open_tests /test1 || echo "mq_open_tests: [FAIL]" - @./mq_perf_tests || echo "mq_perf_tests: [FAIL]" +override define EMIT_TESTS + echo "./mq_open_tests /test1 || echo \"selftests: mq_open_tests [FAIL]\"" + echo "./mq_perf_tests || echo \"selftests: mq_perf_tests [FAIL]\"" +endef clean: rm -f mq_open_tests mq_perf_tests diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile index 62f22cc9941c..fac4782c51d8 100644 --- a/tools/testing/selftests/net/Makefile +++ b/tools/testing/selftests/net/Makefile @@ -1,6 +1,5 @@ # Makefile for net selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -O2 -g CFLAGS += -I../../../../usr/include/ @@ -11,9 +10,10 @@ all: $(NET_PROGS) %: %.c $(CC) $(CFLAGS) -o $@ $^ -run_tests: all - @/bin/sh ./run_netsocktests || echo "sockettests: [FAIL]" - @/bin/sh ./run_afpackettests || echo "afpackettests: [FAIL]" - ./test_bpf.sh +TEST_PROGS := run_netsocktests run_afpackettests test_bpf.sh +TEST_FILES := $(NET_PROGS) + +include ../lib.mk + clean: $(RM) $(NET_PROGS) diff --git a/tools/testing/selftests/net/psock_fanout.c b/tools/testing/selftests/net/psock_fanout.c index 6f6733331d95..08c2a36ef7a9 100644 --- a/tools/testing/selftests/net/psock_fanout.c +++ b/tools/testing/selftests/net/psock_fanout.c @@ -272,7 +272,7 @@ int main(int argc, char **argv) const int expect_hash[2][2] = { { 15, 5 }, { 20, 5 } }; const int expect_hash_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_lb[2][2] = { { 10, 10 }, { 18, 17 } }; - const int expect_rb[2][2] = { { 20, 0 }, { 20, 15 } }; + const int expect_rb[2][2] = { { 15, 5 }, { 20, 15 } }; const int expect_cpu0[2][2] = { { 20, 0 }, { 20, 0 } }; const int expect_cpu1[2][2] = { { 0, 20 }, { 0, 20 } }; int port_off = 2, tries = 5, ret; diff --git a/tools/testing/selftests/net/run_afpackettests b/tools/testing/selftests/net/run_afpackettests index 5246e782d6e8..5246e782d6e8 100644..100755 --- a/tools/testing/selftests/net/run_afpackettests +++ 
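mq_open_tests and mq_perf_tests (above) are built against the POSIX message-queue API, which lives in librt — hence the -lrt on their link lines. A minimal, self-contained usage sketch of that API (not taken from either test; the queue name is made up for the example):

#include <stdio.h>
#include <stdlib.h>
#include <fcntl.h>
#include <mqueue.h>

int main(void)
{
	struct mq_attr attr;
	char *buf;
	mqd_t mq;

	mq = mq_open("/mqueue-demo", O_CREAT | O_RDWR, 0600, NULL);
	if (mq == (mqd_t)-1) {
		perror("mq_open");
		return 1;
	}

	/* Receive buffers must be at least mq_msgsize bytes. */
	if (mq_getattr(mq, &attr) == -1) {
		perror("mq_getattr");
		return 1;
	}
	buf = malloc(attr.mq_msgsize);
	if (!buf)
		return 1;

	if (mq_send(mq, "ping", 5, 0) == -1)
		perror("mq_send");
	else if (mq_receive(mq, buf, attr.mq_msgsize, NULL) == -1)
		perror("mq_receive");
	else
		printf("received: %s\n", buf);

	free(buf);
	mq_close(mq);
	mq_unlink("/mqueue-demo");
	return 0;
}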
b/tools/testing/selftests/net/run_afpackettests diff --git a/tools/testing/selftests/net/run_netsocktests b/tools/testing/selftests/net/run_netsocktests index c09a682df56a..c09a682df56a 100644..100755 --- a/tools/testing/selftests/net/run_netsocktests +++ b/tools/testing/selftests/net/run_netsocktests diff --git a/tools/testing/selftests/powerpc/Makefile b/tools/testing/selftests/powerpc/Makefile index 1d5e7ad2c460..03ca2e64b3fc 100644 --- a/tools/testing/selftests/powerpc/Makefile +++ b/tools/testing/selftests/powerpc/Makefile @@ -8,27 +8,41 @@ ifeq ($(ARCH),powerpc) GIT_VERSION = $(shell git describe --always --long --dirty || echo "unknown") -CC := $(CROSS_COMPILE)$(CC) CFLAGS := -Wall -O2 -flto -Wall -Werror -DGIT_VERSION='"$(GIT_VERSION)"' -I$(CURDIR) $(CFLAGS) -export CC CFLAGS +export CFLAGS -TARGETS = pmu copyloops mm tm primitives stringloops +SUB_DIRS = pmu copyloops mm tm primitives stringloops vphn switch_endian dscr endif -all: $(TARGETS) +all: $(SUB_DIRS) -$(TARGETS): +$(SUB_DIRS): $(MAKE) -k -C $@ all -run_tests: all - @for TARGET in $(TARGETS); do \ +include ../lib.mk + +override define RUN_TESTS + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET run_tests; \ done; +endef + +override define INSTALL_RULE + @for TARGET in $(SUB_DIRS); do \ + $(MAKE) -C $$TARGET install; \ + done; +endef + +override define EMIT_TESTS + @for TARGET in $(SUB_DIRS); do \ + $(MAKE) -s -C $$TARGET emit_tests; \ + done; +endef clean: - @for TARGET in $(TARGETS); do \ + @for TARGET in $(SUB_DIRS); do \ $(MAKE) -C $$TARGET clean; \ done; rm -f tags @@ -36,4 +50,4 @@ clean: tags: find . -name '*.c' -o -name '*.h' | xargs ctags -.PHONY: all run_tests clean tags $(TARGETS) +.PHONY: tags $(SUB_DIRS) diff --git a/tools/testing/selftests/powerpc/copyloops/Makefile b/tools/testing/selftests/powerpc/copyloops/Makefile index 6f2d3be227f9..384843ea0d40 100644 --- a/tools/testing/selftests/powerpc/copyloops/Makefile +++ b/tools/testing/selftests/powerpc/copyloops/Makefile @@ -2,28 +2,24 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) CFLAGS += -D SELFTEST +CFLAGS += -maltivec # Use our CFLAGS for the implicit .S rule ASFLAGS = $(CFLAGS) -PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 +TEST_PROGS := copyuser_64 copyuser_power7 memcpy_64 memcpy_power7 EXTRA_SOURCES := validate.c ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) copyuser_64: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_base copyuser_power7: CPPFLAGS += -D COPY_LOOP=test___copy_tofrom_user_power7 memcpy_64: CPPFLAGS += -D COPY_LOOP=test_memcpy memcpy_power7: CPPFLAGS += -D COPY_LOOP=test_memcpy_power7 -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h index d1dc37425510..50ae7d2091ce 100644 --- a/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h +++ b/tools/testing/selftests/powerpc/copyloops/asm/ppc_asm.h @@ -4,39 +4,6 @@ #define r1 1 -#define vr0 0 -#define vr1 1 -#define vr2 2 -#define vr3 3 -#define vr4 4 -#define vr5 5 -#define vr6 6 -#define vr7 7 -#define vr8 8 -#define vr9 9 -#define vr10 10 -#define vr11 11 -#define vr12 12 -#define vr13 13 -#define vr14 14 -#define vr15 15 -#define vr16 16 -#define vr17 17 -#define vr18 18 -#define vr19 19 -#define vr20 20 -#define vr21 21 -#define vr22 22 -#define vr23 23 -#define 
vr24 24 -#define vr25 25 -#define vr26 26 -#define vr27 27 -#define vr28 28 -#define vr29 29 -#define vr30 30 -#define vr31 31 - #define R14 r14 #define R15 r15 #define R16 r16 diff --git a/tools/testing/selftests/powerpc/dscr/.gitignore b/tools/testing/selftests/powerpc/dscr/.gitignore new file mode 100644 index 000000000000..b585c6c1564a --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/.gitignore @@ -0,0 +1,7 @@ +dscr_default_test +dscr_explicit_test +dscr_inherit_exec_test +dscr_inherit_test +dscr_sysfs_test +dscr_sysfs_thread_test +dscr_user_test diff --git a/tools/testing/selftests/powerpc/dscr/Makefile b/tools/testing/selftests/powerpc/dscr/Makefile new file mode 100644 index 000000000000..49327ee84e3a --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/Makefile @@ -0,0 +1,14 @@ +TEST_PROGS := dscr_default_test dscr_explicit_test dscr_user_test \ + dscr_inherit_test dscr_inherit_exec_test dscr_sysfs_test \ + dscr_sysfs_thread_test + +dscr_default_test: LDLIBS += -lpthread + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/dscr/dscr.h b/tools/testing/selftests/powerpc/dscr/dscr.h new file mode 100644 index 000000000000..a36af1b2c2bb --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr.h @@ -0,0 +1,127 @@ +/* + * POWER Data Stream Control Register (DSCR) + * + * This header file contains helper functions and macros + * required for all the DSCR related test cases. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
+ */ +#ifndef _SELFTESTS_POWERPC_DSCR_DSCR_H +#define _SELFTESTS_POWERPC_DSCR_DSCR_H + +#include <unistd.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <fcntl.h> +#include <dirent.h> +#include <pthread.h> +#include <sched.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <sys/wait.h> + +#include "utils.h" + +#define SPRN_DSCR 0x11 /* Privilege state SPR */ +#define SPRN_DSCR_USR 0x03 /* Problem state SPR */ +#define THREADS 100 /* Max threads */ +#define COUNT 100 /* Max iterations */ +#define DSCR_MAX 16 /* Max DSCR value */ +#define LEN_MAX 100 /* Max name length */ + +#define DSCR_DEFAULT "/sys/devices/system/cpu/dscr_default" +#define CPU_PATH "/sys/devices/system/cpu/" + +#define rmb() asm volatile("lwsync":::"memory") +#define wmb() asm volatile("lwsync":::"memory") + +#define ACCESS_ONCE(x) (*(volatile typeof(x) *)&(x)) + +/* Prilvilege state DSCR access */ +inline unsigned long get_dscr(void) +{ + unsigned long ret; + + asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR)); + + return ret; +} + +inline void set_dscr(unsigned long val) +{ + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR)); +} + +/* Problem state DSCR access */ +inline unsigned long get_dscr_usr(void) +{ + unsigned long ret; + + asm volatile("mfspr %0,%1" : "=r" (ret): "i" (SPRN_DSCR_USR)); + + return ret; +} + +inline void set_dscr_usr(unsigned long val) +{ + asm volatile("mtspr %1,%0" : : "r" (val), "i" (SPRN_DSCR_USR)); +} + +/* Default DSCR access */ +unsigned long get_default_dscr(void) +{ + int fd = -1, ret; + char buf[16]; + unsigned long val; + + if (fd == -1) { + fd = open(DSCR_DEFAULT, O_RDONLY); + if (fd == -1) { + perror("open() failed"); + exit(1); + } + } + memset(buf, 0, sizeof(buf)); + lseek(fd, 0, SEEK_SET); + ret = read(fd, buf, sizeof(buf)); + if (ret == -1) { + perror("read() failed"); + exit(1); + } + sscanf(buf, "%lx", &val); + close(fd); + return val; +} + +void set_default_dscr(unsigned long val) +{ + int fd = -1, ret; + char buf[16]; + + if (fd == -1) { + fd = open(DSCR_DEFAULT, O_RDWR); + if (fd == -1) { + perror("open() failed"); + exit(1); + } + } + sprintf(buf, "%lx\n", val); + ret = write(fd, buf, strlen(buf)); + if (ret == -1) { + perror("write() failed"); + exit(1); + } + close(fd); +} + +double uniform_deviate(int seed) +{ + return seed * (1.0 / (RAND_MAX + 1.0)); +} +#endif /* _SELFTESTS_POWERPC_DSCR_DSCR_H */ diff --git a/tools/testing/selftests/powerpc/dscr/dscr_default_test.c b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c new file mode 100644 index 000000000000..df17c3bab0a7 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_default_test.c @@ -0,0 +1,127 @@ +/* + * POWER Data Stream Control Register (DSCR) default test + * + * This test modifies the system wide default DSCR through + * it's sysfs interface and then verifies that all threads + * see the correct changed DSCR value immediately. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
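All of the DSCR tests below follow the same discipline with the sysfs helpers defined in dscr.h: read the current system-wide default first, poke new values during the test, and restore the original on both the success and failure paths. A condensed sketch of that pattern (illustrative only — it needs a DSCR-capable system and write access to the sysfs file, and the real tests wrap the body in more specific checks):

#include "dscr.h"

static int dscr_restore_demo(void)
{
	unsigned long orig = get_default_dscr();
	unsigned long val;

	for (val = 0; val < DSCR_MAX; val++) {
		set_default_dscr(val);
		if (get_default_dscr() != val) {
			set_default_dscr(orig);		/* restore on failure */
			return 1;
		}
	}

	set_default_dscr(orig);				/* restore on success */
	return 0;
}

int main(void)
{
	return test_harness(dscr_restore_demo, "dscr_restore_demo");
}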
+ */ +#include "dscr.h" + +static unsigned long dscr; /* System DSCR default */ +static unsigned long sequence; +static unsigned long result[THREADS]; + +static void *do_test(void *in) +{ + unsigned long thread = (unsigned long)in; + unsigned long i; + + for (i = 0; i < COUNT; i++) { + unsigned long d, cur_dscr, cur_dscr_usr; + unsigned long s1, s2; + + s1 = ACCESS_ONCE(sequence); + if (s1 & 1) + continue; + rmb(); + + d = dscr; + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + rmb(); + s2 = sequence; + + if (s1 != s2) + continue; + + if (cur_dscr != d) { + fprintf(stderr, "thread %ld kernel DSCR should be %ld " + "but is %ld\n", thread, d, cur_dscr); + result[thread] = 1; + pthread_exit(&result[thread]); + } + + if (cur_dscr_usr != d) { + fprintf(stderr, "thread %ld user DSCR should be %ld " + "but is %ld\n", thread, d, cur_dscr_usr); + result[thread] = 1; + pthread_exit(&result[thread]); + } + } + result[thread] = 0; + pthread_exit(&result[thread]); +} + +int dscr_default(void) +{ + pthread_t threads[THREADS]; + unsigned long i, *status[THREADS]; + unsigned long orig_dscr_default; + + orig_dscr_default = get_default_dscr(); + + /* Initial DSCR default */ + dscr = 1; + set_default_dscr(dscr); + + /* Spawn all testing threads */ + for (i = 0; i < THREADS; i++) { + if (pthread_create(&threads[i], NULL, do_test, (void *)i)) { + perror("pthread_create() failed"); + goto fail; + } + } + + srand(getpid()); + + /* Keep changing the DSCR default */ + for (i = 0; i < COUNT; i++) { + double ret = uniform_deviate(rand()); + + if (ret < 0.0001) { + sequence++; + wmb(); + + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_default_dscr(dscr); + + wmb(); + sequence++; + } + } + + /* Individual testing thread exit status */ + for (i = 0; i < THREADS; i++) { + if (pthread_join(threads[i], (void **)&(status[i]))) { + perror("pthread_join() failed"); + goto fail; + } + + if (*status[i]) { + printf("%ldth thread failed to join with %ld status\n", + i, *status[i]); + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_default, "dscr_default_test"); +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c new file mode 100644 index 000000000000..ad9c3ec26048 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_explicit_test.c @@ -0,0 +1,71 @@ +/* + * POWER Data Stream Control Register (DSCR) explicit test + * + * This test modifies the DSCR value using mtspr instruction and + * verifies the change with mfspr instruction. It uses both the + * privilege state SPR and the problem state SPR for this purpose. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
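dscr_default_test above is essentially a hand-rolled seqlock: the single writer makes `sequence` odd before touching the shared default, and even again afterwards, while readers retry whenever they observe an odd count or the count changes across their read. The same pattern stripped of the DSCR specifics is sketched below; it substitutes a portable __sync_synchronize() full barrier for dscr.h's lwsync-based rmb()/wmb() (an assumption for portability — the real test relies on the powerpc barriers):

#include <assert.h>

static volatile unsigned long sequence;
static unsigned long shared_value;

/* Writer side (single writer assumed): odd count, update, even count. */
static void publish(unsigned long val)
{
	sequence++;
	__sync_synchronize();
	shared_value = val;
	__sync_synchronize();
	sequence++;
}

/* Reader side: retry until a stable, even sequence brackets the read. */
static unsigned long snapshot(void)
{
	unsigned long s1, s2, val;

	do {
		s1 = sequence;
		__sync_synchronize();
		val = shared_value;
		__sync_synchronize();
		s2 = sequence;
	} while ((s1 & 1) || s1 != s2);

	return val;
}

int main(void)
{
	publish(42);
	assert(snapshot() == 42);
	return 0;
}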
+ */ +#include "dscr.h" + +int dscr_explicit(void) +{ + unsigned long i, dscr = 0; + + srand(getpid()); + set_dscr(dscr); + + for (i = 0; i < COUNT; i++) { + unsigned long cur_dscr, cur_dscr_usr; + double ret = uniform_deviate(rand()); + + if (ret < 0.001) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_dscr(dscr); + } + + cur_dscr = get_dscr(); + if (cur_dscr != dscr) { + fprintf(stderr, "Kernel DSCR should be %ld but " + "is %ld\n", dscr, cur_dscr); + return 1; + } + + ret = uniform_deviate(rand()); + if (ret < 0.001) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + set_dscr_usr(dscr); + } + + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr_usr != dscr) { + fprintf(stderr, "User DSCR should be %ld but " + "is %ld\n", dscr, cur_dscr_usr); + return 1; + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_explicit, "dscr_explicit_test"); +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c new file mode 100644 index 000000000000..8265504de571 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_exec_test.c @@ -0,0 +1,117 @@ +/* + * POWER Data Stream Control Register (DSCR) fork exec test + * + * This testcase modifies the DSCR using mtspr, forks & execs and + * verifies that the child is using the changed DSCR using mfspr. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static char prog[LEN_MAX]; + +static void do_exec(unsigned long parent_dscr) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + if (cur_dscr != parent_dscr) { + fprintf(stderr, "Parent DSCR %ld was not inherited " + "over exec (kernel value)\n", parent_dscr); + exit(1); + } + + if (cur_dscr_usr != parent_dscr) { + fprintf(stderr, "Parent DSCR %ld was not inherited " + "over exec (user value)\n", parent_dscr); + exit(1); + } + exit(0); +} + +int dscr_inherit_exec(void) +{ + unsigned long i, dscr = 0; + pid_t pid; + + for (i = 0; i < COUNT; i++) { + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + if (dscr == get_default_dscr()) + continue; + + if (i % 2 == 0) + set_dscr_usr(dscr); + else + set_dscr(dscr); + + /* + * XXX: Force a context switch out so that DSCR + * current value is copied into the thread struct + * which is required for the child to inherit the + * changed value. 
+ */ + sleep(1); + + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + exit(1); + } else if (pid) { + int status; + + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid() failed"); + exit(1); + } + + if (!WIFEXITED(status)) { + fprintf(stderr, "Child didn't exit cleanly\n"); + exit(1); + } + + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child didn't exit cleanly\n"); + return 1; + } + } else { + char dscr_str[16]; + + sprintf(dscr_str, "%ld", dscr); + execlp(prog, prog, "exec", dscr_str, NULL); + exit(1); + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + if (argc == 3 && !strcmp(argv[1], "exec")) { + unsigned long parent_dscr; + + parent_dscr = atoi(argv[2]); + do_exec(parent_dscr); + } else if (argc != 1) { + fprintf(stderr, "Usage: %s\n", argv[0]); + exit(1); + } + + strncpy(prog, argv[0], strlen(argv[0])); + return test_harness(dscr_inherit_exec, "dscr_inherit_exec_test"); +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c new file mode 100644 index 000000000000..4e414caf7f40 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_inherit_test.c @@ -0,0 +1,95 @@ +/* + * POWER Data Stream Control Register (DSCR) fork test + * + * This testcase modifies the DSCR using mtspr, forks and then + * verifies that the child process has the correct changed DSCR + * value using mfspr. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2012, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +int dscr_inherit(void) +{ + unsigned long i, dscr = 0; + pid_t pid; + + srand(getpid()); + set_dscr(dscr); + + for (i = 0; i < COUNT; i++) { + unsigned long cur_dscr, cur_dscr_usr; + + dscr++; + if (dscr > DSCR_MAX) + dscr = 0; + + if (i % 2 == 0) + set_dscr_usr(dscr); + else + set_dscr(dscr); + + /* + * XXX: Force a context switch out so that DSCR + * current value is copied into the thread struct + * which is required for the child to inherit the + * changed value. 
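Both inherit tests use the same parent/child protocol: fork(), have the child exit(0) on success and exit(1) on a mismatch, and have the parent treat anything other than a clean zero exit as a failure. The WIFEXITED/WEXITSTATUS handling is easy to get subtly wrong, so here it is in isolation (a sketch, not lifted from the tests; run_in_child() and child_ok() are made-up names for the example):

#include <stdio.h>
#include <stdlib.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/wait.h>

static int run_in_child(int (*fn)(void))
{
	int status;
	pid_t pid = fork();

	if (pid == -1) {
		perror("fork() failed");
		return 1;
	}

	if (pid == 0)
		exit(fn());			/* child: exit status is the result */

	if (waitpid(pid, &status, 0) == -1) {
		perror("waitpid() failed");
		return 1;
	}

	if (!WIFEXITED(status)) {		/* killed by a signal, etc. */
		fprintf(stderr, "Child didn't exit cleanly\n");
		return 1;
	}

	return WEXITSTATUS(status);		/* 0 on success */
}

static int child_ok(void) { return 0; }

int main(void)
{
	return run_in_child(child_ok);
}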
+ */ + sleep(1); + + pid = fork(); + if (pid == -1) { + perror("fork() failed"); + exit(1); + } else if (pid) { + int status; + + if (waitpid(pid, &status, 0) == -1) { + perror("waitpid() failed"); + exit(1); + } + + if (!WIFEXITED(status)) { + fprintf(stderr, "Child didn't exit cleanly\n"); + exit(1); + } + + if (WEXITSTATUS(status) != 0) { + fprintf(stderr, "Child didn't exit cleanly\n"); + return 1; + } + } else { + cur_dscr = get_dscr(); + if (cur_dscr != dscr) { + fprintf(stderr, "Kernel DSCR should be %ld " + "but is %ld\n", dscr, cur_dscr); + exit(1); + } + + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr_usr != dscr) { + fprintf(stderr, "User DSCR should be %ld " + "but is %ld\n", dscr, cur_dscr_usr); + exit(1); + } + exit(0); + } + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_inherit, "dscr_inherit_test"); +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c new file mode 100644 index 000000000000..17fb1b43c320 --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_test.c @@ -0,0 +1,97 @@ +/* + * POWER Data Stream Control Register (DSCR) sysfs interface test + * + * This test updates to system wide DSCR default through the sysfs interface + * and then verifies that all the CPU specific DSCR defaults are updated as + * well verified from their sysfs interfaces. + * + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#include "dscr.h" + +static int check_cpu_dscr_default(char *file, unsigned long val) +{ + char buf[10]; + int fd, rc; + + fd = open(file, O_RDWR); + if (fd == -1) { + perror("open() failed"); + return 1; + } + + rc = read(fd, buf, sizeof(buf)); + if (rc == -1) { + perror("read() failed"); + return 1; + } + close(fd); + + buf[rc] = '\0'; + if (strtol(buf, NULL, 16) != val) { + printf("DSCR match failed: %ld (system) %ld (cpu)\n", + val, strtol(buf, NULL, 16)); + return 1; + } + return 0; +} + +static int check_all_cpu_dscr_defaults(unsigned long val) +{ + DIR *sysfs; + struct dirent *dp; + char file[LEN_MAX]; + + sysfs = opendir(CPU_PATH); + if (!sysfs) { + perror("opendir() failed"); + return 1; + } + + while ((dp = readdir(sysfs))) { + if (!(dp->d_type & DT_DIR)) + continue; + if (!strcmp(dp->d_name, "cpuidle")) + continue; + if (!strstr(dp->d_name, "cpu")) + continue; + + sprintf(file, "%s%s/dscr", CPU_PATH, dp->d_name); + if (access(file, F_OK)) + continue; + + if (check_cpu_dscr_default(file, val)) + return 1; + } + closedir(sysfs); + return 0; +} + +int dscr_sysfs(void) +{ + unsigned long orig_dscr_default; + int i, j; + + orig_dscr_default = get_default_dscr(); + for (i = 0; i < COUNT; i++) { + for (j = 0; j < DSCR_MAX; j++) { + set_default_dscr(j); + if (check_all_cpu_dscr_defaults(j)) + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_sysfs, "dscr_sysfs_test"); +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c new file mode 100644 index 000000000000..ad97b592eccc --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_sysfs_thread_test.c @@ -0,0 +1,80 @@ +/* + * POWER Data Stream Control Register 
(DSCR) sysfs thread test + * + * This test updates the system wide DSCR default value through + * sysfs interface which should then update all the CPU specific + * DSCR default values which must also be then visible to threads + * executing on individual CPUs on the system. + * + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. + */ +#define _GNU_SOURCE +#include "dscr.h" + +static int test_thread_dscr(unsigned long val) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + + if (val != cur_dscr) { + printf("[cpu %d] Kernel DSCR should be %ld but is %ld\n", + sched_getcpu(), val, cur_dscr); + return 1; + } + + if (val != cur_dscr_usr) { + printf("[cpu %d] User DSCR should be %ld but is %ld\n", + sched_getcpu(), val, cur_dscr_usr); + return 1; + } + return 0; +} + +static int check_cpu_dscr_thread(unsigned long val) +{ + cpu_set_t mask; + int cpu; + + for (cpu = 0; cpu < CPU_SETSIZE; cpu++) { + CPU_ZERO(&mask); + CPU_SET(cpu, &mask); + if (sched_setaffinity(0, sizeof(mask), &mask)) + continue; + + if (test_thread_dscr(val)) + return 1; + } + return 0; + +} + +int dscr_sysfs_thread(void) +{ + unsigned long orig_dscr_default; + int i, j; + + orig_dscr_default = get_default_dscr(); + for (i = 0; i < COUNT; i++) { + for (j = 0; j < DSCR_MAX; j++) { + set_default_dscr(j); + if (check_cpu_dscr_thread(j)) + goto fail; + } + } + set_default_dscr(orig_dscr_default); + return 0; +fail: + set_default_dscr(orig_dscr_default); + return 1; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_sysfs_thread, "dscr_sysfs_thread_test"); +} diff --git a/tools/testing/selftests/powerpc/dscr/dscr_user_test.c b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c new file mode 100644 index 000000000000..77d16b5e7dca --- /dev/null +++ b/tools/testing/selftests/powerpc/dscr/dscr_user_test.c @@ -0,0 +1,61 @@ +/* + * POWER Data Stream Control Register (DSCR) SPR test + * + * This test modifies the DSCR value through both the SPR number + * based mtspr instruction and then makes sure that the same is + * reflected through mfspr instruction using either of the SPR + * numbers. + * + * When using the privilege state SPR, the instructions such as + * mfspr or mtspr are priviledged and the kernel emulates them + * for us. Instructions using problem state SPR can be exuecuted + * directly without any emulation if the HW supports them. Else + * they also get emulated by the kernel. + * + * Copyright 2013, Anton Blanchard, IBM Corporation. + * Copyright 2015, Anshuman Khandual, IBM Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 as published + * by the Free Software Foundation. 
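check_cpu_dscr_thread() above walks every possible CPU by rebinding the current thread with sched_setaffinity() and simply skipping CPUs that are offline or not present (the call fails for those). That iteration idiom is reusable on its own; a standalone sketch, assuming _GNU_SOURCE for the CPU_* macros and sched_getcpu():

#define _GNU_SOURCE
#include <sched.h>
#include <stdio.h>

int main(void)
{
	cpu_set_t mask;
	int cpu;

	for (cpu = 0; cpu < CPU_SETSIZE; cpu++) {
		CPU_ZERO(&mask);
		CPU_SET(cpu, &mask);

		/* Fails for CPUs that are offline or don't exist: skip them. */
		if (sched_setaffinity(0, sizeof(mask), &mask))
			continue;

		printf("now running on cpu %d\n", sched_getcpu());
	}

	return 0;
}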
+ */ +#include "dscr.h" + +static int check_dscr(char *str) +{ + unsigned long cur_dscr, cur_dscr_usr; + + cur_dscr = get_dscr(); + cur_dscr_usr = get_dscr_usr(); + if (cur_dscr != cur_dscr_usr) { + printf("%s set, kernel get %lx != user get %lx\n", + str, cur_dscr, cur_dscr_usr); + return 1; + } + return 0; +} + +int dscr_user(void) +{ + int i; + + check_dscr(""); + + for (i = 0; i < COUNT; i++) { + set_dscr(i); + if (check_dscr("kernel")) + return 1; + } + + for (i = 0; i < COUNT; i++) { + set_dscr_usr(i); + if (check_dscr("user")) + return 1; + } + return 0; +} + +int main(int argc, char *argv[]) +{ + return test_harness(dscr_user, "dscr_user_test"); +} diff --git a/tools/testing/selftests/powerpc/harness.c b/tools/testing/selftests/powerpc/harness.c index 8ebc58a09311..f7997affd143 100644 --- a/tools/testing/selftests/powerpc/harness.c +++ b/tools/testing/selftests/powerpc/harness.c @@ -11,6 +11,10 @@ #include <sys/types.h> #include <sys/wait.h> #include <unistd.h> +#include <elf.h> +#include <fcntl.h> +#include <link.h> +#include <sys/stat.h> #include "subunit.h" #include "utils.h" @@ -112,3 +116,46 @@ int test_harness(int (test_function)(void), char *name) return rc; } + +static char auxv[4096]; + +void *get_auxv_entry(int type) +{ + ElfW(auxv_t) *p; + void *result; + ssize_t num; + int fd; + + fd = open("/proc/self/auxv", O_RDONLY); + if (fd == -1) { + perror("open"); + return NULL; + } + + result = NULL; + + num = read(fd, auxv, sizeof(auxv)); + if (num < 0) { + perror("read"); + goto out; + } + + if (num > sizeof(auxv)) { + printf("Overflowed auxv buffer\n"); + goto out; + } + + p = (ElfW(auxv_t) *)auxv; + + while (p->a_type != AT_NULL) { + if (p->a_type == type) { + result = (void *)p->a_un.a_val; + break; + } + + p++; + } +out: + close(fd); + return result; +} diff --git a/tools/testing/selftests/powerpc/mm/Makefile b/tools/testing/selftests/powerpc/mm/Makefile index a14c538dd7f8..41cc3ed66818 100644 --- a/tools/testing/selftests/powerpc/mm/Makefile +++ b/tools/testing/selftests/powerpc/mm/Makefile @@ -1,21 +1,16 @@ noarg: $(MAKE) -C ../ -PROGS := hugetlb_vs_thp_test subpage_prot +TEST_PROGS := hugetlb_vs_thp_test subpage_prot -all: $(PROGS) tempfile +all: $(TEST_PROGS) tempfile -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk tempfile: dd if=/dev/zero of=tempfile bs=64k count=1 clean: - rm -f $(PROGS) tempfile - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) tempfile diff --git a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c index 3d8e5b033e1d..49003674de4f 100644 --- a/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c +++ b/tools/testing/selftests/powerpc/mm/hugetlb_vs_thp_test.c @@ -21,9 +21,13 @@ static int test_body(void) * Typically the mmap will fail because no huge pages are * allocated on the system. But if there are huge pages * allocated the mmap will succeed. That's fine too, we just - * munmap here before continuing. + * munmap here before continuing. munmap() length of + * MAP_HUGETLB memory must be hugepage aligned. 
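Moving get_auxv_entry() from pmu/lib.c into harness.c (below) makes the auxiliary-vector lookup available to every powerpc selftest, not just the PMU ones; tm-syscall later in this series uses it to probe AT_HWCAP2. A small usage sketch, assuming the test is linked against harness.c and built with the usual -I flags so that utils.h is found (the test name is made up):

#include <stdio.h>
#include <elf.h>		/* AT_PAGESZ */
#include "utils.h"

static int show_page_size(void)
{
	unsigned long pagesz = (unsigned long)get_auxv_entry(AT_PAGESZ);

	/* A NULL return means the entry wasn't found in /proc/self/auxv. */
	FAIL_IF(pagesz == 0);
	printf("AT_PAGESZ: %lu bytes\n", pagesz);
	return 0;
}

int main(void)
{
	return test_harness(show_page_size, "auxv_page_size");
}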
*/ - munmap(addr, SIZE); + if (munmap(addr, SIZE)) { + perror("munmap"); + return 1; + } } p = mmap(addr, SIZE, PROT_READ | PROT_WRITE, diff --git a/tools/testing/selftests/powerpc/pmu/Makefile b/tools/testing/selftests/powerpc/pmu/Makefile index c9f4263906a5..a9099d9f8f39 100644 --- a/tools/testing/selftests/powerpc/pmu/Makefile +++ b/tools/testing/selftests/powerpc/pmu/Makefile @@ -1,38 +1,42 @@ noarg: $(MAKE) -C ../ -PROGS := count_instructions l3_bank_test per_event_excludes +TEST_PROGS := count_instructions l3_bank_test per_event_excludes EXTRA_SOURCES := ../harness.c event.c lib.c -SUB_TARGETS = ebb +all: $(TEST_PROGS) ebb -all: $(PROGS) $(SUB_TARGETS) - -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) # loop.S can only be built 64-bit count_instructions: loop.S count_instructions.c $(EXTRA_SOURCES) $(CC) $(CFLAGS) -m64 -o $@ $^ -run_tests: all sub_run_tests - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk -clean: sub_clean - rm -f $(PROGS) loop.o +DEFAULT_RUN_TESTS := $(RUN_TESTS) +override define RUN_TESTS + $(DEFAULT_RUN_TESTS) + $(MAKE) -C ebb run_tests +endef -$(SUB_TARGETS): - $(MAKE) -k -C $@ all +DEFAULT_EMIT_TESTS := $(EMIT_TESTS) +override define EMIT_TESTS + $(DEFAULT_EMIT_TESTS) + $(MAKE) -s -C ebb emit_tests +endef -sub_run_tests: all - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET run_tests; \ - done; +DEFAULT_INSTALL_RULE := $(INSTALL_RULE) +override define INSTALL_RULE + $(DEFAULT_INSTALL_RULE) + $(MAKE) -C ebb install +endef -sub_clean: - @for TARGET in $(SUB_TARGETS); do \ - $(MAKE) -C $$TARGET clean; \ - done; +clean: + rm -f $(TEST_PROGS) loop.o + $(MAKE) -C ebb clean + +ebb: + $(MAKE) -k -C $@ all -.PHONY: all run_tests clean sub_run_tests sub_clean $(SUB_TARGETS) +.PHONY: all run_tests clean ebb diff --git a/tools/testing/selftests/powerpc/pmu/ebb/Makefile b/tools/testing/selftests/powerpc/pmu/ebb/Makefile index 3dc4332698cb..5cdc9dbf2b27 100644 --- a/tools/testing/selftests/powerpc/pmu/ebb/Makefile +++ b/tools/testing/selftests/powerpc/pmu/ebb/Makefile @@ -4,7 +4,7 @@ noarg: # The EBB handler is 64-bit code and everything links against it CFLAGS += -m64 -PROGS := reg_access_test event_attributes_test cycles_test \ +TEST_PROGS := reg_access_test event_attributes_test cycles_test \ cycles_with_freeze_test pmc56_overflow_test \ ebb_vs_cpu_event_test cpu_event_vs_ebb_test \ cpu_event_pinned_vs_ebb_test task_event_vs_ebb_test \ @@ -16,18 +16,15 @@ PROGS := reg_access_test event_attributes_test cycles_test \ lost_exception_test no_handler_test \ cycles_with_mmcr2_test -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S +$(TEST_PROGS): ../../harness.c ../event.c ../lib.c ebb.c ebb_handler.S trace.c busy_loop.S instruction_count_test: ../loop.S lost_exception_test: ../lib.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../../lib.mk clean: - rm -f $(PROGS) + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/pmu/lib.c b/tools/testing/selftests/powerpc/pmu/lib.c index 9768dea37bf3..a07104c2afe6 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.c +++ b/tools/testing/selftests/powerpc/pmu/lib.c @@ -5,15 +5,10 @@ #define _GNU_SOURCE /* For CPU_ZERO etc. 
*/ -#include <elf.h> #include <errno.h> -#include <fcntl.h> -#include <link.h> #include <sched.h> #include <setjmp.h> #include <stdlib.h> -#include <sys/stat.h> -#include <sys/types.h> #include <sys/wait.h> #include "utils.h" @@ -256,45 +251,3 @@ out: return rc; } -static char auxv[4096]; - -void *get_auxv_entry(int type) -{ - ElfW(auxv_t) *p; - void *result; - ssize_t num; - int fd; - - fd = open("/proc/self/auxv", O_RDONLY); - if (fd == -1) { - perror("open"); - return NULL; - } - - result = NULL; - - num = read(fd, auxv, sizeof(auxv)); - if (num < 0) { - perror("read"); - goto out; - } - - if (num > sizeof(auxv)) { - printf("Overflowed auxv buffer\n"); - goto out; - } - - p = (ElfW(auxv_t) *)auxv; - - while (p->a_type != AT_NULL) { - if (p->a_type == type) { - result = (void *)p->a_un.a_val; - break; - } - - p++; - } -out: - close(fd); - return result; -} diff --git a/tools/testing/selftests/powerpc/pmu/lib.h b/tools/testing/selftests/powerpc/pmu/lib.h index 0f0339c8a6f6..ca5d72ae3be6 100644 --- a/tools/testing/selftests/powerpc/pmu/lib.h +++ b/tools/testing/selftests/powerpc/pmu/lib.h @@ -29,7 +29,6 @@ extern int notify_parent(union pipe write_pipe); extern int notify_parent_of_error(union pipe write_pipe); extern pid_t eat_cpu(int (test_function)(void)); extern bool require_paranoia_below(int level); -extern void *get_auxv_entry(int type); struct addr_range { uint64_t first, last; diff --git a/tools/testing/selftests/powerpc/primitives/Makefile b/tools/testing/selftests/powerpc/primitives/Makefile index ea737ca01732..b68c6221d3d1 100644 --- a/tools/testing/selftests/powerpc/primitives/Makefile +++ b/tools/testing/selftests/powerpc/primitives/Makefile @@ -1,17 +1,12 @@ CFLAGS += -I$(CURDIR) -PROGS := load_unaligned_zeropad +TEST_PROGS := load_unaligned_zeropad -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/stringloops/Makefile b/tools/testing/selftests/powerpc/stringloops/Makefile index 506d77346477..2a728f4d2873 100644 --- a/tools/testing/selftests/powerpc/stringloops/Makefile +++ b/tools/testing/selftests/powerpc/stringloops/Makefile @@ -2,19 +2,14 @@ CFLAGS += -m64 CFLAGS += -I$(CURDIR) -PROGS := memcmp +TEST_PROGS := memcmp EXTRA_SOURCES := memcmp_64.S ../harness.c -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): $(EXTRA_SOURCES) +$(TEST_PROGS): $(EXTRA_SOURCES) -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +include ../../lib.mk clean: - rm -f $(PROGS) *.o - -.PHONY: all run_tests clean + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/switch_endian/.gitignore b/tools/testing/selftests/powerpc/switch_endian/.gitignore new file mode 100644 index 000000000000..89e762eab676 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/.gitignore @@ -0,0 +1,2 @@ +switch_endian_test +check-reversed.S diff --git a/tools/testing/selftests/powerpc/switch_endian/Makefile b/tools/testing/selftests/powerpc/switch_endian/Makefile new file mode 100644 index 000000000000..e21d10674e54 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/Makefile @@ -0,0 +1,18 @@ +TEST_PROGS := switch_endian_test + +ASFLAGS += -O2 -Wall -g -nostdlib -m64 + +all: $(TEST_PROGS) + +switch_endian_test: check-reversed.S + +check-reversed.o: check.o + $(CROSS_COMPILE)objcopy -j .text --reverse-bytes=4 -O 
binary $< $@ + +check-reversed.S: check-reversed.o + hexdump -v -e '/1 ".byte 0x%02X\n"' $< > $@ + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) *.o check-reversed.S diff --git a/tools/testing/selftests/powerpc/switch_endian/check.S b/tools/testing/selftests/powerpc/switch_endian/check.S new file mode 100644 index 000000000000..e2484d2c24f4 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/check.S @@ -0,0 +1,100 @@ +#include "common.h" + +/* + * Checks that registers contain what we expect, ie. they were not clobbered by + * the syscall. + * + * r15: pattern to check registers against. + * + * At the end r3 == 0 if everything's OK. + */ + nop # guaranteed to be illegal in reverse-endian + mr r9,r15 + cmpd r9,r3 # check r3 + bne 1f + addi r9,r15,4 # check r4 + cmpd r9,r4 + bne 1f + lis r9,0x00FF # check CR + ori r9,r9,0xF000 + mfcr r10 + and r10,r10,r9 + cmpw r9,r10 + addi r9,r15,34 + bne 1f + addi r9,r15,32 # check LR + mflr r10 + cmpd r9,r10 + bne 1f + addi r9,r15,5 # check r5 + cmpd r9,r5 + bne 1f + addi r9,r15,6 # check r6 + cmpd r9,r6 + bne 1f + addi r9,r15,7 # check r7 + cmpd r9,r7 + bne 1f + addi r9,r15,8 # check r8 + cmpd r9,r8 + bne 1f + addi r9,r15,13 # check r13 + cmpd r9,r13 + bne 1f + addi r9,r15,14 # check r14 + cmpd r9,r14 + bne 1f + addi r9,r15,16 # check r16 + cmpd r9,r16 + bne 1f + addi r9,r15,17 # check r17 + cmpd r9,r17 + bne 1f + addi r9,r15,18 # check r18 + cmpd r9,r18 + bne 1f + addi r9,r15,19 # check r19 + cmpd r9,r19 + bne 1f + addi r9,r15,20 # check r20 + cmpd r9,r20 + bne 1f + addi r9,r15,21 # check r21 + cmpd r9,r21 + bne 1f + addi r9,r15,22 # check r22 + cmpd r9,r22 + bne 1f + addi r9,r15,23 # check r23 + cmpd r9,r23 + bne 1f + addi r9,r15,24 # check r24 + cmpd r9,r24 + bne 1f + addi r9,r15,25 # check r25 + cmpd r9,r25 + bne 1f + addi r9,r15,26 # check r26 + cmpd r9,r26 + bne 1f + addi r9,r15,27 # check r27 + cmpd r9,r27 + bne 1f + addi r9,r15,28 # check r28 + cmpd r9,r28 + bne 1f + addi r9,r15,29 # check r29 + cmpd r9,r29 + bne 1f + addi r9,r15,30 # check r30 + cmpd r9,r30 + bne 1f + addi r9,r15,31 # check r31 + cmpd r9,r31 + bne 1f + b 2f +1: mr r3, r9 + li r0, __NR_exit + sc +2: li r0, __NR_switch_endian + nop diff --git a/tools/testing/selftests/powerpc/switch_endian/common.h b/tools/testing/selftests/powerpc/switch_endian/common.h new file mode 100644 index 000000000000..69e399698c64 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/common.h @@ -0,0 +1,6 @@ +#include <ppc-asm.h> +#include <asm/unistd.h> + +#ifndef __NR_switch_endian +#define __NR_switch_endian 363 +#endif diff --git a/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S new file mode 100644 index 000000000000..ef7c971abb67 --- /dev/null +++ b/tools/testing/selftests/powerpc/switch_endian/switch_endian_test.S @@ -0,0 +1,81 @@ +#include "common.h" + + .data + .balign 8 +message: + .ascii "success: switch_endian_test\n\0" + + .section ".toc" + .balign 8 +pattern: + .llong 0x5555AAAA5555AAAA + + .text +FUNC_START(_start) + /* Load the pattern */ + ld r15, pattern@TOC(%r2) + + /* Setup CR, only CR2-CR4 are maintained */ + lis r3, 0x00FF + ori r3, r3, 0xF000 + mtcr r3 + + /* Load the pattern slightly modified into the registers */ + mr r3, r15 + addi r4, r15, 4 + + addi r5, r15, 32 + mtlr r5 + + addi r5, r15, 5 + addi r6, r15, 6 + addi r7, r15, 7 + addi r8, r15, 8 + + /* r9 - r12 are clobbered */ + + addi r13, r15, 13 + addi r14, r15, 14 + + /* Skip r15 we're using it */ + + 
addi r16, r15, 16 + addi r17, r15, 17 + addi r18, r15, 18 + addi r19, r15, 19 + addi r20, r15, 20 + addi r21, r15, 21 + addi r22, r15, 22 + addi r23, r15, 23 + addi r24, r15, 24 + addi r25, r15, 25 + addi r26, r15, 26 + addi r27, r15, 27 + addi r28, r15, 28 + addi r29, r15, 29 + addi r30, r15, 30 + addi r31, r15, 31 + + /* + * Call the syscall to switch endian. + * It clobbers r9-r12, XER, CTR and CR0-1,5-7. + */ + li r0, __NR_switch_endian + sc + +#include "check-reversed.S" + + /* Flip back, r0 already has the switch syscall number */ + .long 0x02000044 /* sc */ + +#include "check.S" + + li r0, __NR_write + li r3, 1 /* stdout */ + ld r4, message@got(%r2) + li r5, 28 /* strlen(message3) */ + sc + li r0, __NR_exit + li r3, 0 + sc + b . diff --git a/tools/testing/selftests/powerpc/tm/.gitignore b/tools/testing/selftests/powerpc/tm/.gitignore index 33d02cc54a3e..2699635d2cd9 100644 --- a/tools/testing/selftests/powerpc/tm/.gitignore +++ b/tools/testing/selftests/powerpc/tm/.gitignore @@ -1 +1,2 @@ tm-resched-dscr +tm-syscall diff --git a/tools/testing/selftests/powerpc/tm/Makefile b/tools/testing/selftests/powerpc/tm/Makefile index 2cede239a074..4bea62a319dc 100644 --- a/tools/testing/selftests/powerpc/tm/Makefile +++ b/tools/testing/selftests/powerpc/tm/Makefile @@ -1,15 +1,13 @@ -PROGS := tm-resched-dscr +TEST_PROGS := tm-resched-dscr tm-syscall -all: $(PROGS) +all: $(TEST_PROGS) -$(PROGS): ../harness.c +$(TEST_PROGS): ../harness.c -run_tests: all - @-for PROG in $(PROGS); do \ - ./$$PROG; \ - done; +tm-syscall: tm-syscall-asm.S +tm-syscall: CFLAGS += -mhtm -I../../../../../usr/include -clean: - rm -f $(PROGS) *.o +include ../../lib.mk -.PHONY: all run_tests clean +clean: + rm -f $(TEST_PROGS) *.o diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S new file mode 100644 index 000000000000..431f61ae2368 --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-syscall-asm.S @@ -0,0 +1,27 @@ +#include <ppc-asm.h> +#include <asm/unistd.h> + + .text +FUNC_START(getppid_tm_active) + tbegin. + beq 1f + li r0, __NR_getppid + sc + tend. + blr +1: + li r3, -1 + blr + +FUNC_START(getppid_tm_suspended) + tbegin. + beq 1f + li r0, __NR_getppid + tsuspend. + sc + tresume. + tend. + blr +1: + li r3, -1 + blr diff --git a/tools/testing/selftests/powerpc/tm/tm-syscall.c b/tools/testing/selftests/powerpc/tm/tm-syscall.c new file mode 100644 index 000000000000..1276e23da63b --- /dev/null +++ b/tools/testing/selftests/powerpc/tm/tm-syscall.c @@ -0,0 +1,122 @@ +/* + * Copyright 2015, Sam Bobroff, IBM Corp. + * Licensed under GPLv2. + * + * Test the kernel's system call code to ensure that a system call + * made from within an active HTM transaction is aborted with the + * correct failure code. + * Conversely, ensure that a system call made from within a + * suspended transaction can succeed. 
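The switch_endian test above runs the same register checks twice: once as normally assembled code (check.S) and once as check-reversed.S, which the Makefile produces by objcopy'ing the text section with --reverse-bytes=4 so that the instructions decode correctly after the endian switch. The hard-coded `.long 0x02000044` is that transformation applied by hand to the `sc` instruction (0x44000002). A tiny host-side demonstration of the byte swap (illustrative only):

#include <stdio.h>
#include <stdint.h>
#include <byteswap.h>

int main(void)
{
	uint32_t sc_insn = 0x44000002;		/* powerpc "sc" opcode */

	/* Same effect as objcopy --reverse-bytes=4 on a single word. */
	printf("0x%08x reversed is 0x%08x\n",
	       sc_insn, (uint32_t)bswap_32(sc_insn));
	return 0;
}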
+ */ + +#include <stdio.h> +#include <unistd.h> +#include <sys/syscall.h> +#include <asm/tm.h> +#include <asm/cputable.h> +#include <linux/auxvec.h> +#include <sys/time.h> +#include <stdlib.h> + +#include "utils.h" + +extern int getppid_tm_active(void); +extern int getppid_tm_suspended(void); + +unsigned retries = 0; + +#define TEST_DURATION 10 /* seconds */ +#define TM_RETRIES 100 + +long failure_code(void) +{ + return __builtin_get_texasru() >> 24; +} + +bool failure_is_persistent(void) +{ + return (failure_code() & TM_CAUSE_PERSISTENT) == TM_CAUSE_PERSISTENT; +} + +bool failure_is_syscall(void) +{ + return (failure_code() & TM_CAUSE_SYSCALL) == TM_CAUSE_SYSCALL; +} + +pid_t getppid_tm(bool suspend) +{ + int i; + pid_t pid; + + for (i = 0; i < TM_RETRIES; i++) { + if (suspend) + pid = getppid_tm_suspended(); + else + pid = getppid_tm_active(); + + if (pid >= 0) + return pid; + + if (failure_is_persistent()) { + if (failure_is_syscall()) + return -1; + + printf("Unexpected persistent transaction failure.\n"); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + exit(-1); + } + + retries++; + } + + printf("Exceeded limit of %d temporary transaction failures.\n", TM_RETRIES); + printf("TEXASR 0x%016lx, TFIAR 0x%016lx.\n", + __builtin_get_texasr(), __builtin_get_tfiar()); + + exit(-1); +} + +int tm_syscall(void) +{ + unsigned count = 0; + struct timeval end, now; + + SKIP_IF(!((long)get_auxv_entry(AT_HWCAP2) + & PPC_FEATURE2_HTM_NOSC)); + setbuf(stdout, NULL); + + printf("Testing transactional syscalls for %d seconds...\n", TEST_DURATION); + + gettimeofday(&end, NULL); + now.tv_sec = TEST_DURATION; + now.tv_usec = 0; + timeradd(&end, &now, &end); + + for (count = 0; timercmp(&now, &end, <); count++) { + /* + * Test a syscall within a suspended transaction and verify + * that it succeeds. + */ + FAIL_IF(getppid_tm(true) == -1); /* Should succeed. */ + + /* + * Test a syscall within an active transaction and verify that + * it fails with the correct failure code. + */ + FAIL_IF(getppid_tm(false) != -1); /* Should fail... */ + FAIL_IF(!failure_is_persistent()); /* ...persistently... */ + FAIL_IF(!failure_is_syscall()); /* ...with code syscall. 
*/ + gettimeofday(&now, 0); + } + + printf("%d active and suspended transactions behaved correctly.\n", count); + printf("(There were %d transaction retries.)\n", retries); + + return 0; +} + +int main(void) +{ + return test_harness(tm_syscall, "tm_syscall"); +} diff --git a/tools/testing/selftests/powerpc/utils.h b/tools/testing/selftests/powerpc/utils.h index a93777ae0684..b7d41086bb0a 100644 --- a/tools/testing/selftests/powerpc/utils.h +++ b/tools/testing/selftests/powerpc/utils.h @@ -15,11 +15,12 @@ typedef signed long long s64; /* Just for familiarity */ typedef uint32_t u32; +typedef uint16_t u16; typedef uint8_t u8; int test_harness(int (test_function)(void), char *name); - +extern void *get_auxv_entry(int type); /* Yes, this is evil */ #define FAIL_IF(x) \ diff --git a/tools/testing/selftests/powerpc/vphn/.gitignore b/tools/testing/selftests/powerpc/vphn/.gitignore new file mode 100644 index 000000000000..7c04395010cb --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/.gitignore @@ -0,0 +1 @@ +test-vphn diff --git a/tools/testing/selftests/powerpc/vphn/Makefile b/tools/testing/selftests/powerpc/vphn/Makefile new file mode 100644 index 000000000000..a485f2e286ae --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/Makefile @@ -0,0 +1,12 @@ +TEST_PROGS := test-vphn + +CFLAGS += -m64 + +all: $(TEST_PROGS) + +$(TEST_PROGS): ../harness.c + +include ../../lib.mk + +clean: + rm -f $(TEST_PROGS) diff --git a/tools/testing/selftests/powerpc/vphn/test-vphn.c b/tools/testing/selftests/powerpc/vphn/test-vphn.c new file mode 100644 index 000000000000..5742f6876b25 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/test-vphn.c @@ -0,0 +1,410 @@ +#include <stdio.h> +#include <byteswap.h> +#include "utils.h" +#include "subunit.h" + +#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__ +#define cpu_to_be32(x) bswap_32(x) +#define be32_to_cpu(x) bswap_32(x) +#define be16_to_cpup(x) bswap_16(*x) +#define cpu_to_be64(x) bswap_64(x) +#else +#define cpu_to_be32(x) (x) +#define be32_to_cpu(x) (x) +#define be16_to_cpup(x) (*x) +#define cpu_to_be64(x) (x) +#endif + +#include "vphn.c" + +static struct test { + char *descr; + long input[VPHN_REGISTER_COUNT]; + u32 expected[VPHN_ASSOC_BUFSIZE]; +} all_tests[] = { + { + "vphn: no data", + { + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000000 + } + }, + { + "vphn: 1 x 16-bit value", + { + 0x8001ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 16-bit values", + { + 0x80018002ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 3 x 16-bit values", + { + 0x800180028003ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + "vphn: 4 x 16-bit values", + { + 0x8001800280038004, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000004, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004 + } + }, + { + /* Parsing the next 16-bit value out of the next 64-bit input + * value. 
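tm_syscall() above bounds its stress loop by wall-clock time rather than iteration count, using gettimeofday() with the timeradd()/timercmp() macros from <sys/time.h>. The skeleton of that loop, separated from the TM specifics (illustrative only):

#include <stdio.h>
#include <sys/time.h>

#define TEST_DURATION 10	/* seconds */

int main(void)
{
	struct timeval end, now;
	unsigned long count;

	gettimeofday(&end, NULL);
	now.tv_sec = TEST_DURATION;
	now.tv_usec = 0;
	timeradd(&end, &now, &end);	/* end = current time + TEST_DURATION */

	for (count = 0; timercmp(&now, &end, <); count++) {
		/* ... one iteration of the real work goes here ... */
		gettimeofday(&now, NULL);
	}

	printf("ran %lu iterations in ~%d seconds\n", count, TEST_DURATION);
	return 0;
}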
+ */ + "vphn: 5 x 16-bit values", + { + 0x8001800280038004, + 0x8005ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + }, + { + 0x00000005, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005 + } + }, + { + /* Parse at most 6 x 64-bit input values */ + "vphn: 24 x 16-bit values", + { + 0x8001800280038004, + 0x8005800680078008, + 0x8009800a800b800c, + 0x800d800e800f8010, + 0x8011801280138014, + 0x8015801680178018 + }, + { + 0x00000018, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c, + 0x0000000d, + 0x0000000e, + 0x0000000f, + 0x00000010, + 0x00000011, + 0x00000012, + 0x00000013, + 0x00000014, + 0x00000015, + 0x00000016, + 0x00000017, + 0x00000018 + } + }, + { + "vphn: 1 x 32-bit value", + { + 0x00000001ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000001, + 0x00000001 + } + }, + { + "vphn: 2 x 32-bit values", + { + 0x0000000100000002, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + /* Parsing the next 32-bit value out of the next 64-bit input + * value. + */ + "vphn: 3 x 32-bit values", + { + 0x0000000100000002, + 0x00000003ffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003 + } + }, + { + /* Parse at most 6 x 64-bit input values */ + "vphn: 12 x 32-bit values", + { + 0x0000000100000002, + 0x0000000300000004, + 0x0000000500000006, + 0x0000000700000008, + 0x000000090000000a, + 0x0000000b0000000c + }, + { + 0x0000000c, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c + } + }, + { + "vphn: 16-bit value followed by 32-bit value", + { + 0x800100000002ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + "vphn: 32-bit value followed by 16-bit value", + { + 0x000000018002ffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000002, + 0x00000001, + 0x00000002 + } + }, + { + /* Parse a 32-bit value split accross two consecutives 64-bit + * input values. + */ + "vphn: 16-bit value followed by 2 x 32-bit values", + { + 0x8001000000020000, + 0x0003ffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005 + } + }, + { + /* The lower bits in 0x0001ffff don't get mixed up with the + * 0xffff terminator. + */ + "vphn: 32-bit value has all ones in 16 lower bits", + { + 0x0001ffff80028003, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff, + 0xffffffffffffffff + }, + { + 0x00000003, + 0x0001ffff, + 0x00000002, + 0x00000003 + } + }, + { + /* The following input doesn't follow the specification. 
+ */ + "vphn: last 32-bit value is truncated", + { + 0x0000000100000002, + 0x0000000300000004, + 0x0000000500000006, + 0x0000000700000008, + 0x000000090000000a, + 0x0000000b800c2bad + }, + { + 0x0000000c, + 0x00000001, + 0x00000002, + 0x00000003, + 0x00000004, + 0x00000005, + 0x00000006, + 0x00000007, + 0x00000008, + 0x00000009, + 0x0000000a, + 0x0000000b, + 0x0000000c + } + }, + { + "vphn: garbage after terminator", + { + 0xffff2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad, + 0x2bad2bad2bad2bad + }, + { + 0x00000000 + } + }, + { + NULL + } +}; + +static int test_one(struct test *test) +{ + __be32 output[VPHN_ASSOC_BUFSIZE] = { 0 }; + int i, len; + + vphn_unpack_associativity(test->input, output); + + len = be32_to_cpu(output[0]); + if (len != test->expected[0]) { + printf("expected %d elements, got %d\n", test->expected[0], + len); + return 1; + } + + for (i = 1; i < len; i++) { + u32 val = be32_to_cpu(output[i]); + if (val != test->expected[i]) { + printf("element #%d is 0x%x, should be 0x%x\n", i, val, + test->expected[i]); + return 1; + } + } + + return 0; +} + +static int test_vphn(void) +{ + static struct test *test; + + for (test = all_tests; test->descr; test++) { + int ret; + + ret = test_one(test); + test_finish(test->descr, ret); + if (ret) + return ret; + } + + return 0; +} + +int main(int argc, char **argv) +{ + return test_harness(test_vphn, "test-vphn"); +} diff --git a/tools/testing/selftests/powerpc/vphn/vphn.c b/tools/testing/selftests/powerpc/vphn/vphn.c new file mode 120000 index 000000000000..186b906e66d5 --- /dev/null +++ b/tools/testing/selftests/powerpc/vphn/vphn.c @@ -0,0 +1 @@ +../../../../../arch/powerpc/mm/vphn.c
\ No newline at end of file
diff --git a/tools/testing/selftests/powerpc/vphn/vphn.h b/tools/testing/selftests/powerpc/vphn/vphn.h
new file mode 120000
index 000000000000..7131efe38c65
--- /dev/null
+++ b/tools/testing/selftests/powerpc/vphn/vphn.h
@@ -0,0 +1 @@
+../../../../../arch/powerpc/mm/vphn.h
\ No newline at end of file diff --git a/tools/testing/selftests/ptrace/Makefile b/tools/testing/selftests/ptrace/Makefile index 47ae2d385ce8..453927fea90c 100644 --- a/tools/testing/selftests/ptrace/Makefile +++ b/tools/testing/selftests/ptrace/Makefile @@ -6,5 +6,6 @@ all: peeksiginfo clean: rm -f peeksiginfo -run_tests: all - @./peeksiginfo || echo "peeksiginfo selftests: [FAIL]" +TEST_PROGS := peeksiginfo + +include ../lib.mk diff --git a/tools/testing/selftests/rcutorture/bin/configinit.sh b/tools/testing/selftests/rcutorture/bin/configinit.sh index 15f1a17ca96e..3f81a1095206 100755 --- a/tools/testing/selftests/rcutorture/bin/configinit.sh +++ b/tools/testing/selftests/rcutorture/bin/configinit.sh @@ -66,7 +66,7 @@ make $buildloc $TORTURE_DEFCONFIG > $builddir/Make.defconfig.out 2>&1 mv $builddir/.config $builddir/.config.sav sh $T/upd.sh < $builddir/.config.sav > $builddir/.config cp $builddir/.config $builddir/.config.new -yes '' | make $buildloc oldconfig > $builddir/Make.modconfig.out 2>&1 +yes '' | make $buildloc oldconfig > $builddir/Make.oldconfig.out 2> $builddir/Make.oldconfig.err # verify new config matches specification. configcheck.sh $builddir/.config $c diff --git a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh index 4f5b20f367a9..d86bdd6b6cc2 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm-recheck.sh @@ -43,6 +43,10 @@ do if test -f "$i/console.log" then configcheck.sh $i/.config $i/ConfigFragment + if test -r $i/Make.oldconfig.err + then + cat $i/Make.oldconfig.err + fi parse-build.sh $i/Make.out $configfile parse-torture.sh $i/console.log $configfile parse-console.sh $i/console.log $configfile diff --git a/tools/testing/selftests/rcutorture/bin/kvm.sh b/tools/testing/selftests/rcutorture/bin/kvm.sh index 368d64ac779e..fbe2dbff1e21 100755 --- a/tools/testing/selftests/rcutorture/bin/kvm.sh +++ b/tools/testing/selftests/rcutorture/bin/kvm.sh @@ -55,7 +55,7 @@ usage () { echo " --bootargs kernel-boot-arguments" echo " --bootimage relative-path-to-kernel-boot-image" echo " --buildonly" - echo " --configs \"config-file list\"" + echo " --configs \"config-file list w/ repeat factor (3*TINY01)\"" echo " --cpus N" echo " --datestamp string" echo " --defconfig string" @@ -178,13 +178,26 @@ fi touch $T/cfgcpu for CF in $configs do - if test -f "$CONFIGFRAG/$CF" + case $CF in + [0-9]\**|[0-9][0-9]\**|[0-9][0-9][0-9]\**) + config_reps=`echo $CF | sed -e 's/\*.*$//'` + CF1=`echo $CF | sed -e 's/^[^*]*\*//'` + ;; + *) + config_reps=1 + CF1=$CF + ;; + esac + if test -f "$CONFIGFRAG/$CF1" then - cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF` - cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF" "$cpu_count"` - echo $CF $cpu_count >> $T/cfgcpu + cpu_count=`configNR_CPUS.sh $CONFIGFRAG/$CF1` + cpu_count=`configfrag_boot_cpus "$TORTURE_BOOTARGS" "$CONFIGFRAG/$CF1" "$cpu_count"` + for ((cur_rep=0;cur_rep<$config_reps;cur_rep++)) + do + echo $CF1 $cpu_count >> $T/cfgcpu + done else - echo "The --configs file $CF does not exist, terminating." + echo "The --configs file $CF1 does not exist, terminating." exit 1 fi done @@ -310,7 +323,7 @@ function dump(first, pastlast) cfr[jn] = cf[j] "." cfrep[cf[j]]; } if (cpusr[jn] > ncpus && ncpus != 0) - ovf = "(!)"; + ovf = "-ovf"; else ovf = ""; print "echo ", cfr[jn], cpusr[jn] ovf ": Starting build. 
`date`"; diff --git a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon index d2d2a86139db..f824b4c9d9d9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/CFcommon +++ b/tools/testing/selftests/rcutorture/configs/rcu/CFcommon @@ -1,2 +1,5 @@ CONFIG_RCU_TORTURE_TEST=y CONFIG_PRINTK_TIME=y +CONFIG_RCU_TORTURE_TEST_SLOW_CLEANUP=y +CONFIG_RCU_TORTURE_TEST_SLOW_INIT=y +CONFIG_RCU_TORTURE_TEST_SLOW_PREINIT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N index 9fbb41b9b314..1a087c3c8bb8 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-N @@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P index 4b6f272dba27..4837430a71c0 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P @@ -5,3 +5,4 @@ CONFIG_HOTPLUG_CPU=y CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot index 238bfe3bd0cc..84a7d51b7481 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/SRCU-P.boot @@ -1 +1 @@ -rcutorture.torture_type=srcu +rcutorture.torture_type=srcud diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 index 97f0a0b27ef7..2cc0e60eba6e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS01 @@ -5,5 +5,6 @@ CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y CONFIG_DEBUG_LOCK_ALLOC=y -CONFIG_PROVE_RCU=y -CONFIG_TASKS_RCU=y +CONFIG_PROVE_LOCKING=n +#CHECK#CONFIG_PROVE_RCU=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 index 696d2ea74d13..ad2be91e5ee7 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS02 @@ -2,4 +2,3 @@ CONFIG_SMP=n CONFIG_PREEMPT_NONE=y CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=n -CONFIG_TASKS_RCU=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 index 9c60da5b5d1d..c70c51d5ded1 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TASKS03 @@ -6,8 +6,8 @@ CONFIG_HIBERNATION=n CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y -CONFIG_TASKS_RCU=y CONFIG_HZ_PERIODIC=n CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=y CONFIG_NO_HZ_FULL_ALL=y +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 index 36e41df3d27a..f1892e0371c9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02 @@ -8,7 +8,7 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y +#CHECK#CONFIG_PROVE_RCU=y 
CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_DEBUG_OBJECTS_RCU_HEAD=n CONFIG_PREEMPT_COUNT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot index 0f0802730014..6c1a292a65fb 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TINY02.boot @@ -1,2 +1,3 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 +rcutorture.torture_type=rcu_bh diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 index f8a10a7500c6..8e9137f66831 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE01 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE01 @@ -16,3 +16,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 index 629122fb8b4a..aeea6a204d14 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02 @@ -14,10 +14,10 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T index a25de47888a4..2ac9e68ea3d1 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE02-T @@ -14,7 +14,6 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 CONFIG_RCU_FANOUT_LEAF=3 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=n diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 index 53f24e0a0ab6..72aa7d87ea99 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE03 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=8 +CONFIG_NR_CPUS=16 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y @@ -9,12 +9,12 @@ CONFIG_NO_HZ_IDLE=n CONFIG_NO_HZ_FULL=n CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y -CONFIG_RCU_FANOUT=4 -CONFIG_RCU_FANOUT_LEAF=4 -CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_FANOUT=2 +CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=y CONFIG_RCU_KTHREAD_PRIO=2 CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot new file mode 100644 index 000000000000..120c0c88d100 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE03.boot @@ -0,0 +1 @@ +rcutorture.onoff_interval=1 rcutorture.onoff_holdoff=30 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 index 0f84db35b36d..3f5112751cda 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE04 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE04 @@ -13,10 +13,10 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n -CONFIG_RCU_FANOUT=2 
-CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n +CONFIG_RCU_FANOUT=4 +CONFIG_RCU_FANOUT_LEAF=4 CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 index 212e3bfd2b2a..c04dfea6fd21 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE05 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE05 @@ -12,11 +12,11 @@ CONFIG_RCU_TRACE=n CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_NONE=y CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y +#CHECK#CONFIG_PROVE_RCU=y CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 index 7eee63b44218..f51d2c73a68e 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06 @@ -14,10 +14,10 @@ CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=6 CONFIG_RCU_FANOUT_LEAF=6 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=y CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y +#CHECK#CONFIG_PROVE_RCU=y CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=y +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot index da9a03a398db..dd90f28ed700 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE06.boot @@ -1,3 +1,4 @@ rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_bh=1 rcupdate.rcu_self_test_sched=1 +rcutree.rcu_fanout_exact=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 index 92a97fa97dec..f422af4ff5a3 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE07 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE07 @@ -15,8 +15,8 @@ CONFIG_RCU_TRACE=y CONFIG_HOTPLUG_CPU=y CONFIG_RCU_FANOUT=2 CONFIG_RCU_FANOUT_LEAF=2 -CONFIG_RCU_FANOUT_EXACT=n CONFIG_RCU_NOCB_CPU=n CONFIG_DEBUG_LOCK_ALLOC=n -CONFIG_RCU_CPU_STALL_INFO=y +CONFIG_RCU_CPU_STALL_INFO=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 index 5812027d6f9f..a24d2ca30646 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08 @@ -1,5 +1,5 @@ CONFIG_SMP=y -CONFIG_NR_CPUS=16 +CONFIG_NR_CPUS=8 CONFIG_PREEMPT_NONE=n CONFIG_PREEMPT_VOLUNTARY=n CONFIG_PREEMPT=y @@ -13,13 +13,13 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_PROVE_LOCKING=y -CONFIG_PROVE_RCU=y +#CHECK#CONFIG_PROVE_RCU=y CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +CONFIG_RCU_EXPERT=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T index 3eaeccacb083..b2b8cea69dc9 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T 
+++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T @@ -13,7 +13,6 @@ CONFIG_HOTPLUG_CPU=n CONFIG_SUSPEND=n CONFIG_HIBERNATION=n CONFIG_RCU_FANOUT=3 -CONFIG_RCU_FANOUT_EXACT=y CONFIG_RCU_FANOUT_LEAF=2 CONFIG_RCU_NOCB_CPU=y CONFIG_RCU_NOCB_CPU_ALL=y diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot new file mode 100644 index 000000000000..883149b5f2d1 --- /dev/null +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08-T.boot @@ -0,0 +1 @@ +rcutree.rcu_fanout_exact=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot index 2561daf605ad..fb066dc82769 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE08.boot @@ -1,3 +1,4 @@ rcutorture.torture_type=sched rcupdate.rcu_self_test=1 rcupdate.rcu_self_test_sched=1 +rcutree.rcu_fanout_exact=1 diff --git a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 index 6076b36f6c0b..aa4ed08d999d 100644 --- a/tools/testing/selftests/rcutorture/configs/rcu/TREE09 +++ b/tools/testing/selftests/rcutorture/configs/rcu/TREE09 @@ -16,3 +16,4 @@ CONFIG_DEBUG_LOCK_ALLOC=n CONFIG_RCU_CPU_STALL_INFO=n CONFIG_RCU_BOOST=n CONFIG_DEBUG_OBJECTS_RCU_HEAD=n +#CHECK#CONFIG_RCU_EXPERT=n diff --git a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt index ec03c883db00..b24c0004fc49 100644 --- a/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt +++ b/tools/testing/selftests/rcutorture/doc/TREE_RCU-kconfig.txt @@ -12,13 +12,12 @@ CONFIG_NO_HZ_IDLE -- Do those not otherwise specified. (Groups of two.) CONFIG_NO_HZ_FULL -- Do two, one with CONFIG_NO_HZ_FULL_SYSIDLE. CONFIG_NO_HZ_FULL_SYSIDLE -- Do one. CONFIG_PREEMPT -- Do half. (First three and #8.) -CONFIG_PROVE_LOCKING -- Do all but two, covering CONFIG_PROVE_RCU and not. -CONFIG_PROVE_RCU -- Do all but one under CONFIG_PROVE_LOCKING. +CONFIG_PROVE_LOCKING -- Do several, covering CONFIG_DEBUG_LOCK_ALLOC=y and not. +CONFIG_PROVE_RCU -- Hardwired to CONFIG_PROVE_LOCKING. CONFIG_RCU_BOOST -- one of PREEMPT_RCU. CONFIG_RCU_KTHREAD_PRIO -- set to 2 for _BOOST testing. -CONFIG_RCU_CPU_STALL_INFO -- Do one. -CONFIG_RCU_FANOUT -- Cover hierarchy as currently, but overlap with others. -CONFIG_RCU_FANOUT_EXACT -- Do one. +CONFIG_RCU_CPU_STALL_INFO -- Now default, avoid at least twice. +CONFIG_RCU_FANOUT -- Cover hierarchy, but overlap with others. CONFIG_RCU_FANOUT_LEAF -- Do one non-default. CONFIG_RCU_FAST_NO_HZ -- Do one, but not with CONFIG_RCU_NOCB_CPU_ALL. CONFIG_RCU_NOCB_CPU -- Do three, see below. @@ -27,28 +26,19 @@ CONFIG_RCU_NOCB_CPU_NONE -- Do one. CONFIG_RCU_NOCB_CPU_ZERO -- Do one. CONFIG_RCU_TRACE -- Do half. CONFIG_SMP -- Need one !SMP for PREEMPT_RCU. +!RCU_EXPERT -- Do a few, but these have to be vanilla configurations. RCU-bh: Do one with PREEMPT and one with !PREEMPT. RCU-sched: Do one with PREEMPT but not BOOST. -Hierarchy: - -TREE01. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=8, CONFIG_RCU_FANOUT_EXACT=n. -TREE02. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=3. -TREE03. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=4, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=4. -TREE04. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE05. 
CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=n - CONFIG_RCU_FANOUT_LEAF=6. -TREE06. CONFIG_NR_CPUS=8, CONFIG_RCU_FANOUT=6, CONFIG_RCU_FANOUT_EXACT=y - CONFIG_RCU_FANOUT_LEAF=6. -TREE07. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=2, CONFIG_RCU_FANOUT_EXACT=n, - CONFIG_RCU_FANOUT_LEAF=2. -TREE08. CONFIG_NR_CPUS=16, CONFIG_RCU_FANOUT=3, CONFIG_RCU_FANOUT_EXACT=y, - CONFIG_RCU_FANOUT_LEAF=2. -TREE09. CONFIG_NR_CPUS=1. +Boot parameters: + +nohz_full - do at least one. +maxcpu -- do at least one. +rcupdate.rcu_self_test_bh -- Do at least one each, offloaded and not. +rcupdate.rcu_self_test_sched -- Do at least one each, offloaded and not. +rcupdate.rcu_self_test -- Do at least one each, offloaded and not. +rcutree.rcu_fanout_exact -- Do at least one. Kconfig Parameters Ignored: diff --git a/tools/testing/selftests/seccomp/.gitignore b/tools/testing/selftests/seccomp/.gitignore new file mode 100644 index 000000000000..346d83ca8069 --- /dev/null +++ b/tools/testing/selftests/seccomp/.gitignore @@ -0,0 +1 @@ +seccomp_bpf diff --git a/tools/testing/selftests/seccomp/Makefile b/tools/testing/selftests/seccomp/Makefile new file mode 100644 index 000000000000..8401e87e34e1 --- /dev/null +++ b/tools/testing/selftests/seccomp/Makefile @@ -0,0 +1,10 @@ +TEST_PROGS := seccomp_bpf +CFLAGS += -Wl,-no-as-needed -Wall +LDFLAGS += -lpthread + +all: $(TEST_PROGS) + +include ../lib.mk + +clean: + $(RM) $(TEST_PROGS) diff --git a/tools/testing/selftests/seccomp/seccomp_bpf.c b/tools/testing/selftests/seccomp/seccomp_bpf.c new file mode 100644 index 000000000000..c5abe7fd7590 --- /dev/null +++ b/tools/testing/selftests/seccomp/seccomp_bpf.c @@ -0,0 +1,2109 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * Test code for seccomp bpf. + */ + +#include <asm/siginfo.h> +#define __have_siginfo_t 1 +#define __have_sigval_t 1 +#define __have_sigevent_t 1 + +#include <errno.h> +#include <linux/filter.h> +#include <sys/prctl.h> +#include <sys/ptrace.h> +#include <sys/user.h> +#include <linux/prctl.h> +#include <linux/ptrace.h> +#include <linux/seccomp.h> +#include <poll.h> +#include <pthread.h> +#include <semaphore.h> +#include <signal.h> +#include <stddef.h> +#include <stdbool.h> +#include <string.h> +#include <linux/elf.h> +#include <sys/uio.h> + +#define _GNU_SOURCE +#include <unistd.h> +#include <sys/syscall.h> + +#include "test_harness.h" + +#ifndef PR_SET_PTRACER +# define PR_SET_PTRACER 0x59616d61 +#endif + +#ifndef PR_SET_NO_NEW_PRIVS +#define PR_SET_NO_NEW_PRIVS 38 +#define PR_GET_NO_NEW_PRIVS 39 +#endif + +#ifndef PR_SECCOMP_EXT +#define PR_SECCOMP_EXT 43 +#endif + +#ifndef SECCOMP_EXT_ACT +#define SECCOMP_EXT_ACT 1 +#endif + +#ifndef SECCOMP_EXT_ACT_TSYNC +#define SECCOMP_EXT_ACT_TSYNC 1 +#endif + +#ifndef SECCOMP_MODE_STRICT +#define SECCOMP_MODE_STRICT 1 +#endif + +#ifndef SECCOMP_MODE_FILTER +#define SECCOMP_MODE_FILTER 2 +#endif + +#ifndef SECCOMP_RET_KILL +#define SECCOMP_RET_KILL 0x00000000U /* kill the task immediately */ +#define SECCOMP_RET_TRAP 0x00030000U /* disallow and force a SIGSYS */ +#define SECCOMP_RET_ERRNO 0x00050000U /* returns an errno */ +#define SECCOMP_RET_TRACE 0x7ff00000U /* pass to a tracer or disallow */ +#define SECCOMP_RET_ALLOW 0x7fff0000U /* allow */ + +/* Masks for the return value sections. 
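+ * (The upper 16 bits of the return value select the action; the lower
+ * 16 bits are data, e.g. the errno for SECCOMP_RET_ERRNO or the message
+ * handed to the tracer for SECCOMP_RET_TRACE.)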
*/ +#define SECCOMP_RET_ACTION 0x7fff0000U +#define SECCOMP_RET_DATA 0x0000ffffU + +struct seccomp_data { + int nr; + __u32 arch; + __u64 instruction_pointer; + __u64 args[6]; +}; +#endif + +#define syscall_arg(_n) (offsetof(struct seccomp_data, args[_n])) + +#define SIBLING_EXIT_UNKILLED 0xbadbeef +#define SIBLING_EXIT_FAILURE 0xbadface +#define SIBLING_EXIT_NEWPRIVS 0xbadfeed + +TEST(mode_strict_support) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_exit, 1); +} + +TEST_SIGNAL(mode_strict_cannot_call_prctl, SIGKILL) +{ + long ret; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, NULL, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support CONFIG_SECCOMP"); + } + syscall(__NR_prctl, PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + NULL, NULL, NULL); + EXPECT_FALSE(true) { + TH_LOG("Unreachable!"); + } +} + +/* Note! This doesn't test no new privs behavior */ +TEST(no_new_privs_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } +} + +/* Tests kernel support by checking for a copy_from_user() fault on * NULL. */ +TEST(mode_filter_support) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, NULL, NULL, NULL); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Kernel does not support CONFIG_SECCOMP_FILTER!"); + } +} + +TEST(mode_filter_without_nnp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, NULL, 0, 0); + ASSERT_LE(0, ret) { + TH_LOG("Expected 0 or unsupported for NO_NEW_PRIVS"); + } + errno = 0; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + /* Succeeds with CAP_SYS_ADMIN, fails without */ + /* TODO(wad) check caps not euid */ + if (geteuid()) { + EXPECT_EQ(-1, ret); + EXPECT_EQ(EACCES, errno); + } else { + EXPECT_EQ(0, ret); + } +} + +#define MAX_INSNS_PER_PATH 32768 + +TEST(filter_size_limits) +{ + int i; + int count = BPF_MAXINSNS + 1; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = count; + + /* Too many filter instructions in a single filter. */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_NE(0, ret) { + TH_LOG("Installing %d insn filter was allowed", prog.len); + } + + /* One less is okay, though. 
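+	 * (i.e. a filter of exactly BPF_MAXINSNS instructions should load.)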
*/ + prog.len -= 1; + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Installing %d insn filter wasn't allowed", prog.len); + } +} + +TEST(filter_chain_limits) +{ + int i; + int count = BPF_MAXINSNS; + struct sock_filter allow[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter *filter; + struct sock_fprog prog = { }; + long ret; + + filter = calloc(count, sizeof(*filter)); + ASSERT_NE(NULL, filter); + + for (i = 0; i < count; i++) + filter[i] = allow[0]; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + prog.filter = filter; + prog.len = 1; + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + prog.len = count; + + /* Too many total filter instructions. */ + for (i = 0; i < MAX_INSNS_PER_PATH; i++) { + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + if (ret != 0) + break; + } + ASSERT_NE(0, ret) { + TH_LOG("Allowed %d %d-insn filters (total with penalties:%d)", + i, count, i * (count + 4)); + } +} + +TEST(mode_filter_cannot_move_to_strict) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, NULL, 0, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + + +TEST(mode_filter_get_seccomp) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_GET_SECCOMP, 0, 0, 0, 0); + EXPECT_EQ(2, ret); +} + + +TEST(ALLOW_all) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST(empty_prog) +{ + struct sock_filter filter[] = { + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST_SIGNAL(unknown_ret_is_kill_inside, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x10000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +/* return code >= 0x80000000 is unused. 
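+ * (Such out-of-range action values are expected to be treated like
+ * SECCOMP_RET_KILL, so the filtered getpid() below must die with SIGSYS.)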
*/ +TEST_SIGNAL(unknown_ret_is_kill_above_allow, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, 0x90000000U), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + EXPECT_EQ(0, syscall(__NR_getpid)) { + TH_LOG("getpid() shouldn't ever return"); + } +} + +TEST_SIGNAL(KILL_all, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); +} + +TEST_SIGNAL(KILL_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_SIGNAL(KILL_one_arg_one, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(0)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + pid_t pid = getpid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(pid, syscall(__NR_getpid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid, 0x0C0FFEE)); +} + +TEST_SIGNAL(KILL_one_arg_six, SIGSYS) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + /* Only both with lower 32-bit for now. */ + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(5)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 0x0C0FFEE, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + pid_t pid = getpid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(pid, syscall(__NR_getpid)); + /* getpid() should never return. 
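+	 * (Here the sixth argument matches the filter's 0x0C0FFEE check on
+	 * syscall_arg(5), so the SECCOMP_RET_KILL rule fires.)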
*/ + EXPECT_EQ(0, syscall(__NR_getpid, 1, 2, 3, 4, 5, 0x0C0FFEE)); +} + +/* TODO(wad) add 64-bit versus 32-bit arg tests. */ +TEST(arg_out_of_range) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, syscall_arg(6)), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + EXPECT_EQ(-1, ret); + EXPECT_EQ(EINVAL, errno); +} + +TEST(ERRNO_valid) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | E2BIG), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(E2BIG, errno); +} + +TEST(ERRNO_zero) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* "errno" of 0 is ok. 
*/ + EXPECT_EQ(0, read(0, NULL, 0)); +} + +TEST(ERRNO_capped) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | 4096), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + pid_t parent = getppid(); + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog); + ASSERT_EQ(0, ret); + + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(-1, read(0, NULL, 0)); + EXPECT_EQ(4095, errno); +} + +FIXTURE_DATA(TRAP) { + struct sock_fprog prog; +}; + +FIXTURE_SETUP(TRAP) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); +} + +FIXTURE_TEARDOWN(TRAP) +{ + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F_SIGNAL(TRAP, dfl, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +/* Ensure that SIGSYS overrides SIG_IGN */ +TEST_F_SIGNAL(TRAP, ign, SIGSYS) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + signal(SIGSYS, SIG_IGN); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + syscall(__NR_getpid); +} + +static struct siginfo TRAP_info; +static volatile int TRAP_nr; +static void TRAP_action(int nr, siginfo_t *info, void *void_context) +{ + memcpy(&TRAP_info, info, sizeof(TRAP_info)); + TRAP_nr = nr; +} + +TEST_F(TRAP, handler) +{ + int ret, test; + struct sigaction act; + sigset_t mask; + + memset(&act, 0, sizeof(act)); + sigemptyset(&mask); + sigaddset(&mask, SIGSYS); + + act.sa_sigaction = &TRAP_action; + act.sa_flags = SA_SIGINFO; + ret = sigaction(SIGSYS, &act, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigaction failed"); + } + ret = sigprocmask(SIG_UNBLOCK, &mask, NULL); + ASSERT_EQ(0, ret) { + TH_LOG("sigprocmask failed"); + } + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog); + ASSERT_EQ(0, ret); + TRAP_nr = 0; + memset(&TRAP_info, 0, sizeof(TRAP_info)); + /* Expect the registers to be rolled back. (nr = error) may vary + * based on arch. */ + ret = syscall(__NR_getpid); + /* Silence gcc warning about volatile. */ + test = TRAP_nr; + EXPECT_EQ(SIGSYS, test); + struct local_sigsys { + void *_call_addr; /* calling user insn */ + int _syscall; /* triggering system call number */ + unsigned int _arch; /* AUDIT_ARCH_* of syscall */ + } *sigsys = (struct local_sigsys *) +#ifdef si_syscall + &(TRAP_info.si_call_addr); +#else + &TRAP_info.si_pid; +#endif + EXPECT_EQ(__NR_getpid, sigsys->_syscall); + /* Make sure arch is non-zero. 
*/ + EXPECT_NE(0, sigsys->_arch); + EXPECT_NE(0, (unsigned long)sigsys->_call_addr); +} + +FIXTURE_DATA(precedence) { + struct sock_fprog allow; + struct sock_fprog trace; + struct sock_fprog error; + struct sock_fprog trap; + struct sock_fprog kill; +}; + +FIXTURE_SETUP(precedence) +{ + struct sock_filter allow_insns[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter trace_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE), + }; + struct sock_filter error_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO), + }; + struct sock_filter trap_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRAP), + }; + struct sock_filter kill_insns[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 1, 0), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + }; + + memset(self, 0, sizeof(*self)); +#define FILTER_ALLOC(_x) \ + self->_x.filter = malloc(sizeof(_x##_insns)); \ + ASSERT_NE(NULL, self->_x.filter); \ + memcpy(self->_x.filter, &_x##_insns, sizeof(_x##_insns)); \ + self->_x.len = (unsigned short)ARRAY_SIZE(_x##_insns) + FILTER_ALLOC(allow); + FILTER_ALLOC(trace); + FILTER_ALLOC(error); + FILTER_ALLOC(trap); + FILTER_ALLOC(kill); +} + +FIXTURE_TEARDOWN(precedence) +{ +#define FILTER_FREE(_x) if (self->_x.filter) free(self->_x.filter) + FILTER_FREE(allow); + FILTER_FREE(trace); + FILTER_FREE(error); + FILTER_FREE(trap); + FILTER_FREE(kill); +} + +TEST_F(precedence, allow_ok) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest, SIGSYS) +{ + pid_t parent, res = 0; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + res = syscall(__NR_getppid); + EXPECT_EQ(parent, res); + /* getpid() should never return. 
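+	 * (All five filters are installed above; the kill filter's verdict
+	 * takes precedence over trace, errno and trap for getpid().)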
*/ + res = syscall(__NR_getpid); + EXPECT_EQ(0, res); +} + +TEST_F_SIGNAL(precedence, kill_is_highest_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->kill); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F_SIGNAL(precedence, trap_is_second_in_any_order, SIGSYS) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trap); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* getpid() should never return. */ + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, errno_is_third_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->error); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. 
*/ + EXPECT_EQ(parent, syscall(__NR_getppid)); + EXPECT_EQ(0, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +TEST_F(precedence, trace_is_fourth_in_any_order) +{ + pid_t parent; + long ret; + + parent = getppid(); + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->trace); + ASSERT_EQ(0, ret); + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->allow); + ASSERT_EQ(0, ret); + /* Should work just fine. */ + EXPECT_EQ(parent, syscall(__NR_getppid)); + /* No ptracer */ + EXPECT_EQ(-1, syscall(__NR_getpid)); +} + +#ifndef PTRACE_O_TRACESECCOMP +#define PTRACE_O_TRACESECCOMP 0x00000080 +#endif + +/* Catch the Ubuntu 12.04 value error. */ +#if PTRACE_EVENT_SECCOMP != 7 +#undef PTRACE_EVENT_SECCOMP +#endif + +#ifndef PTRACE_EVENT_SECCOMP +#define PTRACE_EVENT_SECCOMP 7 +#endif + +#define IS_SECCOMP_EVENT(status) ((status >> 16) == PTRACE_EVENT_SECCOMP) +bool tracer_running; +void tracer_stop(int sig) +{ + tracer_running = false; +} + +typedef void tracer_func_t(struct __test_metadata *_metadata, + pid_t tracee, int status, void *args); + +void tracer(struct __test_metadata *_metadata, int fd, pid_t tracee, + tracer_func_t tracer_func, void *args) +{ + int ret = -1; + struct sigaction action = { + .sa_handler = tracer_stop, + }; + + /* Allow external shutdown. */ + tracer_running = true; + ASSERT_EQ(0, sigaction(SIGUSR1, &action, NULL)); + + errno = 0; + while (ret == -1 && errno != EINVAL) + ret = ptrace(PTRACE_ATTACH, tracee, NULL, 0); + ASSERT_EQ(0, ret) { + kill(tracee, SIGKILL); + } + /* Wait for attach stop */ + wait(NULL); + + ret = ptrace(PTRACE_SETOPTIONS, tracee, NULL, PTRACE_O_TRACESECCOMP); + ASSERT_EQ(0, ret) { + TH_LOG("Failed to set PTRACE_O_TRACESECCOMP"); + kill(tracee, SIGKILL); + } + ptrace(PTRACE_CONT, tracee, NULL, 0); + + /* Unblock the tracee */ + ASSERT_EQ(1, write(fd, "A", 1)); + ASSERT_EQ(0, close(fd)); + + /* Run until we're shut down. Must assert to stop execution. */ + while (tracer_running) { + int status; + + if (wait(&status) != tracee) + continue; + if (WIFSIGNALED(status) || WIFEXITED(status)) + /* Child is dead. Time to go. */ + return; + + /* Make sure this is a seccomp event. */ + ASSERT_EQ(true, IS_SECCOMP_EVENT(status)); + + tracer_func(_metadata, tracee, status, args); + + ret = ptrace(PTRACE_CONT, tracee, NULL, NULL); + ASSERT_EQ(0, ret); + } + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS : EXIT_FAILURE); +} + +/* Common tracer setup/teardown functions. */ +void cont_handler(int num) +{ } +pid_t setup_trace_fixture(struct __test_metadata *_metadata, + tracer_func_t func, void *args) +{ + char sync; + int pipefd[2]; + pid_t tracer_pid; + pid_t tracee = getpid(); + + /* Setup a pipe for clean synchronization. 
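+	 * (The tracee blocks in read() on this pipe until the tracer has
+	 * attached and set PTRACE_O_TRACESECCOMP; the tracer then writes one
+	 * byte to release it.)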
*/ + ASSERT_EQ(0, pipe(pipefd)); + + /* Fork a child which we'll promote to tracer */ + tracer_pid = fork(); + ASSERT_LE(0, tracer_pid); + signal(SIGALRM, cont_handler); + if (tracer_pid == 0) { + close(pipefd[0]); + tracer(_metadata, pipefd[1], tracee, func, args); + syscall(__NR_exit, 0); + } + close(pipefd[1]); + prctl(PR_SET_PTRACER, tracer_pid, 0, 0, 0); + read(pipefd[0], &sync, 1); + close(pipefd[0]); + + return tracer_pid; +} +void teardown_trace_fixture(struct __test_metadata *_metadata, + pid_t tracer) +{ + if (tracer) { + int status; + /* + * Extract the exit code from the other process and + * adopt it for ourselves in case its asserts failed. + */ + ASSERT_EQ(0, kill(tracer, SIGUSR1)); + ASSERT_EQ(tracer, waitpid(tracer, &status, 0)); + if (WEXITSTATUS(status)) + _metadata->passed = 0; + } +} + +/* "poke" tracer arguments and function. */ +struct tracer_args_poke_t { + unsigned long poke_addr; +}; + +void tracer_poke(struct __test_metadata *_metadata, pid_t tracee, int status, + void *args) +{ + int ret; + unsigned long msg; + struct tracer_args_poke_t *info = (struct tracer_args_poke_t *)args; + + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + /* If this fails, don't try to recover. */ + ASSERT_EQ(0x1001, msg) { + kill(tracee, SIGKILL); + } + /* + * Poke in the message. + * Registers are not touched to try to keep this relatively arch + * agnostic. + */ + ret = ptrace(PTRACE_POKEDATA, tracee, info->poke_addr, 0x1001); + EXPECT_EQ(0, ret); +} + +FIXTURE_DATA(TRACE_poke) { + struct sock_fprog prog; + pid_t tracer; + long poked; + struct tracer_args_poke_t tracer_args; +}; + +FIXTURE_SETUP(TRACE_poke) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1001), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + self->poked = 0; + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Set up tracer args. */ + self->tracer_args.poke_addr = (unsigned long)&self->poked; + + /* Launch tracer. 
*/ + self->tracer = setup_trace_fixture(_metadata, tracer_poke, + &self->tracer_args); +} + +FIXTURE_TEARDOWN(TRACE_poke) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_poke, read_has_side_effects) +{ + ssize_t ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + ret = read(-1, NULL, 0); + EXPECT_EQ(-1, ret); + EXPECT_EQ(0x1001, self->poked); +} + +TEST_F(TRACE_poke, getpid_runs_normally) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + EXPECT_EQ(0, self->poked); + EXPECT_NE(0, syscall(__NR_getpid)); + EXPECT_EQ(0, self->poked); +} + +#if defined(__x86_64__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_rax +# define SYSCALL_RET rax +#elif defined(__i386__) +# define ARCH_REGS struct user_regs_struct +# define SYSCALL_NUM orig_eax +# define SYSCALL_RET eax +#elif defined(__arm__) +# define ARCH_REGS struct pt_regs +# define SYSCALL_NUM ARM_r7 +# define SYSCALL_RET ARM_r0 +#elif defined(__aarch64__) +# define ARCH_REGS struct user_pt_regs +# define SYSCALL_NUM regs[8] +# define SYSCALL_RET regs[0] +#else +# error "Do not know how to find your architecture's registers and syscalls" +#endif + +/* Architecture-specific syscall fetching routine. */ +int get_syscall(struct __test_metadata *_metadata, pid_t tracee) +{ + struct iovec iov; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + EXPECT_EQ(0, ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov)) { + TH_LOG("PTRACE_GETREGSET failed"); + return -1; + } + + return regs.SYSCALL_NUM; +} + +/* Architecture-specific syscall changing routine. */ +void change_syscall(struct __test_metadata *_metadata, + pid_t tracee, int syscall) +{ + struct iovec iov; + int ret; + ARCH_REGS regs; + + iov.iov_base = ®s; + iov.iov_len = sizeof(regs); + ret = ptrace(PTRACE_GETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); + +#if defined(__x86_64__) || defined(__i386__) || defined(__aarch64__) + { + regs.SYSCALL_NUM = syscall; + } + +#elif defined(__arm__) +# ifndef PTRACE_SET_SYSCALL +# define PTRACE_SET_SYSCALL 23 +# endif + { + ret = ptrace(PTRACE_SET_SYSCALL, tracee, NULL, syscall); + EXPECT_EQ(0, ret); + } + +#else + ASSERT_EQ(1, 0) { + TH_LOG("How is the syscall changed on this architecture?"); + } +#endif + + /* If syscall is skipped, change return value. */ + if (syscall == -1) + regs.SYSCALL_RET = 1; + + ret = ptrace(PTRACE_SETREGSET, tracee, NT_PRSTATUS, &iov); + EXPECT_EQ(0, ret); +} + +void tracer_syscall(struct __test_metadata *_metadata, pid_t tracee, + int status, void *args) +{ + int ret; + unsigned long msg; + + /* Make sure we got the right message. */ + ret = ptrace(PTRACE_GETEVENTMSG, tracee, NULL, &msg); + EXPECT_EQ(0, ret); + + switch (msg) { + case 0x1002: + /* change getpid to getppid. */ + change_syscall(_metadata, tracee, __NR_getppid); + break; + case 0x1003: + /* skip gettid. 
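+		 * (Passing -1 makes change_syscall() cancel the call and store
+		 * 1 as the return value, which syscall_dropped checks for.)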
*/ + change_syscall(_metadata, tracee, -1); + break; + case 0x1004: + /* do nothing (allow getppid) */ + break; + default: + EXPECT_EQ(0, msg) { + TH_LOG("Unknown PTRACE_GETEVENTMSG: 0x%lx", msg); + kill(tracee, SIGKILL); + } + } + +} + +FIXTURE_DATA(TRACE_syscall) { + struct sock_fprog prog; + pid_t tracer, mytid, mypid, parent; +}; + +FIXTURE_SETUP(TRACE_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getpid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1002), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_gettid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1003), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_getppid, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE | 0x1004), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->prog, 0, sizeof(self->prog)); + self->prog.filter = malloc(sizeof(filter)); + ASSERT_NE(NULL, self->prog.filter); + memcpy(self->prog.filter, filter, sizeof(filter)); + self->prog.len = (unsigned short)ARRAY_SIZE(filter); + + /* Prepare some testable syscall results. */ + self->mytid = syscall(__NR_gettid); + ASSERT_GT(self->mytid, 0); + ASSERT_NE(self->mytid, 1) { + TH_LOG("Running this test as init is not supported. :)"); + } + + self->mypid = getpid(); + ASSERT_GT(self->mypid, 0); + ASSERT_EQ(self->mytid, self->mypid); + + self->parent = getppid(); + ASSERT_GT(self->parent, 0); + ASSERT_NE(self->parent, self->mypid); + + /* Launch tracer. */ + self->tracer = setup_trace_fixture(_metadata, tracer_syscall, NULL); +} + +FIXTURE_TEARDOWN(TRACE_syscall) +{ + teardown_trace_fixture(_metadata, self->tracer); + if (self->prog.filter) + free(self->prog.filter); +} + +TEST_F(TRACE_syscall, syscall_allowed) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getppid works as expected (no changes). */ + EXPECT_EQ(self->parent, syscall(__NR_getppid)); + EXPECT_NE(self->mypid, syscall(__NR_getppid)); +} + +TEST_F(TRACE_syscall, syscall_redirected) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* getpid has been redirected to getppid as expected. */ + EXPECT_EQ(self->parent, syscall(__NR_getpid)); + EXPECT_NE(self->mypid, syscall(__NR_getpid)); +} + +TEST_F(TRACE_syscall, syscall_dropped) +{ + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret); + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &self->prog, 0, 0); + ASSERT_EQ(0, ret); + + /* gettid has been skipped and an altered return value stored. 
*/ + EXPECT_EQ(1, syscall(__NR_gettid)); + EXPECT_NE(self->mytid, syscall(__NR_gettid)); +} + +#ifndef __NR_seccomp +# if defined(__i386__) +# define __NR_seccomp 354 +# elif defined(__x86_64__) +# define __NR_seccomp 317 +# elif defined(__arm__) +# define __NR_seccomp 383 +# elif defined(__aarch64__) +# define __NR_seccomp 277 +# else +# warning "seccomp syscall number unknown for this architecture" +# define __NR_seccomp 0xffff +# endif +#endif + +#ifndef SECCOMP_SET_MODE_STRICT +#define SECCOMP_SET_MODE_STRICT 0 +#endif + +#ifndef SECCOMP_SET_MODE_FILTER +#define SECCOMP_SET_MODE_FILTER 1 +#endif + +#ifndef SECCOMP_FLAG_FILTER_TSYNC +#define SECCOMP_FLAG_FILTER_TSYNC 1 +#endif + +#ifndef seccomp +int seccomp(unsigned int op, unsigned int flags, struct sock_fprog *filter) +{ + errno = 0; + return syscall(__NR_seccomp, op, flags, filter); +} +#endif + +TEST(seccomp_syscall) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Reject insane operation. */ + ret = seccomp(-1, 0, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy op value!"); + } + + /* Reject strict with flags or pointer. */ + ret = seccomp(SECCOMP_SET_MODE_STRICT, -1, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject mode strict with uargs!"); + } + + /* Reject insane args for filter. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, -1, &prog); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Did not reject crazy filter flags!"); + } + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, NULL); + EXPECT_EQ(EFAULT, errno) { + TH_LOG("Did not reject NULL filter!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + EXPECT_EQ(0, errno) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER: %s", + strerror(errno)); + } +} + +TEST(seccomp_syscall_mode_lock) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + EXPECT_EQ(0, ret) { + TH_LOG("Could not install filter!"); + } + + /* Make sure neither entry point will switch to strict. 
*/ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_STRICT, 0, 0, 0); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } + + ret = seccomp(SECCOMP_SET_MODE_STRICT, 0, NULL); + EXPECT_EQ(EINVAL, errno) { + TH_LOG("Switched to mode strict!"); + } +} + +TEST(TSYNC_first) +{ + struct sock_filter filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + long ret; + + ret = prctl(PR_SET_NO_NEW_PRIVS, 1, NULL, 0, 0); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &prog); + EXPECT_EQ(0, ret) { + TH_LOG("Could not install initial filter with TSYNC!"); + } +} + +#define TSYNC_SIBLINGS 2 +struct tsync_sibling { + pthread_t tid; + pid_t system_tid; + sem_t *started; + pthread_cond_t *cond; + pthread_mutex_t *mutex; + int diverge; + int num_waits; + struct sock_fprog *prog; + struct __test_metadata *metadata; +}; + +FIXTURE_DATA(TSYNC) { + struct sock_fprog root_prog, apply_prog; + struct tsync_sibling sibling[TSYNC_SIBLINGS]; + sem_t started; + pthread_cond_t cond; + pthread_mutex_t mutex; + int sibling_count; +}; + +FIXTURE_SETUP(TSYNC) +{ + struct sock_filter root_filter[] = { + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_filter apply_filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + + memset(&self->root_prog, 0, sizeof(self->root_prog)); + memset(&self->apply_prog, 0, sizeof(self->apply_prog)); + memset(&self->sibling, 0, sizeof(self->sibling)); + self->root_prog.filter = malloc(sizeof(root_filter)); + ASSERT_NE(NULL, self->root_prog.filter); + memcpy(self->root_prog.filter, &root_filter, sizeof(root_filter)); + self->root_prog.len = (unsigned short)ARRAY_SIZE(root_filter); + + self->apply_prog.filter = malloc(sizeof(apply_filter)); + ASSERT_NE(NULL, self->apply_prog.filter); + memcpy(self->apply_prog.filter, &apply_filter, sizeof(apply_filter)); + self->apply_prog.len = (unsigned short)ARRAY_SIZE(apply_filter); + + self->sibling_count = 0; + pthread_mutex_init(&self->mutex, NULL); + pthread_cond_init(&self->cond, NULL); + sem_init(&self->started, 0, 0); + self->sibling[0].tid = 0; + self->sibling[0].cond = &self->cond; + self->sibling[0].started = &self->started; + self->sibling[0].mutex = &self->mutex; + self->sibling[0].diverge = 0; + self->sibling[0].num_waits = 1; + self->sibling[0].prog = &self->root_prog; + self->sibling[0].metadata = _metadata; + self->sibling[1].tid = 0; + self->sibling[1].cond = &self->cond; + self->sibling[1].started = &self->started; + self->sibling[1].mutex = &self->mutex; + self->sibling[1].diverge = 0; + self->sibling[1].prog = &self->root_prog; + self->sibling[1].num_waits = 1; + self->sibling[1].metadata = _metadata; +} + +FIXTURE_TEARDOWN(TSYNC) +{ + int sib = 0; + + if (self->root_prog.filter) + free(self->root_prog.filter); + if (self->apply_prog.filter) + free(self->apply_prog.filter); + + for ( ; sib < self->sibling_count; ++sib) { + struct tsync_sibling *s = &self->sibling[sib]; + void *status; + + if (!s->tid) + continue; + if (pthread_kill(s->tid, 0)) { + pthread_cancel(s->tid); + pthread_join(s->tid, &status); + } + } + pthread_mutex_destroy(&self->mutex); + pthread_cond_destroy(&self->cond); + sem_destroy(&self->started); +} + 
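The TSYNC fixture above coordinates its sibling threads with a mutex and condition variable so that a filter can be attached to all of them at once. For the flag's behaviour in isolation, here is a minimal standalone sketch (not part of the selftest): it assumes __NR_seccomp comes from <sys/syscall.h> (otherwise the per-arch fallbacks defined earlier in this file are needed), links with -lpthread, and uses flag value 1, which this file calls SECCOMP_FLAG_FILTER_TSYNC and later uapi headers call SECCOMP_FILTER_FLAG_TSYNC. A non-zero return from seccomp() is the TID of a thread that could not be synchronized, as the tests below rely on.

/*
 * Standalone TSYNC illustration (assumed build: gcc tsync-demo.c -lpthread):
 * start a second thread, then atomically attach an allow-all filter to
 * every thread in the process.
 */
#include <stdio.h>
#include <unistd.h>
#include <pthread.h>
#include <sys/prctl.h>
#include <sys/syscall.h>
#include <linux/filter.h>
#include <linux/seccomp.h>

#ifndef SECCOMP_SET_MODE_FILTER
# define SECCOMP_SET_MODE_FILTER 1
#endif
#ifndef SECCOMP_FILTER_FLAG_TSYNC
# define SECCOMP_FILTER_FLAG_TSYNC 1	/* SECCOMP_FLAG_FILTER_TSYNC above */
#endif

static void *idle_thread(void *unused)
{
	(void)unused;
	pause();	/* just exist, so there is a second thread to sync */
	return NULL;
}

int main(void)
{
	struct sock_filter filter[] = {
		BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW),
	};
	struct sock_fprog prog = {
		.len = (unsigned short)(sizeof(filter) / sizeof(filter[0])),
		.filter = filter,
	};
	pthread_t tid;
	long ret;

	pthread_create(&tid, NULL, idle_thread, NULL);

	/* Required to install a filter without CAP_SYS_ADMIN. */
	prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0);

	ret = syscall(__NR_seccomp, SECCOMP_SET_MODE_FILTER,
		      SECCOMP_FILTER_FLAG_TSYNC, &prog);
	printf("seccomp(SET_MODE_FILTER, TSYNC) returned %ld\n", ret);
	return ret ? 1 : 0;
}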
+void *tsync_sibling(void *data) +{ + long ret = 0; + struct tsync_sibling *me = data; + + me->system_tid = syscall(__NR_gettid); + + pthread_mutex_lock(me->mutex); + if (me->diverge) { + /* Just re-apply the root prog to fork the tree */ + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, + me->prog, 0, 0); + } + sem_post(me->started); + /* Return outside of started so parent notices failures. */ + if (ret) { + pthread_mutex_unlock(me->mutex); + return (void *)SIBLING_EXIT_FAILURE; + } + do { + pthread_cond_wait(me->cond, me->mutex); + me->num_waits = me->num_waits - 1; + } while (me->num_waits); + pthread_mutex_unlock(me->mutex); + + ret = prctl(PR_GET_NO_NEW_PRIVS, 0, 0, 0, 0); + if (!ret) + return (void *)SIBLING_EXIT_NEWPRIVS; + read(0, NULL, 0); + return (void *)SIBLING_EXIT_UNKILLED; +} + +void tsync_start_sibling(struct tsync_sibling *sibling) +{ + pthread_create(&sibling->tid, NULL, tsync_sibling, (void *)sibling); +} + +TEST_F(TSYNC, siblings_fail_prctl) +{ + long ret; + void *status; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_prctl, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ERRNO | EINVAL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* Check prctl failure detection by requesting sib 0 diverge. */ + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &prog); + ASSERT_EQ(0, ret) { + TH_LOG("setting filter failed"); + } + + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Signal the threads to clean up*/ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure diverging sibling failed to call prctl. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_FAILURE, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_ancestor) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + /* Ensure they are both killed and don't exit cleanly. 
*/ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_sibling_want_nnp) +{ + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + /* Tell the siblings to test no policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both upset about lacking nnp. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_NEWPRIVS, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_no_filter) +{ + long ret; + void *status; + + /* start siblings before any prctl() operations */ + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Could install filter on all threads!"); + } + + /* Tell the siblings to test the policy */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both killed and don't exit cleanly. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(0x0, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(0x0, (long)status); +} + +TEST_F(TSYNC, two_siblings_with_one_divergence) +{ + long ret; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(self->sibling[0].system_tid, ret) { + TH_LOG("Did not fail on diverged sibling."); + } + + /* Wake the threads */ + pthread_mutex_lock(&self->mutex); + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + + /* Ensure they are both unkilled. */ + pthread_join(self->sibling[0].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + pthread_join(self->sibling[1].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); +} + +TEST_F(TSYNC, two_siblings_not_under_filter) +{ + long ret, sib; + void *status; + + ASSERT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + /* + * Sibling 0 will have its own seccomp policy + * and Sibling 1 will not be under seccomp at + * all. 
Sibling 1 will enter seccomp and 0 + * will cause failure. + */ + self->sibling[0].diverge = 1; + tsync_start_sibling(&self->sibling[0]); + tsync_start_sibling(&self->sibling[1]); + + while (self->sibling_count < TSYNC_SIBLINGS) { + sem_wait(&self->started); + self->sibling_count++; + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, 0, &self->root_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Kernel does not support SECCOMP_SET_MODE_FILTER!"); + } + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(ret, self->sibling[0].system_tid) { + TH_LOG("Did not fail on diverged sibling."); + } + sib = 1; + if (ret == self->sibling[0].system_tid) + sib = 0; + + pthread_mutex_lock(&self->mutex); + + /* Increment the other siblings num_waits so we can clean up + * the one we just saw. + */ + self->sibling[!sib].num_waits += 1; + + /* Signal the thread to clean up*/ + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(SIBLING_EXIT_UNKILLED, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + /* Switch to the remaining sibling */ + sib = !sib; + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret) { + TH_LOG("Expected the remaining sibling to sync"); + }; + + pthread_mutex_lock(&self->mutex); + + /* If remaining sibling didn't have a chance to wake up during + * the first broadcast, manually reduce the num_waits now. + */ + if (self->sibling[sib].num_waits > 1) + self->sibling[sib].num_waits = 1; + ASSERT_EQ(0, pthread_cond_broadcast(&self->cond)) { + TH_LOG("cond broadcast non-zero"); + } + pthread_mutex_unlock(&self->mutex); + pthread_join(self->sibling[sib].tid, &status); + EXPECT_EQ(0, (long)status); + /* Poll for actual task death. pthread_join doesn't guarantee it. */ + while (!kill(self->sibling[sib].system_tid, 0)) + sleep(0.1); + + ret = seccomp(SECCOMP_SET_MODE_FILTER, SECCOMP_FLAG_FILTER_TSYNC, + &self->apply_prog); + ASSERT_EQ(0, ret); /* just us chickens */ +} + +/* Make sure restarted syscalls are seen directly as "restart_syscall". */ +TEST(syscall_restart) +{ + long ret; + unsigned long msg; + pid_t child_pid; + int pipefd[2]; + int status; + siginfo_t info = { }; + struct sock_filter filter[] = { + BPF_STMT(BPF_LD|BPF_W|BPF_ABS, + offsetof(struct seccomp_data, nr)), + +#ifdef __NR_sigreturn + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_sigreturn, 6, 0), +#endif + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_read, 5, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_exit, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_rt_sigreturn, 3, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_poll, 4, 0), + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_restart_syscall, 4, 0), + + /* Allow __NR_write for easy logging. */ + BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, __NR_write, 0, 1), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_ALLOW), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_KILL), + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x100), /* poll */ + BPF_STMT(BPF_RET|BPF_K, SECCOMP_RET_TRACE|0x200), /* restart */ + }; + struct sock_fprog prog = { + .len = (unsigned short)ARRAY_SIZE(filter), + .filter = filter, + }; + + ASSERT_EQ(0, pipe(pipefd)); + + child_pid = fork(); + ASSERT_LE(0, child_pid); + if (child_pid == 0) { + /* Child uses EXPECT not ASSERT to deliver status correctly. 
*/ + char buf = ' '; + struct pollfd fds = { + .fd = pipefd[0], + .events = POLLIN, + }; + + /* Attach parent as tracer and stop. */ + EXPECT_EQ(0, ptrace(PTRACE_TRACEME)); + EXPECT_EQ(0, raise(SIGSTOP)); + + EXPECT_EQ(0, close(pipefd[1])); + + EXPECT_EQ(0, prctl(PR_SET_NO_NEW_PRIVS, 1, 0, 0, 0)) { + TH_LOG("Kernel does not support PR_SET_NO_NEW_PRIVS!"); + } + + ret = prctl(PR_SET_SECCOMP, SECCOMP_MODE_FILTER, &prog, 0, 0); + EXPECT_EQ(0, ret) { + TH_LOG("Failed to install filter!"); + } + + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed to read() sync from parent"); + } + EXPECT_EQ('.', buf) { + TH_LOG("Failed to get sync data from read()"); + } + + /* Start poll to be interrupted. */ + errno = 0; + EXPECT_EQ(1, poll(&fds, 1, -1)) { + TH_LOG("Call to poll() failed (errno %d)", errno); + } + + /* Read final sync from parent. */ + EXPECT_EQ(1, read(pipefd[0], &buf, 1)) { + TH_LOG("Failed final read() from parent"); + } + EXPECT_EQ('!', buf) { + TH_LOG("Failed to get final data from read()"); + } + + /* Directly report the status of our test harness results. */ + syscall(__NR_exit, _metadata->passed ? EXIT_SUCCESS + : EXIT_FAILURE); + } + EXPECT_EQ(0, close(pipefd[0])); + + /* Attach to child, setup options, and release. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(0, ptrace(PTRACE_SETOPTIONS, child_pid, NULL, + PTRACE_O_TRACESECCOMP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], ".", 1)); + + /* Wait for poll() to start. */ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x100, msg); + EXPECT_EQ(__NR_poll, get_syscall(_metadata, child_pid)); + + /* Might as well check siginfo for sanity while we're here. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + ASSERT_EQ(SIGTRAP, info.si_signo); + ASSERT_EQ(SIGTRAP | (PTRACE_EVENT_SECCOMP << 8), info.si_code); + EXPECT_EQ(0, info.si_errno); + EXPECT_EQ(getuid(), info.si_uid); + /* Verify signal delivery came from child (seccomp-triggered). */ + EXPECT_EQ(child_pid, info.si_pid); + + /* Interrupt poll with SIGSTOP (which we'll need to handle). */ + ASSERT_EQ(0, kill(child_pid, SIGSTOP)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGSTOP, WSTOPSIG(status)); + /* Verify signal delivery came from parent now. */ + ASSERT_EQ(0, ptrace(PTRACE_GETSIGINFO, child_pid, NULL, &info)); + EXPECT_EQ(getpid(), info.si_pid); + + /* Restart poll with SIGCONT, which triggers restart_syscall. */ + ASSERT_EQ(0, kill(child_pid, SIGCONT)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGCONT, WSTOPSIG(status)); + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + + /* Wait for restart_syscall() to start. 
*/ + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + ASSERT_EQ(true, WIFSTOPPED(status)); + ASSERT_EQ(SIGTRAP, WSTOPSIG(status)); + ASSERT_EQ(PTRACE_EVENT_SECCOMP, (status >> 16)); + ASSERT_EQ(0, ptrace(PTRACE_GETEVENTMSG, child_pid, NULL, &msg)); + ASSERT_EQ(0x200, msg); + ret = get_syscall(_metadata, child_pid); +#if defined(__arm__) + /* FIXME: ARM does not expose true syscall in registers. */ + EXPECT_EQ(__NR_poll, ret); +#else + EXPECT_EQ(__NR_restart_syscall, ret); +#endif + + /* Write again to end poll. */ + ASSERT_EQ(0, ptrace(PTRACE_CONT, child_pid, NULL, 0)); + ASSERT_EQ(1, write(pipefd[1], "!", 1)); + EXPECT_EQ(0, close(pipefd[1])); + + ASSERT_EQ(child_pid, waitpid(child_pid, &status, 0)); + if (WIFSIGNALED(status) || WEXITSTATUS(status)) + _metadata->passed = 0; +} + +/* + * TODO: + * - add microbenchmarks + * - expand NNP testing + * - better arch-specific TRACE and TRAP handlers. + * - endianness checking when appropriate + * - 64-bit arg prodding + * - arch value testing (x86 modes especially) + * - ... + */ + +TEST_HARNESS_MAIN diff --git a/tools/testing/selftests/seccomp/test_harness.h b/tools/testing/selftests/seccomp/test_harness.h new file mode 100644 index 000000000000..977a6afc4489 --- /dev/null +++ b/tools/testing/selftests/seccomp/test_harness.h @@ -0,0 +1,537 @@ +/* + * Copyright (c) 2012 The Chromium OS Authors. All rights reserved. + * Use of this source code is governed by the GPLv2 license. + * + * test_harness.h: simple C unit test helper. + * + * Usage: + * #include "test_harness.h" + * TEST(standalone_test) { + * do_some_stuff; + * EXPECT_GT(10, stuff) { + * stuff_state_t state; + * enumerate_stuff_state(&state); + * TH_LOG("expectation failed with state: %s", state.msg); + * } + * more_stuff; + * ASSERT_NE(some_stuff, NULL) TH_LOG("how did it happen?!"); + * last_stuff; + * EXPECT_EQ(0, last_stuff); + * } + * + * FIXTURE(my_fixture) { + * mytype_t *data; + * int awesomeness_level; + * }; + * FIXTURE_SETUP(my_fixture) { + * self->data = mytype_new(); + * ASSERT_NE(NULL, self->data); + * } + * FIXTURE_TEARDOWN(my_fixture) { + * mytype_free(self->data); + * } + * TEST_F(my_fixture, data_is_good) { + * EXPECT_EQ(1, is_my_data_good(self->data)); + * } + * + * TEST_HARNESS_MAIN + * + * API inspired by code.google.com/p/googletest + */ +#ifndef TEST_HARNESS_H_ +#define TEST_HARNESS_H_ + +#define _GNU_SOURCE +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sys/types.h> +#include <sys/wait.h> +#include <unistd.h> + +/* All exported functionality should be declared through this macro. */ +#define TEST_API(x) _##x + +/* + * Exported APIs + */ + +/* TEST(name) { implementation } + * Defines a test by name. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST TEST_API(TEST) + +/* TEST_SIGNAL(name, signal) { implementation } + * Defines a test by name and the expected term signal. + * Names must be unique and tests must not be run in parallel. The + * implementation containing block is a function and scoping should be treated + * as such. Returning early may be performed with a bare "return;" statement. + * + * EXPECT_* and ASSERT_* are valid in a TEST() { } context. + */ +#define TEST_SIGNAL TEST_API(TEST_SIGNAL) + +/* FIXTURE(datatype name) { + * type property1; + * ... 
+ * }; + * Defines the data provided to TEST_F()-defined tests as |self|. It should be + * populated and cleaned up using FIXTURE_SETUP and FIXTURE_TEARDOWN. + */ +#define FIXTURE TEST_API(FIXTURE) + +/* FIXTURE_DATA(datatype name) + * This call may be used when the type of the fixture data + * is needed. In general, this should not be needed unless + * the |self| is being passed to a helper directly. + */ +#define FIXTURE_DATA TEST_API(FIXTURE_DATA) + +/* FIXTURE_SETUP(fixture name) { implementation } + * Populates the required "setup" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation. + * + * ASSERT_* are valid for use in this context and will prempt the execution + * of any dependent fixture tests. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_SETUP TEST_API(FIXTURE_SETUP) + +/* FIXTURE_TEARDOWN(fixture name) { implementation } + * Populates the required "teardown" function for a fixture. An instance of the + * datatype defined with _FIXTURE_DATA will be exposed as |self| for the + * implementation to clean up. + * + * A bare "return;" statement may be used to return early. + */ +#define FIXTURE_TEARDOWN TEST_API(FIXTURE_TEARDOWN) + +/* TEST_F(fixture, name) { implementation } + * Defines a test that depends on a fixture (e.g., is part of a test case). + * Very similar to TEST() except that |self| is the setup instance of fixture's + * datatype exposed for use by the implementation. + */ +#define TEST_F TEST_API(TEST_F) + +#define TEST_F_SIGNAL TEST_API(TEST_F_SIGNAL) + +/* Use once to append a main() to the test file. E.g., + * TEST_HARNESS_MAIN + */ +#define TEST_HARNESS_MAIN TEST_API(TEST_HARNESS_MAIN) + +/* + * Operators for use in TEST and TEST_F. + * ASSERT_* calls will stop test execution immediately. + * EXPECT_* calls will emit a failure warning, note it, and continue. 
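Before the individual operator definitions, it may help to see the two families side by side. The following is a minimal sketch written against this header (not part of it), showing that an optional brace block placed directly after any EXPECT_*/ASSERT_* runs only when the check fails, which is where TH_LOG() is typically used:

/* A deliberately failing example: EXPECT_* records the problem and
 * carries on, ASSERT_* aborts the test, and the optional { } block
 * after either one runs only on failure. */
#include "test_harness.h"

TEST(example_expect_vs_assert)
{
	int fd = -1;	/* pretend open() failed */

	EXPECT_EQ(0, fd) {
		TH_LOG("fd was %d, continuing anyway", fd);
	}

	ASSERT_NE(-1, fd) {
		TH_LOG("giving up: no usable fd");
	}

	/* Not reached: the failed ASSERT above ends the test. */
	EXPECT_TRUE(fd >= 0);
}

TEST_HARNESS_MAIN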
+ */ + +/* ASSERT_EQ(expected, measured): expected == measured */ +#define ASSERT_EQ TEST_API(ASSERT_EQ) +/* ASSERT_NE(expected, measured): expected != measured */ +#define ASSERT_NE TEST_API(ASSERT_NE) +/* ASSERT_LT(expected, measured): expected < measured */ +#define ASSERT_LT TEST_API(ASSERT_LT) +/* ASSERT_LE(expected, measured): expected <= measured */ +#define ASSERT_LE TEST_API(ASSERT_LE) +/* ASSERT_GT(expected, measured): expected > measured */ +#define ASSERT_GT TEST_API(ASSERT_GT) +/* ASSERT_GE(expected, measured): expected >= measured */ +#define ASSERT_GE TEST_API(ASSERT_GE) +/* ASSERT_NULL(measured): NULL == measured */ +#define ASSERT_NULL TEST_API(ASSERT_NULL) +/* ASSERT_TRUE(measured): measured != 0 */ +#define ASSERT_TRUE TEST_API(ASSERT_TRUE) +/* ASSERT_FALSE(measured): measured == 0 */ +#define ASSERT_FALSE TEST_API(ASSERT_FALSE) +/* ASSERT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define ASSERT_STREQ TEST_API(ASSERT_STREQ) +/* ASSERT_STRNE(expected, measured): strcmp(expected, measured) */ +#define ASSERT_STRNE TEST_API(ASSERT_STRNE) +/* EXPECT_EQ(expected, measured): expected == measured */ +#define EXPECT_EQ TEST_API(EXPECT_EQ) +/* EXPECT_NE(expected, measured): expected != measured */ +#define EXPECT_NE TEST_API(EXPECT_NE) +/* EXPECT_LT(expected, measured): expected < measured */ +#define EXPECT_LT TEST_API(EXPECT_LT) +/* EXPECT_LE(expected, measured): expected <= measured */ +#define EXPECT_LE TEST_API(EXPECT_LE) +/* EXPECT_GT(expected, measured): expected > measured */ +#define EXPECT_GT TEST_API(EXPECT_GT) +/* EXPECT_GE(expected, measured): expected >= measured */ +#define EXPECT_GE TEST_API(EXPECT_GE) +/* EXPECT_NULL(measured): NULL == measured */ +#define EXPECT_NULL TEST_API(EXPECT_NULL) +/* EXPECT_TRUE(measured): 0 != measured */ +#define EXPECT_TRUE TEST_API(EXPECT_TRUE) +/* EXPECT_FALSE(measured): 0 == measured */ +#define EXPECT_FALSE TEST_API(EXPECT_FALSE) +/* EXPECT_STREQ(expected, measured): !strcmp(expected, measured) */ +#define EXPECT_STREQ TEST_API(EXPECT_STREQ) +/* EXPECT_STRNE(expected, measured): strcmp(expected, measured) */ +#define EXPECT_STRNE TEST_API(EXPECT_STRNE) + +/* TH_LOG(format, ...) + * Optional debug logging function available for use in tests. + * Logging may be enabled or disabled by defining TH_LOG_ENABLED. + * E.g., #define TH_LOG_ENABLED 1 + * If no definition is provided, logging is enabled by default. + */ +#define TH_LOG TEST_API(TH_LOG) + +/* + * Internal implementation. + * + */ + +/* Utilities exposed to the test definitions */ +#ifndef TH_LOG_STREAM +# define TH_LOG_STREAM stderr +#endif + +#ifndef TH_LOG_ENABLED +# define TH_LOG_ENABLED 1 +#endif + +#define _TH_LOG(fmt, ...) do { \ + if (TH_LOG_ENABLED) \ + __TH_LOG(fmt, ##__VA_ARGS__); \ +} while (0) + +/* Unconditional logger for internal use. */ +#define __TH_LOG(fmt, ...) \ + fprintf(TH_LOG_STREAM, "%s:%d:%s:" fmt "\n", \ + __FILE__, __LINE__, _metadata->name, ##__VA_ARGS__) + +/* Defines the test function and creates the registration stub. */ +#define _TEST(test_name) __TEST_IMPL(test_name, -1) + +#define _TEST_SIGNAL(test_name, signal) __TEST_IMPL(test_name, signal) + +#define __TEST_IMPL(test_name, _signal) \ + static void test_name(struct __test_metadata *_metadata); \ + static struct __test_metadata _##test_name##_object = \ + { name: "global." 
#test_name, \ + fn: &test_name, termsig: _signal }; \ + static void __attribute__((constructor)) _register_##test_name(void) \ + { \ + __register_test(&_##test_name##_object); \ + } \ + static void test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata) + +/* Wraps the struct name so we have one less argument to pass around. */ +#define _FIXTURE_DATA(fixture_name) struct _test_data_##fixture_name + +/* Called once per fixture to setup the data and register. */ +#define _FIXTURE(fixture_name) \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_data(void) \ + { \ + __fixture_count++; \ + } \ + _FIXTURE_DATA(fixture_name) + +/* Prepares the setup function for the fixture. |_metadata| is included + * so that ASSERT_* work as a convenience. + */ +#define _FIXTURE_SETUP(fixture_name) \ + void fixture_name##_setup( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) +#define _FIXTURE_TEARDOWN(fixture_name) \ + void fixture_name##_teardown( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Emits test registration and helpers for fixture-based test + * cases. + * TODO(wad) register fixtures on dedicated test lists. + */ +#define _TEST_F(fixture_name, test_name) \ + __TEST_F_IMPL(fixture_name, test_name, -1) + +#define _TEST_F_SIGNAL(fixture_name, test_name, signal) \ + __TEST_F_IMPL(fixture_name, test_name, signal) + +#define __TEST_F_IMPL(fixture_name, test_name, signal) \ + static void fixture_name##_##test_name( \ + struct __test_metadata *_metadata, \ + _FIXTURE_DATA(fixture_name) *self); \ + static inline void wrapper_##fixture_name##_##test_name( \ + struct __test_metadata *_metadata) \ + { \ + /* fixture data is alloced, setup, and torn down per call. */ \ + _FIXTURE_DATA(fixture_name) self; \ + memset(&self, 0, sizeof(_FIXTURE_DATA(fixture_name))); \ + fixture_name##_setup(_metadata, &self); \ + /* Let setup failure terminate early. */ \ + if (!_metadata->passed) \ + return; \ + fixture_name##_##test_name(_metadata, &self); \ + fixture_name##_teardown(_metadata, &self); \ + } \ + static struct __test_metadata \ + _##fixture_name##_##test_name##_object = { \ + name: #fixture_name "." #test_name, \ + fn: &wrapper_##fixture_name##_##test_name, \ + termsig: signal, \ + }; \ + static void __attribute__((constructor)) \ + _register_##fixture_name##_##test_name(void) \ + { \ + __register_test(&_##fixture_name##_##test_name##_object); \ + } \ + static void fixture_name##_##test_name( \ + struct __test_metadata __attribute__((unused)) *_metadata, \ + _FIXTURE_DATA(fixture_name) __attribute__((unused)) *self) + +/* Exports a simple wrapper to run the test harness. 
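The macros above all follow the same registration pattern: declare the test function, emit a static __test_metadata object for it, and link that object into a global list from an __attribute__((constructor)) stub so that main() can walk the list later. A minimal sketch of that pattern in isolation (independent of this header; DEMO_TEST and the other names are made up for illustration):

#include <stdio.h>

struct demo_test {
	const char *name;
	void (*fn)(void);
	struct demo_test *next;
};

static struct demo_test *demo_list;

static void demo_register(struct demo_test *t)
{
	/* Constructors run before main(); prepending reverses declaration
	 * order, which is why the real header tracks __constructor_order. */
	t->next = demo_list;
	demo_list = t;
}

/* Declares the function, a metadata object, and a constructor to link it in. */
#define DEMO_TEST(test_name)						\
	static void test_name(void);					\
	static struct demo_test _##test_name##_obj = {			\
		.name = #test_name, .fn = test_name };			\
	static void __attribute__((constructor))			\
	_register_##test_name(void)					\
	{								\
		demo_register(&_##test_name##_obj);			\
	}								\
	static void test_name(void)

DEMO_TEST(says_hello)
{
	printf("hello from %s\n", __func__);
}

int main(void)
{
	struct demo_test *t;

	for (t = demo_list; t; t = t->next)
		t->fn();
	return 0;
}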
*/ +#define _TEST_HARNESS_MAIN \ + static void __attribute__((constructor)) \ + __constructor_order_last(void) \ + { \ + if (!__constructor_order) \ + __constructor_order = _CONSTRUCTOR_ORDER_BACKWARD; \ + } \ + int main(int argc, char **argv) { \ + return test_harness_run(argc, argv); \ + } + +#define _ASSERT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 1) +#define _ASSERT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 1) +#define _ASSERT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 1) +#define _ASSERT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 1) +#define _ASSERT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 1) +#define _ASSERT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 1) +#define _ASSERT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 1) + +#define _ASSERT_TRUE(_seen) \ + _ASSERT_NE(0, _seen) +#define _ASSERT_FALSE(_seen) \ + _ASSERT_EQ(0, _seen) +#define _ASSERT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 1) +#define _ASSERT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 1) + +#define _EXPECT_EQ(_expected, _seen) \ + __EXPECT(_expected, _seen, ==, 0) +#define _EXPECT_NE(_expected, _seen) \ + __EXPECT(_expected, _seen, !=, 0) +#define _EXPECT_LT(_expected, _seen) \ + __EXPECT(_expected, _seen, <, 0) +#define _EXPECT_LE(_expected, _seen) \ + __EXPECT(_expected, _seen, <=, 0) +#define _EXPECT_GT(_expected, _seen) \ + __EXPECT(_expected, _seen, >, 0) +#define _EXPECT_GE(_expected, _seen) \ + __EXPECT(_expected, _seen, >=, 0) + +#define _EXPECT_NULL(_seen) \ + __EXPECT(NULL, _seen, ==, 0) +#define _EXPECT_TRUE(_seen) \ + _EXPECT_NE(0, _seen) +#define _EXPECT_FALSE(_seen) \ + _EXPECT_EQ(0, _seen) + +#define _EXPECT_STREQ(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, ==, 0) +#define _EXPECT_STRNE(_expected, _seen) \ + __EXPECT_STR(_expected, _seen, !=, 0) + +#define ARRAY_SIZE(a) (sizeof(a) / sizeof(a[0])) + +/* Support an optional handler after and ASSERT_* or EXPECT_*. The approach is + * not thread-safe, but it should be fine in most sane test scenarios. + * + * Using __bail(), which optionally abort()s, is the easiest way to early + * return while still providing an optional block to the API consumer. + */ +#define OPTIONAL_HANDLER(_assert) \ + for (; _metadata->trigger; _metadata->trigger = __bail(_assert)) + +#define __EXPECT(_expected, _seen, _t, _assert) do { \ + /* Avoid multiple evaluation of the cases */ \ + __typeof__(_expected) __exp = (_expected); \ + __typeof__(_seen) __seen = (_seen); \ + if (!(__exp _t __seen)) { \ + unsigned long long __exp_print = 0; \ + unsigned long long __seen_print = 0; \ + /* Avoid casting complaints the scariest way we can. */ \ + memcpy(&__exp_print, &__exp, sizeof(__exp)); \ + memcpy(&__seen_print, &__seen, sizeof(__seen)); \ + __TH_LOG("Expected %s (%llu) %s %s (%llu)", \ + #_expected, __exp_print, #_t, \ + #_seen, __seen_print); \ + _metadata->passed = 0; \ + /* Ensure the optional handler is triggered */ \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +#define __EXPECT_STR(_expected, _seen, _t, _assert) do { \ + const char *__exp = (_expected); \ + const char *__seen = (_seen); \ + if (!(strcmp(__exp, __seen) _t 0)) { \ + __TH_LOG("Expected '%s' %s '%s'.", __exp, #_t, __seen); \ + _metadata->passed = 0; \ + _metadata->trigger = 1; \ + } \ +} while (0); OPTIONAL_HANDLER(_assert) + +/* Contains all the information for test execution and status checking. 
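OPTIONAL_HANDLER() above is the less obvious piece of the implementation: each __EXPECT() ends in a for loop whose condition only holds after a failure, so a brace block the caller writes immediately after the macro becomes the loop body and runs exactly once, and the assert variant can abort() from the loop-advance expression via __bail(). The same idiom in isolation, as an illustrative sketch (CHECK, bail and failed are invented names, not part of the header):

#include <stdio.h>
#include <stdlib.h>

static int failed;	/* plays the role of _metadata->trigger */

static int bail(int hard)
{
	if (hard)
		abort();	/* ASSERT-style: stop the test immediately */
	return 0;		/* EXPECT-style: clear the trigger, go on */
}

/* Check a condition; an optional { ... } block after the macro runs on failure. */
#define CHECK(cond, hard)						\
	do {								\
		if (!(cond)) {						\
			fprintf(stderr, "check failed: %s\n", #cond);	\
			failed = 1;					\
		} else {						\
			failed = 0;					\
		}							\
	} while (0);							\
	for (; failed; failed = bail(hard))

int main(void)
{
	int x = 2;

	CHECK(x == 2, 0);		/* passes, optional block skipped */
	CHECK(x == 3, 0) {		/* fails, block runs exactly once */
		fprintf(stderr, "x was %d\n", x);
	}
	return 0;
}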
*/ +struct __test_metadata { + const char *name; + void (*fn)(struct __test_metadata *); + int termsig; + int passed; + int trigger; /* extra handler after the evaluation */ + struct __test_metadata *prev, *next; +}; + +/* Storage for the (global) tests to be run. */ +static struct __test_metadata *__test_list; +static unsigned int __test_count; +static unsigned int __fixture_count; +static int __constructor_order; + +#define _CONSTRUCTOR_ORDER_FORWARD 1 +#define _CONSTRUCTOR_ORDER_BACKWARD -1 + +/* + * Since constructors are called in reverse order, reverse the test + * list so tests are run in source declaration order. + * https://gcc.gnu.org/onlinedocs/gccint/Initialization.html + * However, it seems not all toolchains do this correctly, so use + * __constructor_order to detect which direction is called first + * and adjust list building logic to get things running in the right + * direction. + */ +static inline void __register_test(struct __test_metadata *t) +{ + __test_count++; + /* Circular linked list where only prev is circular. */ + if (__test_list == NULL) { + __test_list = t; + t->next = NULL; + t->prev = t; + return; + } + if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { + t->next = NULL; + t->prev = __test_list->prev; + t->prev->next = t; + __test_list->prev = t; + } else { + t->next = __test_list; + t->next->prev = t; + t->prev = t; + __test_list = t; + } +} + +static inline int __bail(int for_realz) +{ + if (for_realz) + abort(); + return 0; +} + +void __run_test(struct __test_metadata *t) +{ + pid_t child_pid; + int status; + + t->passed = 1; + t->trigger = 0; + printf("[ RUN ] %s\n", t->name); + child_pid = fork(); + if (child_pid < 0) { + printf("ERROR SPAWNING TEST CHILD\n"); + t->passed = 0; + } else if (child_pid == 0) { + t->fn(t); + _exit(t->passed); + } else { + /* TODO(wad) add timeout support. */ + waitpid(child_pid, &status, 0); + if (WIFEXITED(status)) { + t->passed = t->termsig == -1 ? WEXITSTATUS(status) : 0; + if (t->termsig != -1) { + fprintf(TH_LOG_STREAM, + "%s: Test exited normally " + "instead of by signal (code: %d)\n", + t->name, + WEXITSTATUS(status)); + } + } else if (WIFSIGNALED(status)) { + t->passed = 0; + if (WTERMSIG(status) == SIGABRT) { + fprintf(TH_LOG_STREAM, + "%s: Test terminated by assertion\n", + t->name); + } else if (WTERMSIG(status) == t->termsig) { + t->passed = 1; + } else { + fprintf(TH_LOG_STREAM, + "%s: Test terminated unexpectedly " + "by signal %d\n", + t->name, + WTERMSIG(status)); + } + } else { + fprintf(TH_LOG_STREAM, + "%s: Test ended in some other way [%u]\n", + t->name, + status); + } + } + printf("[ %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name); +} + +static int test_harness_run(int __attribute__((unused)) argc, + char __attribute__((unused)) **argv) +{ + struct __test_metadata *t; + int ret = 0; + unsigned int count = 0; + unsigned int pass_count = 0; + + /* TODO(wad) add optional arguments similar to gtest. */ + printf("[==========] Running %u tests from %u test cases.\n", + __test_count, __fixture_count + 1); + for (t = __test_list; t; t = t->next) { + count++; + __run_test(t); + if (t->passed) + pass_count++; + else + ret = 1; + } + printf("[==========] %u / %u tests passed.\n", pass_count, count); + printf("[ %s ]\n", (ret ? 
"FAILED" : "PASSED")); + return ret; +} + +static void __attribute__((constructor)) __constructor_order_first(void) +{ + if (!__constructor_order) + __constructor_order = _CONSTRUCTOR_ORDER_FORWARD; +} + +#endif /* TEST_HARNESS_H_ */ diff --git a/tools/testing/selftests/size/Makefile b/tools/testing/selftests/size/Makefile index 04dc25e4fa92..bbd0b5398b61 100644 --- a/tools/testing/selftests/size/Makefile +++ b/tools/testing/selftests/size/Makefile @@ -1,12 +1,11 @@ -CC = $(CROSS_COMPILE)gcc - all: get_size get_size: get_size.c $(CC) -static -ffreestanding -nostartfiles -s $< -o $@ -run_tests: all - ./get_size +TEST_PROGS := get_size + +include ../lib.mk clean: $(RM) get_size diff --git a/tools/testing/selftests/sysctl/Makefile b/tools/testing/selftests/sysctl/Makefile index 0a92adaf0865..b3c33e071f10 100644 --- a/tools/testing/selftests/sysctl/Makefile +++ b/tools/testing/selftests/sysctl/Makefile @@ -4,16 +4,10 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests". all: -# Allow specific tests to be selected. -test_num: - @/bin/sh ./run_numerictests +TEST_PROGS := run_numerictests run_stringtests +TEST_FILES := common_tests -test_string: - @/bin/sh ./run_stringtests - -run_tests: all test_num test_string +include ../lib.mk # Nothing to clean up. clean: - -.PHONY: all run_tests clean test_num test_string diff --git a/tools/testing/selftests/sysctl/run_numerictests b/tools/testing/selftests/sysctl/run_numerictests index 8510f93f2d14..8510f93f2d14 100644..100755 --- a/tools/testing/selftests/sysctl/run_numerictests +++ b/tools/testing/selftests/sysctl/run_numerictests diff --git a/tools/testing/selftests/sysctl/run_stringtests b/tools/testing/selftests/sysctl/run_stringtests index 90a9293d520c..90a9293d520c 100644..100755 --- a/tools/testing/selftests/sysctl/run_stringtests +++ b/tools/testing/selftests/sysctl/run_stringtests diff --git a/tools/testing/selftests/timers/.gitignore b/tools/testing/selftests/timers/.gitignore new file mode 100644 index 000000000000..ced998151bc4 --- /dev/null +++ b/tools/testing/selftests/timers/.gitignore @@ -0,0 +1,18 @@ +alarmtimer-suspend +change_skew +clocksource-switch +inconsistency-check +leap-a-day +leapcrash +mqueue-lat +nanosleep +nsleep-lat +posix_timers +raw_skew +rtctest +set-2038 +set-tai +set-timer-lat +skew_consistency +threadtest +valid-adjtimex diff --git a/tools/testing/selftests/timers/Makefile b/tools/testing/selftests/timers/Makefile index eb2859f4ad21..89a3f44bf355 100644 --- a/tools/testing/selftests/timers/Makefile +++ b/tools/testing/selftests/timers/Makefile @@ -1,8 +1,36 @@ -all: - gcc posix_timers.c -o posix_timers -lrt +CC = $(CROSS_COMPILE)gcc +BUILD_FLAGS = -DKTEST +CFLAGS += -O3 -Wl,-no-as-needed -Wall $(BUILD_FLAGS) +LDFLAGS += -lrt -lpthread -run_tests: all - ./posix_timers +# these are all "safe" tests that don't modify +# system time or require escalated privledges +TEST_PROGS = posix_timers nanosleep nsleep-lat set-timer-lat mqueue-lat \ + inconsistency-check raw_skew threadtest rtctest + +TEST_PROGS_EXTENDED = alarmtimer-suspend valid-adjtimex change_skew \ + skew_consistency clocksource-switch leap-a-day \ + leapcrash set-tai set-2038 + +bins = $(TEST_PROGS) $(TEST_PROGS_EXTENDED) + +all: ${bins} + +include ../lib.mk + +# these tests require escalated privledges +# and may modify the system time or trigger +# other behavior like suspend +run_destructive_tests: run_tests + ./alarmtimer-suspend + ./valid-adjtimex + ./change_skew + ./skew_consistency + ./clocksource-switch + ./leap-a-day -s -i 10 + 
./leapcrash + ./set-tai + ./set-2038 clean: - rm -f ./posix_timers + rm -f ${bins} diff --git a/tools/testing/selftests/timers/alarmtimer-suspend.c b/tools/testing/selftests/timers/alarmtimer-suspend.c new file mode 100644 index 000000000000..72cacf5383dd --- /dev/null +++ b/tools/testing/selftests/timers/alarmtimer-suspend.c @@ -0,0 +1,189 @@ +/* alarmtimer suspend test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This test makes sure the alarmtimer & RTC wakeup code is + * functioning. + * + * To build: + * $ gcc alarmtimer-suspend.c -o alarmtimer-suspend -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + +#include <stdio.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNREASONABLE_LAT (NSEC_PER_SEC * 5) /* hopefully we resume in 5 secs */ + +#define SUSPEND_SECS 15 +int alarmcount; +int alarm_clock_id; +struct timespec start_time; + + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int final_ret = 0; + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(alarm_clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * SUSPEND_SECS * alarmcount; + + printf("ALARM(%i): %ld:%ld latency: %lld ns ", alarmcount, ts.tv_sec, + ts.tv_nsec, delta_ns); + + if (delta_ns > UNREASONABLE_LAT) { + printf("[FAIL]\n"); + final_ret = -1; + } else + printf("[OK]\n"); + +} + +int main(void) +{ + timer_t tm1; + struct itimerspec its1, its2; + struct sigevent 
se; + struct sigaction act; + int signum = SIGRTMAX; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + + for (alarm_clock_id = CLOCK_REALTIME_ALARM; + alarm_clock_id <= CLOCK_BOOTTIME_ALARM; + alarm_clock_id++) { + + alarmcount = 0; + if (timer_create(alarm_clock_id, &se, &tm1) == -1) { + printf("timer_create failled, %s unspported?\n", + clockstring(alarm_clock_id)); + break; + } + + clock_gettime(alarm_clock_id, &start_time); + printf("Start time (%s): %ld:%ld\n", clockstring(alarm_clock_id), + start_time.tv_sec, start_time.tv_nsec); + printf("Setting alarm for every %i seconds\n", SUSPEND_SECS); + its1.it_value = start_time; + its1.it_value.tv_sec += SUSPEND_SECS; + its1.it_interval.tv_sec = SUSPEND_SECS; + its1.it_interval.tv_nsec = 0; + + timer_settime(tm1, TIMER_ABSTIME, &its1, &its2); + + while (alarmcount < 5) + sleep(1); /* First 5 alarms, do nothing */ + + printf("Starting suspend loops\n"); + while (alarmcount < 10) { + int ret; + + sleep(3); + ret = system("echo mem > /sys/power/state"); + if (ret) + break; + } + timer_delete(tm1); + } + if (final_ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/change_skew.c b/tools/testing/selftests/timers/change_skew.c new file mode 100644 index 000000000000..cb1968977c04 --- /dev/null +++ b/tools/testing/selftests/timers/change_skew.c @@ -0,0 +1,107 @@ +/* ADJ_FREQ Skew change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob and + * then uses other tests to detect problems. Thus this test requires + * that the raw_skew, inconsistency-check and nanosleep tests be + * present in the same directory it is run from. + * + * To build: + * $ gcc change_skew.c -o change_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
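change_skew_test() below programs the frequency offset with tx.freq = ppm << 16 because the kernel stores it as parts per million with a 16-bit binary fraction. A read-only query shows the same scaling in the other direction; this sketch is illustrative only, and with modes == 0 it changes nothing and needs no privilege:

#include <stdio.h>
#include <sys/timex.h>

int main(void)
{
	struct timex tx = { .modes = 0 };	/* read-only query */
	int state = adjtimex(&tx);

	if (state < 0) {
		perror("adjtimex");
		return 1;
	}
	/* tx.freq is ppm with a 16-bit fractional part, hence "ppm << 16"
	 * when change_skew_test() writes it. */
	printf("freq offset: %.3f ppm, clock state: %d\n",
	       tx.freq / 65536.0, state);
	return 0;
}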
+ */ + + +#include <stdio.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + + +int change_skew_test(int ppm) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error adjusting freq\n"); + return ret; + } + + ret = system("./raw_skew"); + ret |= system("./inconsistency-check"); + ret |= system("./nanosleep"); + + return ret; +} + + +int main(int argv, char **argc) +{ + struct timex tx; + int i, ret; + + int ppm[5] = {0, 250, 500, -250, -500}; + + /* Kill ntpd */ + ret = system("killall -9 ntpd"); + + /* Make sure there's no offset adjustment going on */ + tx.modes = ADJ_OFFSET; + tx.offset = 0; + ret = adjtimex(&tx); + + if (ret < 0) { + printf("Maybe you're not running as root?\n"); + return -1; + } + + for (i = 0; i < 5; i++) { + printf("Using %i ppm adjustment\n", ppm[i]); + ret = change_skew_test(ppm[i]); + if (ret) + break; + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + if (ret) { + printf("[FAIL]"); + return ksft_exit_fail(); + } + printf("[OK]"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/clocksource-switch.c b/tools/testing/selftests/timers/clocksource-switch.c new file mode 100644 index 000000000000..627ec7425f78 --- /dev/null +++ b/tools/testing/selftests/timers/clocksource-switch.c @@ -0,0 +1,179 @@ +/* Clocksource change test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which quickly changes the clocksourc and + * then uses other tests to detect problems. Thus this test requires + * that the inconsistency-check and nanosleep tests be present in the + * same directory it is run from. + * + * To build: + * $ gcc clocksource-switch.c -o clocksource-switch -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <string.h> +#include <sys/wait.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +int get_clocksources(char list[][30]) +{ + int fd, i; + size_t size; + char buf[512]; + char *head, *tmp; + + fd = open("/sys/devices/system/clocksource/clocksource0/available_clocksource", O_RDONLY); + + size = read(fd, buf, 512); + + close(fd); + + for (i = 0; i < 30; i++) + list[i][0] = '\0'; + + head = buf; + i = 0; + while (head - buf < size) { + /* Find the next space */ + for (tmp = head; *tmp != ' '; tmp++) { + if (*tmp == '\n') + break; + if (*tmp == '\0') + break; + } + *tmp = '\0'; + strcpy(list[i], head); + head = tmp + 1; + i++; + } + + return i-1; +} + +int get_cur_clocksource(char *buf, size_t size) +{ + int fd; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_RDONLY); + + size = read(fd, buf, size); + + return 0; +} + +int change_clocksource(char *clocksource) +{ + int fd; + size_t size; + + fd = open("/sys/devices/system/clocksource/clocksource0/current_clocksource", O_WRONLY); + + if (fd < 0) + return -1; + + size = write(fd, clocksource, strlen(clocksource)); + + if (size < 0) + return -1; + + close(fd); + return 0; +} + + +int run_tests(int secs) +{ + int ret; + char buf[255]; + + sprintf(buf, "./inconsistency-check -t %i", secs); + ret = system(buf); + if (ret) + return ret; + ret = system("./nanosleep"); + return ret; +} + + +char clocksource_list[10][30]; + +int main(int argv, char **argc) +{ + char orig_clk[512]; + int count, i, status; + pid_t pid; + + get_cur_clocksource(orig_clk, 512); + + count = get_clocksources(clocksource_list); + + if (change_clocksource(clocksource_list[0])) { + printf("Error: You probably need to run this as root\n"); + return -1; + } + + /* Check everything is sane before we start switching asyncrhonously */ + for (i = 0; i < count; i++) { + printf("Validating clocksource %s\n", clocksource_list[i]); + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } + if (run_tests(5)) { + status = -1; + goto out; + } + } + + + printf("Running Asyncrhonous Switching Tests...\n"); + pid = fork(); + if (!pid) + return run_tests(60); + + while (pid != waitpid(pid, &status, WNOHANG)) + for (i = 0; i < count; i++) + if (change_clocksource(clocksource_list[i])) { + status = -1; + goto out; + } +out: + change_clocksource(orig_clk); + + if (status) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/inconsistency-check.c b/tools/testing/selftests/timers/inconsistency-check.c new file mode 100644 index 000000000000..caf1bc9257c4 --- /dev/null +++ b/tools/testing/selftests/timers/inconsistency-check.c @@ -0,0 +1,204 @@ +/* Time inconsistency check test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2003, 2004, 2005, 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc inconsistency-check.c -o inconsistency-check -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + + + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CALLS_PER_LOOP 64 +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + /* use unsigned to avoid false positives on 2038 rollover */ + if ((unsigned long)a.tv_sec < (unsigned long)b.tv_sec) + return 1; + if ((unsigned long)a.tv_sec > (unsigned long)b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + + + +int consistency_test(int clock_type, unsigned long seconds) +{ + struct timespec list[CALLS_PER_LOOP]; + int i, inconsistent; + long now, then; + time_t t; + char *start_str; + + clock_gettime(clock_type, &list[0]); + now = then = list[0].tv_sec; + + /* timestamp start of test */ + t = time(0); + start_str = ctime(&t); + + while (seconds == -1 || now - then < seconds) { + inconsistent = 0; + + /* Fill list */ + for (i = 0; i < CALLS_PER_LOOP; i++) + clock_gettime(clock_type, &list[i]); + + /* Check for inconsistencies */ + for (i = 0; i < CALLS_PER_LOOP - 1; i++) + if (!in_order(list[i], list[i+1])) + inconsistent = i; + + /* display inconsistency */ + if (inconsistent) { + unsigned long long delta; + + printf("\%s\n", start_str); + for (i = 0; i < CALLS_PER_LOOP; i++) { + if (i == inconsistent) + printf("--------------------\n"); + printf("%lu:%lu\n", list[i].tv_sec, + list[i].tv_nsec); + if (i == inconsistent + 1) + printf("--------------------\n"); + } + delta = list[inconsistent].tv_sec * NSEC_PER_SEC; + delta += list[inconsistent].tv_nsec; + delta -= list[inconsistent+1].tv_sec * NSEC_PER_SEC; + delta -= list[inconsistent+1].tv_nsec; + printf("Delta: %llu ns\n", delta); + fflush(0); + /* timestamp inconsistency*/ + t = time(0); + printf("%s\n", ctime(&t)); + printf("[FAILED]\n"); + return -1; + } + now = list[0].tv_sec; + } + printf("[OK]\n"); + return 0; 
+} + + +int main(int argc, char *argv[]) +{ + int clockid, opt; + int userclock = CLOCK_REALTIME; + int maxclocks = NR_CLOCKIDS; + int runtime = 10; + struct timespec ts; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:c:")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'c': + userclock = atoi(optarg); + maxclocks = userclock + 1; + break; + default: + printf("Usage: %s [-t <secs>] [-c <clockid>]\n", argv[0]); + printf(" -t: Number of seconds to run\n"); + printf(" -c: clockid to use (default, all clockids)\n"); + exit(-1); + } + } + + setbuf(stdout, NULL); + + for (clockid = userclock; clockid < maxclocks; clockid++) { + + if (clockid == CLOCK_HWSPECIFIC) + continue; + + if (!clock_gettime(clockid, &ts)) { + printf("Consistent %-30s ", clockstring(clockid)); + if (consistency_test(clockid, runtime)) + return ksft_exit_fail(); + } + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/leap-a-day.c b/tools/testing/selftests/timers/leap-a-day.c new file mode 100644 index 000000000000..fb46ad6ac92c --- /dev/null +++ b/tools/testing/selftests/timers/leap-a-day.c @@ -0,0 +1,388 @@ +/* Leap second stress test + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPLv2 + * + * This test signals the kernel to insert a leap second + * every day at midnight GMT. This allows for stressing the + * kernel's leap-second behavior, as well as how well applications + * handle the leap-second discontinuity. + * + * Usage: leap-a-day [-s] [-i <num>] + * + * Options: + * -s: Each iteration, set the date to 10 seconds before midnight GMT. + * This speeds up the number of leapsecond transitions tested, + * but because it calls settimeofday frequently, advancing the + * time by 24 hours every ~16 seconds, it may cause application + * disruption. + * + * -i: Number of iterations to run (default: infinite) + * + * Other notes: Disabling NTP prior to running this is advised, as the two + * may conflict in their commands to the kernel. + * + * To build: + * $ gcc leap-a-day.c -o leap-a-day -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details.
+ */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <sys/errno.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL +#define CLOCK_TAI 11 + +time_t next_leap; +int error_found; + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +char *time_state_str(int state) +{ + switch (state) { + case TIME_OK: return "TIME_OK"; + case TIME_INS: return "TIME_INS"; + case TIME_DEL: return "TIME_DEL"; + case TIME_OOP: return "TIME_OOP"; + case TIME_WAIT: return "TIME_WAIT"; + case TIME_BAD: return "TIME_BAD"; + } + return "ERROR"; +} + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + /* Clear maxerror, as it can cause UNSYNC to be set */ + tx.modes = ADJ_MAXERROR; + tx.maxerror = 0; + ret = adjtimex(&tx); + + /* Clear the status */ + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + +void sigalarm(int signo) +{ + struct timex tx; + int ret; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tx.time.tv_sec < next_leap) { + printf("Error: Early timer expiration! (Should be %ld)\n", next_leap); + error_found = 1; + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", + tx.time.tv_sec, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + if (ret != TIME_WAIT) { + printf("Error: Timer seeing incorrect NTP state? 
(Should be TIME_WAIT)\n"); + error_found = 1; + printf("adjtimex: %10ld sec + %6ld us (%i)\t%s\n", + tx.time.tv_sec, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } +} + + +/* Test for known hrtimer failure */ +void test_hrtimer_failure(void) +{ + struct timespec now, target; + + clock_gettime(CLOCK_REALTIME, &now); + target = timespec_add(now, NSEC_PER_SEC/2); + clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &target, NULL); + clock_gettime(CLOCK_REALTIME, &now); + + if (!in_order(target, now)) { + printf("ERROR: hrtimer early expiration failure observed.\n"); + error_found = 1; + } +} + +int main(int argc, char **argv) +{ + timer_t tm1; + struct itimerspec its1; + struct sigevent se; + struct sigaction act; + int signum = SIGRTMAX; + int settime = 0; + int tai_time = 0; + int insert = 1; + int iterations = -1; + int opt; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "sti:")) != -1) { + switch (opt) { + case 's': + printf("Setting time to speed up testing\n"); + settime = 1; + break; + case 'i': + iterations = atoi(optarg); + break; + case 't': + tai_time = 1; + break; + default: + printf("Usage: %s [-s] [-i <iterations>]\n", argv[0]); + printf(" -s: Set time to right before leap second each iteration\n"); + printf(" -i: Number of iterations\n"); + printf(" -t: Print TAI time\n"); + exit(-1); + } + } + + /* Make sure TAI support is present if -t was used */ + if (tai_time) { + struct timespec ts; + + if (clock_gettime(CLOCK_TAI, &ts)) { + printf("System doesn't support CLOCK_TAI\n"); + ksft_exit_fail(); + } + } + + signal(SIGINT, handler); + signal(SIGKILL, handler); + + /* Set up timer signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + if (iterations < 0) + printf("This runs continuously. Press ctrl-c to stop\n"); + else + printf("Running for %i iterations. 
Press ctrl-c to stop\n", iterations); + + printf("\n"); + while (1) { + int ret; + struct timespec ts; + struct timex tx; + time_t now; + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + if (settime) { + struct timeval tv; + + tv.tv_sec = next_leap - 10; + tv.tv_usec = 0; + settimeofday(&tv, NULL); + printf("Setting time to %s", ctime(&tv.tv_sec)); + } + + /* Reset NTP time state */ + clear_time_state(); + + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + if (ret < 0) { + printf("Error: Problem setting STA_INS/STA_DEL!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + /* Validate STA_INS was set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Error: STA_INS/STA_DEL not set!: %s\n", + time_state_str(ret)); + return ksft_exit_fail(); + } + + if (tai_time) { + printf("Using TAI time," + " no inconsistencies should be seen!\n"); + } + + printf("Scheduling leap second for %s", ctime(&next_leap)); + + /* Set up timer */ + printf("Setting timer for %ld - %s", next_leap, ctime(&next_leap)); + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = signum; + se.sigev_value.sival_int = 0; + if (timer_create(CLOCK_REALTIME, &se, &tm1) == -1) { + printf("Error: timer_create failed\n"); + return ksft_exit_fail(); + } + its1.it_value.tv_sec = next_leap; + its1.it_value.tv_nsec = 0; + its1.it_interval.tv_sec = 0; + its1.it_interval.tv_nsec = 0; + timer_settime(tm1, TIMER_ABSTIME, &its1, NULL); + + /* Wake up 3 seconds before leap */ + ts.tv_sec = next_leap - 3; + ts.tv_nsec = 0; + + + while (clock_nanosleep(CLOCK_REALTIME, TIMER_ABSTIME, &ts, NULL)) + printf("Something woke us up, returning to sleep\n"); + + /* Validate STA_INS is still set */ + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.status != STA_INS && tx.status != STA_DEL) { + printf("Something cleared STA_INS/STA_DEL, setting it again.\n"); + tx.modes = ADJ_STATUS; + if (insert) + tx.status = STA_INS; + else + tx.status = STA_DEL; + ret = adjtimex(&tx); + } + + /* Check adjtimex output every half second */ + now = tx.time.tv_sec; + while (now < next_leap + 2) { + char buf[26]; + struct timespec tai; + int ret; + + tx.modes = 0; + ret = adjtimex(&tx); + + if (tai_time) { + clock_gettime(CLOCK_TAI, &tai); + printf("%ld sec, %9ld ns\t%s\n", + tai.tv_sec, + tai.tv_nsec, + time_state_str(ret)); + } else { + ctime_r(&tx.time.tv_sec, buf); + buf[strlen(buf)-1] = 0; /*remove trailing\n */ + + printf("%s + %6ld us (%i)\t%s\n", + buf, + tx.time.tv_usec, + tx.tai, + time_state_str(ret)); + } + now = tx.time.tv_sec; + /* Sleep for another half second */ + ts.tv_sec = 0; + ts.tv_nsec = NSEC_PER_SEC / 2; + clock_nanosleep(CLOCK_MONOTONIC, 0, &ts, NULL); + } + /* Switch to using other mode */ + insert = !insert; + + /* Note if kernel has known hrtimer failure */ + test_hrtimer_failure(); + + printf("Leap complete\n"); + if (error_found) { + printf("Errors observed\n"); + clear_time_state(); + return ksft_exit_fail(); + } + printf("\n"); + if ((iterations != -1) && !(--iterations)) + break; + } + + clear_time_state(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/leapcrash.c b/tools/testing/selftests/timers/leapcrash.c new file mode 100644 index 000000000000..a1071bdbdeb7 --- 
/dev/null +++ b/tools/testing/selftests/timers/leapcrash.c @@ -0,0 +1,120 @@ +/* Demo leapsecond deadlock + * by: John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright 2013, 2015 Linaro Limited + * Licensed under the GPL + * + * This test demonstrates leapsecond deadlock that is possible + * on kernels from 2.6.26 to 3.3. + * + * WARNING: THIS WILL LIKELY HARDHANG SYSTEMS AND MAY LOSE DATA + * RUN AT YOUR OWN RISK! + * To build: + * $ gcc leapcrash.c -o leapcrash -lrt + */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + /* + * We have to call adjtime twice here, as kernels + * prior to 6b1859dba01c7 (included in 3.5 and + * -stable), had an issue with the state machine + * and wouldn't clear the STA_INS/DEL flag directly. + */ + tx.modes = ADJ_STATUS; + tx.status = STA_PLL; + ret = adjtimex(&tx); + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + + return ret; +} + +/* Make sure we cleanup on ctrl-c */ +void handler(int unused) +{ + clear_time_state(); + exit(0); +} + + +int main(void) +{ + struct timex tx; + struct timespec ts; + time_t next_leap; + int count = 0; + + setbuf(stdout, NULL); + + signal(SIGINT, handler); + signal(SIGKILL, handler); + printf("This runs for a few minutes. Press ctrl-c to stop\n"); + + clear_time_state(); + + + /* Get the current time */ + clock_gettime(CLOCK_REALTIME, &ts); + + /* Calculate the next possible leap second 23:59:60 GMT */ + next_leap = ts.tv_sec; + next_leap += 86400 - (next_leap % 86400); + + for (count = 0; count < 20; count++) { + struct timeval tv; + + + /* set the time to 2 seconds before the leap */ + tv.tv_sec = next_leap - 2; + tv.tv_usec = 0; + if (settimeofday(&tv, NULL)) { + printf("Error: You're likely not running with proper (ie: root) permissions\n"); + return ksft_exit_fail(); + } + tx.modes = 0; + adjtimex(&tx); + + /* hammer on adjtime w/ STA_INS */ + while (tx.time.tv_sec < next_leap + 1) { + /* Set the leap second insert flag */ + tx.modes = ADJ_STATUS; + tx.status = STA_INS; + adjtimex(&tx); + } + clear_time_state(); + printf("."); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/mqueue-lat.c b/tools/testing/selftests/timers/mqueue-lat.c new file mode 100644 index 000000000000..a2a3924d0b41 --- /dev/null +++ b/tools/testing/selftests/timers/mqueue-lat.c @@ -0,0 +1,124 @@ +/* Measure mqueue timeout latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * + * Inspired with permission from example test by: + * Romain Francoise <romain@orebokech.com> + * Licensed under the GPLv2 + * + * To build: + * $ gcc mqueue-lat.c -o mqueue-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.
See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <errno.h> +#include <mqueue.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define TARGET_TIMEOUT 100000000 /* 100ms in nanoseconds */ +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int mqueue_lat_test(void) +{ + + mqd_t q; + struct mq_attr attr; + struct timespec start, end, now, target; + int i, count, ret; + + q = mq_open("/foo", O_CREAT | O_RDONLY, 0666, NULL); + if (q < 0) { + perror("mq_open"); + return -1; + } + mq_getattr(q, &attr); + + + count = 100; + clock_gettime(CLOCK_MONOTONIC, &start); + + for (i = 0; i < count; i++) { + char buf[attr.mq_msgsize]; + + clock_gettime(CLOCK_REALTIME, &now); + target = now; + target = timespec_add(now, TARGET_TIMEOUT); /* 100ms */ + + ret = mq_timedreceive(q, buf, sizeof(buf), NULL, &target); + if (ret < 0 && errno != ETIMEDOUT) { + perror("mq_timedreceive"); + return -1; + } + } + clock_gettime(CLOCK_MONOTONIC, &end); + + mq_close(q); + + if ((timespec_sub(start, end)/count) > TARGET_TIMEOUT + UNRESONABLE_LATENCY) + return -1; + + return 0; +} + +int main(int argc, char **argv) +{ + int ret; + + printf("Mqueue latency : "); + + ret = mqueue_lat_test(); + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/nanosleep.c b/tools/testing/selftests/timers/nanosleep.c new file mode 100644 index 000000000000..8a3c29de7d49 --- /dev/null +++ b/tools/testing/selftests/timers/nanosleep.c @@ -0,0 +1,174 @@ +/* Make sure timers don't return early + * by: john stultz (johnstul@us.ibm.com) + * John Stultz (john.stultz@linaro.org) + * (C) Copyright IBM 2012 + * (C) Copyright Linaro 2013 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nanosleep.c -o nanosleep -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +/* returns 1 if a <= b, 0 otherwise */ +static inline int in_order(struct timespec a, struct timespec b) +{ + if (a.tv_sec < b.tv_sec) + return 1; + if (a.tv_sec > b.tv_sec) + return 0; + if (a.tv_nsec > b.tv_nsec) + return 0; + return 1; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + +int nanosleep_test(int clockid, long long ns) +{ + struct timespec now, target, rel; + + /* First check abs time */ + if (clock_gettime(clockid, &now)) + return UNSUPPORTED; + target = timespec_add(now, ns); + + if (clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL)) + return UNSUPPORTED; + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + + /* Second check reltime */ + clock_gettime(clockid, &now); + rel.tv_sec = 0; + rel.tv_nsec = 0; + rel = timespec_add(rel, ns); + target = timespec_add(now, ns); + clock_nanosleep(clockid, 0, &rel, NULL); + clock_gettime(clockid, &now); + + if (!in_order(target, now)) + return -1; + return 0; +} + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("Nanosleep %-31s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_test(clockid, length); + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + goto next; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + length *= 100; + } + printf("[OK]\n"); +next: + ret = 0; + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/nsleep-lat.c b/tools/testing/selftests/timers/nsleep-lat.c new file mode 100644 index 000000000000..2d7898fda0f1 --- /dev/null 
+++ b/tools/testing/selftests/timers/nsleep-lat.c @@ -0,0 +1,190 @@ +/* Measure nanosleep timer latency + * by: john stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * To build: + * $ gcc nsleep-lat.c -o nsleep-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000ULL + +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + +#define UNSUPPORTED 0xf00f + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + +struct timespec timespec_add(struct timespec ts, unsigned long long ns) +{ + ts.tv_nsec += ns; + while (ts.tv_nsec >= NSEC_PER_SEC) { + ts.tv_nsec -= NSEC_PER_SEC; + ts.tv_sec++; + } + return ts; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + +int nanosleep_lat_test(int clockid, long long ns) +{ + struct timespec start, end, target; + long long latency = 0; + int i, count; + + target.tv_sec = ns/NSEC_PER_SEC; + target.tv_nsec = ns%NSEC_PER_SEC; + + if (clock_gettime(clockid, &start)) + return UNSUPPORTED; + if (clock_nanosleep(clockid, 0, &target, NULL)) + return UNSUPPORTED; + + count = 10; + + /* First check relative latency */ + clock_gettime(clockid, &start); + for (i = 0; i < count; i++) + clock_nanosleep(clockid, 0, &target, NULL); + clock_gettime(clockid, &end); + + if (((timespec_sub(start, end)/count)-ns) > UNRESONABLE_LATENCY) { + printf("Large rel latency: %lld ns :", (timespec_sub(start, end)/count)-ns); + return -1; + } + + /* Next check absolute latency */ + for (i = 0; i < count; i++) { + 
clock_gettime(clockid, &start); + target = timespec_add(start, ns); + clock_nanosleep(clockid, TIMER_ABSTIME, &target, NULL); + clock_gettime(clockid, &end); + latency += timespec_sub(target, end); + } + + if (latency/count > UNRESONABLE_LATENCY) { + printf("Large abs latency: %lld ns :", latency/count); + return -1; + } + + return 0; +} + + + +int main(int argc, char **argv) +{ + long long length; + int clockid, ret; + + for (clockid = CLOCK_REALTIME; clockid < NR_CLOCKIDS; clockid++) { + + /* Skip cputime clockids since nanosleep won't increment cputime */ + if (clockid == CLOCK_PROCESS_CPUTIME_ID || + clockid == CLOCK_THREAD_CPUTIME_ID || + clockid == CLOCK_HWSPECIFIC) + continue; + + printf("nsleep latency %-26s ", clockstring(clockid)); + + length = 10; + while (length <= (NSEC_PER_SEC * 10)) { + ret = nanosleep_lat_test(clockid, length); + if (ret) + break; + length *= 100; + + } + + if (ret == UNSUPPORTED) { + printf("[UNSUPPORTED]\n"); + continue; + } + if (ret < 0) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + } + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/posix_timers.c b/tools/testing/selftests/timers/posix_timers.c index f87d970a485c..5a246a02dff3 100644 --- a/tools/testing/selftests/timers/posix_timers.c +++ b/tools/testing/selftests/timers/posix_timers.c @@ -35,10 +35,11 @@ static void user_loop(void) static void kernel_loop(void) { void *addr = sbrk(0); + int err = 0; - while (!done) { - brk(addr + 4096); - brk(addr); + while (!done && !err) { + err = brk(addr + 4096); + err |= brk(addr); } } @@ -190,8 +191,6 @@ static int check_timer_create(int which) int main(int argc, char **argv) { - int err; - printf("Testing posix timers. False negative may happen on CPU execution \n"); printf("based timers if other threads run on the CPU...\n"); diff --git a/tools/testing/selftests/timers/raw_skew.c b/tools/testing/selftests/timers/raw_skew.c new file mode 100644 index 000000000000..30906bfd9c1b --- /dev/null +++ b/tools/testing/selftests/timers/raw_skew.c @@ -0,0 +1,154 @@ +/* CLOCK_MONOTONIC vs CLOCK_MONOTONIC_RAW skew test + * by: john stultz (johnstul@us.ibm.com) + * John Stultz <john.stultz@linaro.org> + * (C) Copyright IBM 2012 + * (C) Copyright Linaro Limited 2015 + * Licensed under the GPLv2 + * + * To build: + * $ gcc raw_skew.c -o raw_skew -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +#define CLOCK_MONOTONIC_RAW 4 +#define NSEC_PER_SEC 1000000000LL + +#define shift_right(x, s) ({ \ + __typeof__(x) __x = (x); \ + __typeof__(s) __s = (s); \ + __x < 0 ? 
-(-__x >> __s) : __x >> __s; \ +}) + +long long llabs(long long val) +{ + if (val < 0) + val = -val; + return val; +} + +unsigned long long ts_to_nsec(struct timespec ts) +{ + return ts.tv_sec * NSEC_PER_SEC + ts.tv_nsec; +} + +struct timespec nsec_to_ts(long long ns) +{ + struct timespec ts; + + ts.tv_sec = ns/NSEC_PER_SEC; + ts.tv_nsec = ns%NSEC_PER_SEC; + return ts; +} + +long long diff_timespec(struct timespec start, struct timespec end) +{ + long long start_ns, end_ns; + + start_ns = ts_to_nsec(start); + end_ns = ts_to_nsec(end); + return end_ns - start_ns; +} + +void get_monotonic_and_raw(struct timespec *mon, struct timespec *raw) +{ + struct timespec start, mid, end; + long long diff = 0, tmp; + int i; + + for (i = 0; i < 3; i++) { + long long newdiff; + + clock_gettime(CLOCK_MONOTONIC, &start); + clock_gettime(CLOCK_MONOTONIC_RAW, &mid); + clock_gettime(CLOCK_MONOTONIC, &end); + + newdiff = diff_timespec(start, end); + if (diff == 0 || newdiff < diff) { + diff = newdiff; + *raw = mid; + tmp = (ts_to_nsec(start) + ts_to_nsec(end))/2; + *mon = nsec_to_ts(tmp); + } + } +} + +int main(int argv, char **argc) +{ + struct timespec mon, raw, start, end; + long long delta1, delta2, interval, eppm, ppm; + struct timex tx1, tx2; + + setbuf(stdout, NULL); + + if (clock_gettime(CLOCK_MONOTONIC_RAW, &raw)) { + printf("ERR: NO CLOCK_MONOTONIC_RAW\n"); + return -1; + } + + tx1.modes = 0; + adjtimex(&tx1); + get_monotonic_and_raw(&mon, &raw); + start = mon; + delta1 = diff_timespec(mon, raw); + + if (tx1.offset) + printf("WARNING: ADJ_OFFSET in progress, this will cause inaccurate results\n"); + + printf("Estimating clock drift: "); + sleep(120); + + get_monotonic_and_raw(&mon, &raw); + end = mon; + tx2.modes = 0; + adjtimex(&tx2); + delta2 = diff_timespec(mon, raw); + + interval = diff_timespec(start, end); + + /* calculate measured ppm between MONOTONIC and MONOTONIC_RAW */ + eppm = ((delta2-delta1)*NSEC_PER_SEC)/interval; + eppm = -eppm; + printf("%lld.%i(est)", eppm/1000, abs((int)(eppm%1000))); + + /* Avg the two actual freq samples adjtimex gave us */ + ppm = (tx1.freq + tx2.freq) * 1000 / 2; + ppm = (long long)tx1.freq * 1000; + ppm = shift_right(ppm, 16); + printf(" %lld.%i(act)", ppm/1000, abs((int)(ppm%1000))); + + if (llabs(eppm - ppm) > 1000) { + printf(" [FAILED]\n"); + return ksft_exit_fail(); + } + printf(" [OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/rtctest.c b/tools/testing/selftests/timers/rtctest.c new file mode 100644 index 000000000000..d80ae852334d --- /dev/null +++ b/tools/testing/selftests/timers/rtctest.c @@ -0,0 +1,271 @@ +/* + * Real Time Clock Driver Test/Example Program + * + * Compile with: + * gcc -s -Wall -Wstrict-prototypes rtctest.c -o rtctest + * + * Copyright (C) 1996, Paul Gortmaker. + * + * Released under the GNU General Public License, version 2, + * included herein by reference. + * + */ + +#include <stdio.h> +#include <linux/rtc.h> +#include <sys/ioctl.h> +#include <sys/time.h> +#include <sys/types.h> +#include <fcntl.h> +#include <unistd.h> +#include <stdlib.h> +#include <errno.h> + + +/* + * This expects the new RTC class driver framework, working with + * clocks that will often not be clones of what the PC-AT had. + * Use the command line to specify another RTC if you need one. 
+ */ +static const char default_rtc[] = "/dev/rtc0"; + + +int main(int argc, char **argv) +{ + int i, fd, retval, irqcount = 0; + unsigned long tmp, data; + struct rtc_time rtc_tm; + const char *rtc = default_rtc; + struct timeval start, end, diff; + + switch (argc) { + case 2: + rtc = argv[1]; + /* FALLTHROUGH */ + case 1: + break; + default: + fprintf(stderr, "usage: rtctest [rtcdev]\n"); + return 1; + } + + fd = open(rtc, O_RDONLY); + + if (fd == -1) { + perror(rtc); + exit(errno); + } + + fprintf(stderr, "\n\t\t\tRTC Driver Test Example.\n\n"); + + /* Turn on update interrupts (one per second) */ + retval = ioctl(fd, RTC_UIE_ON, 0); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Update IRQs not supported.\n"); + goto test_READ; + } + perror("RTC_UIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Counting 5 update (1/sec) interrupts from reading %s:", + rtc); + fflush(stderr); + for (i=1; i<6; i++) { + /* This read will block */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + fprintf(stderr, "\nAgain, from using select(2) on /dev/rtc:"); + fflush(stderr); + for (i=1; i<6; i++) { + struct timeval tv = {5, 0}; /* 5 second timeout on select */ + fd_set readfds; + + FD_ZERO(&readfds); + FD_SET(fd, &readfds); + /* The select will wait until an RTC interrupt happens. */ + retval = select(fd+1, &readfds, NULL, NULL, &tv); + if (retval == -1) { + perror("select"); + exit(errno); + } + /* This read won't block unlike the select-less case above. */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Turn off update interrupts */ + retval = ioctl(fd, RTC_UIE_OFF, 0); + if (retval == -1) { + perror("RTC_UIE_OFF ioctl"); + exit(errno); + } + +test_READ: + /* Read the RTC time/date */ + retval = ioctl(fd, RTC_RD_TIME, &rtc_tm); + if (retval == -1) { + perror("RTC_RD_TIME ioctl"); + exit(errno); + } + + fprintf(stderr, "\n\nCurrent RTC date/time is %d-%d-%d, %02d:%02d:%02d.\n", + rtc_tm.tm_mday, rtc_tm.tm_mon + 1, rtc_tm.tm_year + 1900, + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Set the alarm to 5 sec in the future, and check for rollover */ + rtc_tm.tm_sec += 5; + if (rtc_tm.tm_sec >= 60) { + rtc_tm.tm_sec %= 60; + rtc_tm.tm_min++; + } + if (rtc_tm.tm_min == 60) { + rtc_tm.tm_min = 0; + rtc_tm.tm_hour++; + } + if (rtc_tm.tm_hour == 24) + rtc_tm.tm_hour = 0; + + retval = ioctl(fd, RTC_ALM_SET, &rtc_tm); + if (retval == -1) { + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Alarm IRQs not supported.\n"); + goto test_PIE; + } + perror("RTC_ALM_SET ioctl"); + exit(errno); + } + + /* Read the current alarm settings */ + retval = ioctl(fd, RTC_ALM_READ, &rtc_tm); + if (retval == -1) { + perror("RTC_ALM_READ ioctl"); + exit(errno); + } + + fprintf(stderr, "Alarm time now set to %02d:%02d:%02d.\n", + rtc_tm.tm_hour, rtc_tm.tm_min, rtc_tm.tm_sec); + + /* Enable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_ON, 0); + if (retval == -1) { + perror("RTC_AIE_ON ioctl"); + exit(errno); + } + + fprintf(stderr, "Waiting 5 seconds for alarm..."); + fflush(stderr); + /* This blocks until the alarm ring causes an interrupt */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + irqcount++; + fprintf(stderr, " okay. 
Alarm rang.\n"); + + /* Disable alarm interrupts */ + retval = ioctl(fd, RTC_AIE_OFF, 0); + if (retval == -1) { + perror("RTC_AIE_OFF ioctl"); + exit(errno); + } + +test_PIE: + /* Read periodic IRQ rate */ + retval = ioctl(fd, RTC_IRQP_READ, &tmp); + if (retval == -1) { + /* not all RTCs support periodic IRQs */ + if (errno == ENOTTY) { + fprintf(stderr, "\nNo periodic IRQ support\n"); + goto done; + } + perror("RTC_IRQP_READ ioctl"); + exit(errno); + } + fprintf(stderr, "\nPeriodic IRQ rate is %ldHz.\n", tmp); + + fprintf(stderr, "Counting 20 interrupts at:"); + fflush(stderr); + + /* The frequencies 128Hz, 256Hz, ... 8192Hz are only allowed for root. */ + for (tmp=2; tmp<=64; tmp*=2) { + + retval = ioctl(fd, RTC_IRQP_SET, tmp); + if (retval == -1) { + /* not all RTCs can change their periodic IRQ rate */ + if (errno == ENOTTY) { + fprintf(stderr, + "\n...Periodic IRQ rate is fixed\n"); + goto done; + } + perror("RTC_IRQP_SET ioctl"); + exit(errno); + } + + fprintf(stderr, "\n%ldHz:\t", tmp); + fflush(stderr); + + /* Enable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_ON, 0); + if (retval == -1) { + perror("RTC_PIE_ON ioctl"); + exit(errno); + } + + for (i=1; i<21; i++) { + gettimeofday(&start, NULL); + /* This blocks */ + retval = read(fd, &data, sizeof(unsigned long)); + if (retval == -1) { + perror("read"); + exit(errno); + } + gettimeofday(&end, NULL); + timersub(&end, &start, &diff); + if (diff.tv_sec > 0 || + diff.tv_usec > ((1000000L / tmp) * 1.10)) { + fprintf(stderr, "\nPIE delta error: %ld.%06ld should be close to 0.%06ld\n", + diff.tv_sec, diff.tv_usec, + (1000000L / tmp)); + fflush(stdout); + exit(-1); + } + + fprintf(stderr, " %d",i); + fflush(stderr); + irqcount++; + } + + /* Disable periodic interrupts */ + retval = ioctl(fd, RTC_PIE_OFF, 0); + if (retval == -1) { + perror("RTC_PIE_OFF ioctl"); + exit(errno); + } + } + +done: + fprintf(stderr, "\n\n\t\t\t *** Test complete ***\n"); + + close(fd); + + return 0; +} diff --git a/tools/testing/selftests/timers/set-2038.c b/tools/testing/selftests/timers/set-2038.c new file mode 100644 index 000000000000..c8a7e14446b1 --- /dev/null +++ b/tools/testing/selftests/timers/set-2038.c @@ -0,0 +1,144 @@ +/* Time bounds setting test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which sets the time to edge cases then + * uses other tests to detect problems. Thus this test requires that + * the inconsistency-check and nanosleep tests be present in the same + * directory it is run from. + * + * To build: + * $ gcc set-2038.c -o set-2038 -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <time.h> +#include <sys/time.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +#define KTIME_MAX ((long long)~((unsigned long long)1 << 63)) +#define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) + +#define YEAR_1901 (-0x7fffffffL) +#define YEAR_1970 1 +#define YEAR_2038 0x7fffffffL /*overflows 32bit time_t */ +#define YEAR_2262 KTIME_SEC_MAX /*overflows 64bit ktime_t */ +#define YEAR_MAX ((long long)((1ULL<<63)-1)) /*overflows 64bit time_t */ + +int is32bits(void) +{ + return (sizeof(long) == 4); +} + +int settime(long long time) +{ + struct timeval now; + int ret; + + now.tv_sec = (time_t)time; + now.tv_usec = 0; + + ret = settimeofday(&now, NULL); + + printf("Setting time to 0x%lx: %d\n", (long)time, ret); + return ret; +} + +int do_tests(void) +{ + int ret; + + ret = system("date"); + ret = system("./inconsistency-check -c 0 -t 20"); + ret |= system("./nanosleep"); + ret |= system("./nsleep-lat"); + return ret; + +} + +int main(int argc, char *argv[]) +{ + int ret = 0; + int opt, dangerous = 0; + time_t start; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "d")) != -1) { + switch (opt) { + case 'd': + dangerous = 1; + } + } + + start = time(0); + + /* First test that crazy values don't work */ + if (!settime(YEAR_1901)) { + ret = -1; + goto out; + } + if (!settime(YEAR_MAX)) { + ret = -1; + goto out; + } + if (!is32bits() && !settime(YEAR_2262)) { + ret = -1; + goto out; + } + + /* Now test behavior near edges */ + settime(YEAR_1970); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2038 - 600); + ret = do_tests(); + if (ret) + goto out; + + /* The rest of the tests can blowup on 32bit systems */ + if (is32bits() && !dangerous) + goto out; + /* Test rollover behavior 32bit edge */ + settime(YEAR_2038 - 10); + ret = do_tests(); + if (ret) + goto out; + + settime(YEAR_2262 - 600); + ret = do_tests(); + +out: + /* restore clock */ + settime(start); + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/set-tai.c b/tools/testing/selftests/timers/set-tai.c new file mode 100644 index 000000000000..dc88dbc8831f --- /dev/null +++ b/tools/testing/selftests/timers/set-tai.c @@ -0,0 +1,79 @@ +/* Set tai offset + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro 2013 + * Licensed under the GPLv2 + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +int set_tai(int offset) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + tx.modes = ADJ_TAI; + tx.constant = offset; + + return adjtimex(&tx); +} + +int get_tai(void) +{ + struct timex tx; + + memset(&tx, 0, sizeof(tx)); + + adjtimex(&tx); + return tx.tai; +} + +int main(int argc, char **argv) +{ + int i, ret; + + ret = get_tai(); + printf("tai offset started at %i\n", ret); + + printf("Checking tai offsets can be properly set: "); + for (i = 1; i <= 60; i++) { + ret = set_tai(i); + ret = get_tai(); + if (ret != i) { + printf("[FAILED] expected: %i got %i\n", i, ret); + return ksft_exit_fail(); + } + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/set-timer-lat.c b/tools/testing/selftests/timers/set-timer-lat.c new file mode 100644 index 000000000000..4fc98c5b0899 --- /dev/null +++ b/tools/testing/selftests/timers/set-timer-lat.c @@ -0,0 +1,216 @@ +/* set_timer latency test + * John Stultz (john.stultz@linaro.org) + * (C) Copyright Linaro 2014 + * Licensed under the GPLv2 + * + * This test makes sure the set_timer api is correct + * + * To build: + * $ gcc set-timer-lat.c -o set-timer-lat -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + +#include <stdio.h> +#include <unistd.h> +#include <time.h> +#include <string.h> +#include <signal.h> +#include <stdlib.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define CLOCK_REALTIME 0 +#define CLOCK_MONOTONIC 1 +#define CLOCK_PROCESS_CPUTIME_ID 2 +#define CLOCK_THREAD_CPUTIME_ID 3 +#define CLOCK_MONOTONIC_RAW 4 +#define CLOCK_REALTIME_COARSE 5 +#define CLOCK_MONOTONIC_COARSE 6 +#define CLOCK_BOOTTIME 7 +#define CLOCK_REALTIME_ALARM 8 +#define CLOCK_BOOTTIME_ALARM 9 +#define CLOCK_HWSPECIFIC 10 +#define CLOCK_TAI 11 +#define NR_CLOCKIDS 12 + + +#define NSEC_PER_SEC 1000000000ULL +#define UNRESONABLE_LATENCY 40000000 /* 40ms in nanosecs */ + +#define TIMER_SECS 1 +int alarmcount; +int clock_id; +struct timespec start_time; +long long max_latency_ns; + +char *clockstring(int clockid) +{ + switch (clockid) { + case CLOCK_REALTIME: + return "CLOCK_REALTIME"; + case CLOCK_MONOTONIC: + return "CLOCK_MONOTONIC"; + case CLOCK_PROCESS_CPUTIME_ID: + return "CLOCK_PROCESS_CPUTIME_ID"; + case CLOCK_THREAD_CPUTIME_ID: + return "CLOCK_THREAD_CPUTIME_ID"; + case CLOCK_MONOTONIC_RAW: + return "CLOCK_MONOTONIC_RAW"; + case CLOCK_REALTIME_COARSE: + return "CLOCK_REALTIME_COARSE"; + case CLOCK_MONOTONIC_COARSE: + return "CLOCK_MONOTONIC_COARSE"; + case CLOCK_BOOTTIME: + return "CLOCK_BOOTTIME"; + case CLOCK_REALTIME_ALARM: + return "CLOCK_REALTIME_ALARM"; + case CLOCK_BOOTTIME_ALARM: + return "CLOCK_BOOTTIME_ALARM"; + case CLOCK_TAI: + return "CLOCK_TAI"; + }; + return "UNKNOWN_CLOCKID"; +} + + +long long timespec_sub(struct timespec a, struct timespec b) +{ + long long ret = NSEC_PER_SEC * b.tv_sec + b.tv_nsec; + + ret -= NSEC_PER_SEC * a.tv_sec + a.tv_nsec; + return ret; +} + + +void sigalarm(int signo) +{ + long long delta_ns; + struct timespec ts; + + clock_gettime(clock_id, &ts); + alarmcount++; + + delta_ns = timespec_sub(start_time, ts); + delta_ns -= NSEC_PER_SEC * TIMER_SECS * alarmcount; + + if (delta_ns < 0) + printf("%s timer fired early: FAIL\n", clockstring(clock_id)); + + if (delta_ns > max_latency_ns) + max_latency_ns = delta_ns; +} + +int do_timer(int clock_id, int flags) +{ + struct sigevent se; + timer_t tm1; + struct itimerspec its1, its2; + int err; + + /* Set up timer: */ + memset(&se, 0, sizeof(se)); + se.sigev_notify = SIGEV_SIGNAL; + se.sigev_signo = SIGRTMAX; + se.sigev_value.sival_int = 0; + + max_latency_ns = 0; + alarmcount = 0; + + err = timer_create(clock_id, &se, &tm1); + if (err) { + if ((clock_id == CLOCK_REALTIME_ALARM) || + (clock_id == CLOCK_BOOTTIME_ALARM)) { + printf("%-22s %s missing CAP_WAKE_ALARM? : [UNSUPPORTED]\n", + clockstring(clock_id), + flags ? "ABSTIME":"RELTIME"); + return 0; + } + printf("%s - timer_create() failed\n", clockstring(clock_id)); + return -1; + } + + clock_gettime(clock_id, &start_time); + if (flags) { + its1.it_value = start_time; + its1.it_value.tv_sec += TIMER_SECS; + } else { + its1.it_value.tv_sec = TIMER_SECS; + its1.it_value.tv_nsec = 0; + } + its1.it_interval.tv_sec = TIMER_SECS; + its1.it_interval.tv_nsec = 0; + + err = timer_settime(tm1, flags, &its1, &its2); + if (err) { + printf("%s - timer_settime() failed\n", clockstring(clock_id)); + return -1; + } + + while (alarmcount < 5) + sleep(1); + + printf("%-22s %s max latency: %10lld ns : ", + clockstring(clock_id), + flags ? 
"ABSTIME":"RELTIME", + max_latency_ns); + + timer_delete(tm1); + if (max_latency_ns < UNRESONABLE_LATENCY) { + printf("[OK]\n"); + return 0; + } + printf("[FAILED]\n"); + return -1; +} + +int main(void) +{ + struct sigaction act; + int signum = SIGRTMAX; + int ret = 0; + + /* Set up signal handler: */ + sigfillset(&act.sa_mask); + act.sa_flags = 0; + act.sa_handler = sigalarm; + sigaction(signum, &act, NULL); + + printf("Setting timers for every %i seconds\n", TIMER_SECS); + for (clock_id = 0; clock_id < NR_CLOCKIDS; clock_id++) { + + if ((clock_id == CLOCK_PROCESS_CPUTIME_ID) || + (clock_id == CLOCK_THREAD_CPUTIME_ID) || + (clock_id == CLOCK_MONOTONIC_RAW) || + (clock_id == CLOCK_REALTIME_COARSE) || + (clock_id == CLOCK_MONOTONIC_COARSE) || + (clock_id == CLOCK_HWSPECIFIC)) + continue; + + ret |= do_timer(clock_id, TIMER_ABSTIME); + ret |= do_timer(clock_id, 0); + } + if (ret) + return ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/skew_consistency.c b/tools/testing/selftests/timers/skew_consistency.c new file mode 100644 index 000000000000..5562f84ee07c --- /dev/null +++ b/tools/testing/selftests/timers/skew_consistency.c @@ -0,0 +1,89 @@ +/* ADJ_FREQ Skew consistency test + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2012 + * Licensed under the GPLv2 + * + * NOTE: This is a meta-test which cranks the ADJ_FREQ knob back + * and forth and watches for consistency problems. Thus this test requires + * that the inconsistency-check tests be present in the same directory it + * is run from. + * + * To build: + * $ gcc skew_consistency.c -o skew_consistency -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + +#include <stdio.h> +#include <stdlib.h> +#include <unistd.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <time.h> +#include <sys/types.h> +#include <sys/stat.h> +#include <fcntl.h> +#include <stdlib.h> +#include <string.h> +#include <sys/wait.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000LL + +int main(int argv, char **argc) +{ + struct timex tx; + int ret, ppm; + pid_t pid; + + + printf("Running Asyncrhonous Frequency Changing Tests...\n"); + + pid = fork(); + if (!pid) + return system("./inconsistency-check -c 1 -t 600"); + + ppm = 500; + ret = 0; + + while (pid != waitpid(pid, &ret, WNOHANG)) { + ppm = -ppm; + tx.modes = ADJ_FREQUENCY; + tx.freq = ppm << 16; + adjtimex(&tx); + usleep(500000); + } + + /* Set things back */ + tx.modes = ADJ_FREQUENCY; + tx.offset = 0; + adjtimex(&tx); + + + if (ret) { + printf("[FAILED]\n"); + return ksft_exit_fail(); + } + printf("[OK]\n"); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/threadtest.c b/tools/testing/selftests/timers/threadtest.c new file mode 100644 index 000000000000..e632e116f05e --- /dev/null +++ b/tools/testing/selftests/timers/threadtest.c @@ -0,0 +1,204 @@ +/* threadtest.c + * by: john stultz (johnstul@us.ibm.com) + * (C) Copyright IBM 2004, 2005, 2006, 2012 + * Licensed under the GPLv2 + * + * To build: + * $ gcc threadtest.c -o threadtest -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ +#include <stdio.h> +#include <unistd.h> +#include <stdlib.h> +#include <sys/time.h> +#include <pthread.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + + +/* serializes shared list access */ +pthread_mutex_t list_lock = PTHREAD_MUTEX_INITIALIZER; +/* serializes console output */ +pthread_mutex_t print_lock = PTHREAD_MUTEX_INITIALIZER; + + +#define MAX_THREADS 128 +#define LISTSIZE 128 + +int done = 0; + +struct timespec global_list[LISTSIZE]; +int listcount = 0; + + +void checklist(struct timespec *list, int size) +{ + int i, j; + struct timespec *a, *b; + + /* scan the list */ + for (i = 0; i < size-1; i++) { + a = &list[i]; + b = &list[i+1]; + + /* look for any time inconsistencies */ + if ((b->tv_sec <= a->tv_sec) && + (b->tv_nsec < a->tv_nsec)) { + + /* flag other threads */ + done = 1; + + /*serialize printing to avoid junky output*/ + pthread_mutex_lock(&print_lock); + + /* dump the list */ + printf("\n"); + for (j = 0; j < size; j++) { + if (j == i) + printf("---------------\n"); + printf("%lu:%lu\n", list[j].tv_sec, list[j].tv_nsec); + if (j == i+1) + printf("---------------\n"); + } + printf("[FAILED]\n"); + + pthread_mutex_unlock(&print_lock); + } + } +} + +/* The shared thread shares a global list + * that each thread fills while holding the lock. + * This stresses clock syncronization across cpus. 
+ */ +void *shared_thread(void *arg) +{ + while (!done) { + /* protect the list */ + pthread_mutex_lock(&list_lock); + + /* see if we're ready to check the list */ + if (listcount >= LISTSIZE) { + checklist(global_list, LISTSIZE); + listcount = 0; + } + clock_gettime(CLOCK_MONOTONIC, &global_list[listcount++]); + + pthread_mutex_unlock(&list_lock); + } + return NULL; +} + + +/* Each independent thread fills in its own + * list. This stresses clock_gettime() lock contention. + */ +void *independent_thread(void *arg) +{ + struct timespec my_list[LISTSIZE]; + int count; + + while (!done) { + /* fill the list */ + for (count = 0; count < LISTSIZE; count++) + clock_gettime(CLOCK_MONOTONIC, &my_list[count]); + checklist(my_list, LISTSIZE); + } + return NULL; +} + +#define DEFAULT_THREAD_COUNT 8 +#define DEFAULT_RUNTIME 30 + +int main(int argc, char **argv) +{ + int thread_count, i; + time_t start, now, runtime; + char buf[255]; + pthread_t pth[MAX_THREADS]; + int opt; + void *tret; + int ret = 0; + void *(*thread)(void *) = shared_thread; + + thread_count = DEFAULT_THREAD_COUNT; + runtime = DEFAULT_RUNTIME; + + /* Process arguments */ + while ((opt = getopt(argc, argv, "t:n:i")) != -1) { + switch (opt) { + case 't': + runtime = atoi(optarg); + break; + case 'n': + thread_count = atoi(optarg); + break; + case 'i': + thread = independent_thread; + printf("using independent threads\n"); + break; + default: + printf("Usage: %s [-t <secs>] [-n <numthreads>] [-i]\n", argv[0]); + printf(" -t: time to run\n"); + printf(" -n: number of threads\n"); + printf(" -i: use independent threads\n"); + return -1; + } + } + + if (thread_count > MAX_THREADS) + thread_count = MAX_THREADS; + + + setbuf(stdout, NULL); + + start = time(0); + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&start)); + printf("%s\n", buf); + printf("Testing consistency with %i threads for %ld seconds: ", thread_count, runtime); + + /* spawn */ + for (i = 0; i < thread_count; i++) + pthread_create(&pth[i], 0, thread, 0); + + while (time(&now) < start + runtime) { + sleep(1); + if (done) { + ret = 1; + strftime(buf, 255, "%a, %d %b %Y %T %z", localtime(&now)); + printf("%s\n", buf); + goto out; + } + } + printf("[OK]\n"); + done = 1; + +out: + /* wait */ + for (i = 0; i < thread_count; i++) + pthread_join(pth[i], &tret); + + /* die */ + if (ret) + ksft_exit_fail(); + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/timers/valid-adjtimex.c b/tools/testing/selftests/timers/valid-adjtimex.c new file mode 100644 index 000000000000..e86d937cc22c --- /dev/null +++ b/tools/testing/selftests/timers/valid-adjtimex.c @@ -0,0 +1,202 @@ +/* valid adjtimex test + * by: John Stultz <john.stultz@linaro.org> + * (C) Copyright Linaro 2015 + * Licensed under the GPLv2 + * + * This test validates adjtimex interface with valid + * and invalid test data. + * + * Usage: valid-adjtimex + * + * To build: + * $ gcc valid-adjtimex.c -o valid-adjtimex -lrt + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation, either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ */ + + + +#include <stdio.h> +#include <stdlib.h> +#include <time.h> +#include <sys/time.h> +#include <sys/timex.h> +#include <string.h> +#include <signal.h> +#include <unistd.h> +#ifdef KTEST +#include "../kselftest.h" +#else +static inline int ksft_exit_pass(void) +{ + exit(0); +} +static inline int ksft_exit_fail(void) +{ + exit(1); +} +#endif + +#define NSEC_PER_SEC 1000000000L + +/* clear NTP time_status & time_state */ +int clear_time_state(void) +{ + struct timex tx; + int ret; + + tx.modes = ADJ_STATUS; + tx.status = 0; + ret = adjtimex(&tx); + return ret; +} + +#define NUM_FREQ_VALID 32 +#define NUM_FREQ_OUTOFRANGE 4 +#define NUM_FREQ_INVALID 2 + +long valid_freq[NUM_FREQ_VALID] = { + -499<<16, + -450<<16, + -400<<16, + -350<<16, + -300<<16, + -250<<16, + -200<<16, + -150<<16, + -100<<16, + -75<<16, + -50<<16, + -25<<16, + -10<<16, + -5<<16, + -1<<16, + -1000, + 1<<16, + 5<<16, + 10<<16, + 25<<16, + 50<<16, + 75<<16, + 100<<16, + 150<<16, + 200<<16, + 250<<16, + 300<<16, + 350<<16, + 400<<16, + 450<<16, + 499<<16, +}; + +long outofrange_freq[NUM_FREQ_OUTOFRANGE] = { + -1000<<16, + -550<<16, + 550<<16, + 1000<<16, +}; + +#define LONG_MAX (~0UL>>1) +#define LONG_MIN (-LONG_MAX - 1) + +long invalid_freq[NUM_FREQ_INVALID] = { + LONG_MAX, + LONG_MIN, +}; + +int validate_freq(void) +{ + struct timex tx; + int ret, pass = 0; + int i; + + clear_time_state(); + + memset(&tx, 0, sizeof(struct timex)); + /* Set the leap second insert flag */ + + printf("Testing ADJ_FREQ... "); + for (i = 0; i < NUM_FREQ_VALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = valid_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + valid_freq[i], valid_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq != valid_freq[i]) { + printf("Warning: freq value %ld not what we set it (%ld)!\n", + tx.freq, valid_freq[i]); + } + } + for (i = 0; i < NUM_FREQ_OUTOFRANGE; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = outofrange_freq[i]; + + ret = adjtimex(&tx); + if (ret < 0) { + printf("[FAIL]\n"); + printf("Error: adjtimex(ADJ_FREQ, %ld - %ld ppm\n", + outofrange_freq[i], outofrange_freq[i]>>16); + pass = -1; + goto out; + } + tx.modes = 0; + ret = adjtimex(&tx); + if (tx.freq == outofrange_freq[i]) { + printf("[FAIL]\n"); + printf("ERROR: out of range value %ld actually set!\n", + tx.freq); + pass = -1; + goto out; + } + } + + + if (sizeof(long) == 8) { /* this case only applies to 64bit systems */ + for (i = 0; i < NUM_FREQ_INVALID; i++) { + tx.modes = ADJ_FREQUENCY; + tx.freq = invalid_freq[i]; + ret = adjtimex(&tx); + if (ret >= 0) { + printf("[FAIL]\n"); + printf("Error: No failure on invalid ADJ_FREQUENCY %ld\n", + invalid_freq[i]); + pass = -1; + goto out; + } + } + } + + printf("[OK]\n"); +out: + /* reset freq to zero */ + tx.modes = ADJ_FREQUENCY; + tx.freq = 0; + ret = adjtimex(&tx); + + return pass; +} + + +int main(int argc, char **argv) +{ + if (validate_freq()) + return ksft_exit_fail(); + + return ksft_exit_pass(); +} diff --git a/tools/testing/selftests/user/Makefile b/tools/testing/selftests/user/Makefile index 12c9d15bab07..d401b63c5b1a 100644 --- a/tools/testing/selftests/user/Makefile +++ b/tools/testing/selftests/user/Makefile @@ -3,5 +3,6 @@ # No binaries, but make sure arg-less "make" doesn't trigger "run_tests" all: -run_tests: all - ./test_user_copy.sh +TEST_PROGS := test_user_copy.sh + +include ../lib.mk diff --git a/tools/testing/selftests/vm/Makefile 
b/tools/testing/selftests/vm/Makefile index 077828c889f1..231b9a031f6a 100644 --- a/tools/testing/selftests/vm/Makefile +++ b/tools/testing/selftests/vm/Makefile @@ -1,16 +1,22 @@ # Makefile for vm selftests -CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -BINARIES = hugepage-mmap hugepage-shm map_hugetlb thuge-gen hugetlbfstest +BINARIES = compaction_test +BINARIES += hugepage-mmap +BINARIES += hugepage-shm +BINARIES += hugetlbfstest +BINARIES += map_hugetlb +BINARIES += thuge-gen BINARIES += transhuge-stress all: $(BINARIES) %: %.c $(CC) $(CFLAGS) -o $@ $^ -lrt -run_tests: all - @/bin/sh ./run_vmtests || (echo "vmtests: [FAIL]"; exit 1) +TEST_PROGS := run_vmtests +TEST_FILES := $(BINARIES) + +include ../lib.mk clean: $(RM) $(BINARIES) diff --git a/tools/testing/selftests/vm/compaction_test.c b/tools/testing/selftests/vm/compaction_test.c new file mode 100644 index 000000000000..932ff577ffc0 --- /dev/null +++ b/tools/testing/selftests/vm/compaction_test.c @@ -0,0 +1,225 @@ +/* + * + * A test for the patch "Allow compaction of unevictable pages". + * With this patch we should be able to allocate at least 1/4 + * of RAM in huge pages. Without the patch much less is + * allocated. + */ + +#include <stdio.h> +#include <stdlib.h> +#include <sys/mman.h> +#include <sys/resource.h> +#include <fcntl.h> +#include <errno.h> +#include <unistd.h> +#include <string.h> + +#define MAP_SIZE 1048576 + +struct map_list { + void *map; + struct map_list *next; +}; + +int read_memory_info(unsigned long *memfree, unsigned long *hugepagesize) +{ + char buffer[256] = {0}; + char *cmd = "cat /proc/meminfo | grep -i memfree | grep -o '[0-9]*'"; + FILE *cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + + *memfree = atoll(buffer); + cmd = "cat /proc/meminfo | grep -i hugepagesize | grep -o '[0-9]*'"; + cmdfile = popen(cmd, "r"); + + if (!(fgets(buffer, sizeof(buffer), cmdfile))) { + perror("Failed to read meminfo\n"); + return -1; + } + + pclose(cmdfile); + *hugepagesize = atoll(buffer); + + return 0; +} + +int prereq(void) +{ + char allowed; + int fd; + + fd = open("/proc/sys/vm/compact_unevictable_allowed", + O_RDONLY | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + return -1; + } + + if (read(fd, &allowed, sizeof(char)) != sizeof(char)) { + perror("Failed to read from\n" + "/proc/sys/vm/compact_unevictable_allowed\n"); + close(fd); + return -1; + } + + close(fd); + if (allowed == '1') + return 0; + + return -1; +} + +int check_compaction(unsigned long mem_free, unsigned int hugepage_size) +{ + int fd; + int compaction_index = 0; + char initial_nr_hugepages[10] = {0}; + char nr_hugepages[10] = {0}; + + /* We want to test with 80% of available memory. Else, OOM killer comes + in to play */ + mem_free = mem_free * 0.8; + + fd = open("/proc/sys/vm/nr_hugepages", O_RDWR | O_NONBLOCK); + if (fd < 0) { + perror("Failed to open /proc/sys/vm/nr_hugepages"); + return -1; + } + + if (read(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages"); + goto close_fd; + } + + /* Start with the initial condition of 0 huge pages*/ + if (write(fd, "0", sizeof(char)) != sizeof(char)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + /* Request a large number of huge pages. 
The Kernel will allocate + as much as it can */ + if (write(fd, "100000", (6*sizeof(char))) != (6*sizeof(char))) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + lseek(fd, 0, SEEK_SET); + + if (read(fd, nr_hugepages, sizeof(nr_hugepages)) <= 0) { + perror("Failed to read from /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + /* We should have been able to request at least 1/3 rd of the memory in + huge pages */ + compaction_index = mem_free/(atoi(nr_hugepages) * hugepage_size); + + if (compaction_index > 3) { + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + fprintf(stderr, "ERROR: Less that 1/%d of memory is available\n" + "as huge pages\n", compaction_index); + goto close_fd; + } + + printf("No of huge pages allocated = %d\n", + (atoi(nr_hugepages))); + + if (write(fd, initial_nr_hugepages, sizeof(initial_nr_hugepages)) + != strlen(initial_nr_hugepages)) { + perror("Failed to write to /proc/sys/vm/nr_hugepages\n"); + goto close_fd; + } + + close(fd); + return 0; + + close_fd: + close(fd); + printf("Not OK. Compaction test failed."); + return -1; +} + + +int main(int argc, char **argv) +{ + struct rlimit lim; + struct map_list *list, *entry; + size_t page_size, i; + void *map = NULL; + unsigned long mem_free = 0; + unsigned long hugepage_size = 0; + unsigned long mem_fragmentable = 0; + + if (prereq() != 0) { + printf("Either the sysctl compact_unevictable_allowed is not\n" + "set to 1 or couldn't read the proc file.\n" + "Skipping the test\n"); + return 0; + } + + lim.rlim_cur = RLIM_INFINITY; + lim.rlim_max = RLIM_INFINITY; + if (setrlimit(RLIMIT_MEMLOCK, &lim)) { + perror("Failed to set rlimit:\n"); + return -1; + } + + page_size = getpagesize(); + + list = NULL; + + if (read_memory_info(&mem_free, &hugepage_size) != 0) { + printf("ERROR: Cannot read meminfo\n"); + return -1; + } + + mem_fragmentable = mem_free * 0.8 / 1024; + + while (mem_fragmentable > 0) { + map = mmap(NULL, MAP_SIZE, PROT_READ | PROT_WRITE, + MAP_ANONYMOUS | MAP_PRIVATE | MAP_LOCKED, -1, 0); + if (map == MAP_FAILED) + break; + + entry = malloc(sizeof(struct map_list)); + if (!entry) { + munmap(map, MAP_SIZE); + break; + } + entry->map = map; + entry->next = list; + list = entry; + + /* Write something (in this case the address of the map) to + * ensure that KSM can't merge the mapped pages + */ + for (i = 0; i < MAP_SIZE; i += page_size) + *(unsigned long *)(map + i) = (unsigned long)map + i; + + mem_fragmentable--; + } + + for (entry = list; entry != NULL; entry = entry->next) { + munmap(entry->map, MAP_SIZE); + if (!entry->next) + break; + entry = entry->next; + } + + if (check_compaction(mem_free, hugepage_size) == 0) + return 0; + + return -1; +} diff --git a/tools/testing/selftests/vm/hugetlbfstest.c b/tools/testing/selftests/vm/hugetlbfstest.c index ea40ff8c2391..02e1072ec187 100644 --- a/tools/testing/selftests/vm/hugetlbfstest.c +++ b/tools/testing/selftests/vm/hugetlbfstest.c @@ -34,6 +34,7 @@ static void do_mmap(int fd, int extra_flags, int unmap) int *p; int flags = MAP_PRIVATE | MAP_POPULATE | extra_flags; u64 before, after; + int ret; before = read_rss(); p = mmap(NULL, length, PROT_READ | PROT_WRITE, flags, fd, 0); @@ -44,7 +45,8 @@ static void do_mmap(int fd, int extra_flags, int unmap) !"rss didn't grow as expected"); if (!unmap) return; - munmap(p, length); + ret = munmap(p, length); + assert(!ret || !"munmap returned an unexpected error"); after = read_rss(); assert(llabs(after - before) < 0x40000 || !"rss didn't shrink as 
expected"); diff --git a/tools/testing/selftests/vm/map_hugetlb.c b/tools/testing/selftests/vm/map_hugetlb.c index ac56639dd4a9..addcd6fc1ecc 100644 --- a/tools/testing/selftests/vm/map_hugetlb.c +++ b/tools/testing/selftests/vm/map_hugetlb.c @@ -73,7 +73,11 @@ int main(void) write_bytes(addr); ret = read_bytes(addr); - munmap(addr, LENGTH); + /* munmap() length of MAP_HUGETLB memory must be hugepage aligned */ + if (munmap(addr, LENGTH)) { + perror("munmap"); + exit(1); + } return ret; } diff --git a/tools/testing/selftests/vm/run_vmtests b/tools/testing/selftests/vm/run_vmtests index c87b6812300d..49ece11ff7fd 100644..100755 --- a/tools/testing/selftests/vm/run_vmtests +++ b/tools/testing/selftests/vm/run_vmtests @@ -90,4 +90,16 @@ fi umount $mnt rm -rf $mnt echo $nr_hugepgs > /proc/sys/vm/nr_hugepages + +echo "-----------------------" +echo "running compaction_test" +echo "-----------------------" +./compaction_test +if [ $? -ne 0 ]; then + echo "[FAIL]" + exitcode=1 +else + echo "[PASS]" +fi + exit $exitcode diff --git a/tools/testing/selftests/x86/.gitignore b/tools/testing/selftests/x86/.gitignore new file mode 100644 index 000000000000..15034fef9698 --- /dev/null +++ b/tools/testing/selftests/x86/.gitignore @@ -0,0 +1,2 @@ +*_32 +*_64 diff --git a/tools/testing/selftests/x86/Makefile b/tools/testing/selftests/x86/Makefile new file mode 100644 index 000000000000..caa60d56d7d1 --- /dev/null +++ b/tools/testing/selftests/x86/Makefile @@ -0,0 +1,62 @@ +all: + +include ../lib.mk + +.PHONY: all all_32 all_64 warn_32bit_failure clean + +TARGETS_C_BOTHBITS := sigreturn single_step_syscall sysret_ss_attrs +TARGETS_C_32BIT_ONLY := entry_from_vm86 + +TARGETS_C_32BIT_ALL := $(TARGETS_C_BOTHBITS) $(TARGETS_C_32BIT_ONLY) +BINARIES_32 := $(TARGETS_C_32BIT_ALL:%=%_32) +BINARIES_64 := $(TARGETS_C_BOTHBITS:%=%_64) + +CFLAGS := -O2 -g -std=gnu99 -pthread -Wall + +UNAME_M := $(shell uname -m) +CAN_BUILD_I386 := $(shell ./check_cc.sh $(CC) trivial_32bit_program.c -m32) +CAN_BUILD_X86_64 := $(shell ./check_cc.sh $(CC) trivial_64bit_program.c) + +ifeq ($(CAN_BUILD_I386),1) +all: all_32 +TEST_PROGS += $(BINARIES_32) +endif + +ifeq ($(CAN_BUILD_X86_64),1) +all: all_64 +TEST_PROGS += $(BINARIES_64) +endif + +all_32: $(BINARIES_32) + +all_64: $(BINARIES_64) + +clean: + $(RM) $(BINARIES_32) $(BINARIES_64) + +$(TARGETS_C_32BIT_ALL:%=%_32): %_32: %.c + $(CC) -m32 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +$(TARGETS_C_BOTHBITS:%=%_64): %_64: %.c + $(CC) -m64 -o $@ $(CFLAGS) $(EXTRA_CFLAGS) $^ -lrt -ldl + +# x86_64 users should be encouraged to install 32-bit libraries +ifeq ($(CAN_BUILD_I386)$(CAN_BUILD_X86_64),01) +all: warn_32bit_failure + +warn_32bit_failure: + @echo "Warning: you seem to have a broken 32-bit build" 2>&1; \ + echo "environment. This will reduce test coverage of 64-bit" 2>&1; \ + echo "kernels. If you are using a Debian-like distribution," 2>&1; \ + echo "try:"; 2>&1; \ + echo ""; \ + echo " apt-get install gcc-multilib libc6-i386 libc6-dev-i386"; \ + echo ""; \ + echo "If you are using a Fedora-like distribution, try:"; \ + echo ""; \ + echo " yum install glibc-devel.*i686"; \ + exit 0; +endif + +# Some tests have additional dependencies. 
+sysret_ss_attrs_64: thunks.S diff --git a/tools/testing/selftests/x86/check_cc.sh b/tools/testing/selftests/x86/check_cc.sh new file mode 100755 index 000000000000..172d3293fb7b --- /dev/null +++ b/tools/testing/selftests/x86/check_cc.sh @@ -0,0 +1,16 @@ +#!/bin/sh +# check_cc.sh - Helper to test userspace compilation support +# Copyright (c) 2015 Andrew Lutomirski +# GPL v2 + +CC="$1" +TESTPROG="$2" +shift 2 + +if "$CC" -o /dev/null "$TESTPROG" -O0 "$@" 2>/dev/null; then + echo 1 +else + echo 0 +fi + +exit 0 diff --git a/tools/testing/selftests/x86/entry_from_vm86.c b/tools/testing/selftests/x86/entry_from_vm86.c new file mode 100644 index 000000000000..5c38a187677b --- /dev/null +++ b/tools/testing/selftests/x86/entry_from_vm86.c @@ -0,0 +1,114 @@ +/* + * entry_from_vm86.c - tests kernel entries from vm86 mode + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This exercises a few paths that need to special-case vm86 mode. + * + * GPL v2. + */ + +#define _GNU_SOURCE + +#include <assert.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <err.h> +#include <stddef.h> +#include <stdbool.h> +#include <errno.h> +#include <sys/vm86.h> + +static unsigned long load_addr = 0x10000; +static int nerrs = 0; + +asm ( + ".pushsection .rodata\n\t" + ".type vmcode_bound, @object\n\t" + "vmcode:\n\t" + "vmcode_bound:\n\t" + ".code16\n\t" + "bound %ax, (2048)\n\t" + "int3\n\t" + "vmcode_sysenter:\n\t" + "sysenter\n\t" + ".size vmcode, . - vmcode\n\t" + "end_vmcode:\n\t" + ".code32\n\t" + ".popsection" + ); + +extern unsigned char vmcode[], end_vmcode[]; +extern unsigned char vmcode_bound[], vmcode_sysenter[]; + +static void do_test(struct vm86plus_struct *v86, unsigned long eip, + const char *text) +{ + long ret; + + printf("[RUN]\t%s from vm86 mode\n", text); + v86->regs.eip = eip; + ret = vm86(VM86_ENTER, v86); + + if (ret == -1 && errno == ENOSYS) { + printf("[SKIP]\tvm86 not supported\n"); + return; + } + + if (VM86_TYPE(ret) == VM86_INTx) { + char trapname[32]; + int trapno = VM86_ARG(ret); + if (trapno == 13) + strcpy(trapname, "GP"); + else if (trapno == 5) + strcpy(trapname, "BR"); + else if (trapno == 14) + strcpy(trapname, "PF"); + else + sprintf(trapname, "%d", trapno); + + printf("[OK]\tExited vm86 mode due to #%s\n", trapname); + } else if (VM86_TYPE(ret) == VM86_UNKNOWN) { + printf("[OK]\tExited vm86 mode due to unhandled GP fault\n"); + } else { + printf("[OK]\tExited vm86 mode due to type %ld, arg %ld\n", + VM86_TYPE(ret), VM86_ARG(ret)); + } +} + +int main(void) +{ + struct vm86plus_struct v86; + unsigned char *addr = mmap((void *)load_addr, 4096, + PROT_READ | PROT_WRITE | PROT_EXEC, + MAP_ANONYMOUS | MAP_PRIVATE, -1,0); + if (addr != (unsigned char *)load_addr) + err(1, "mmap"); + + memcpy(addr, vmcode, end_vmcode - vmcode); + addr[2048] = 2; + addr[2050] = 3; + + memset(&v86, 0, sizeof(v86)); + + v86.regs.cs = load_addr / 16; + v86.regs.ss = load_addr / 16; + v86.regs.ds = load_addr / 16; + v86.regs.es = load_addr / 16; + + assert((v86.regs.cs & 3) == 0); /* Looks like RPL = 0 */ + + /* #BR -- should deliver SIG??? */ + do_test(&v86, vmcode_bound - vmcode, "#BR"); + + /* SYSENTER -- should cause #GP or #UD depending on CPU */ + do_test(&v86, vmcode_sysenter - vmcode, "SYSENTER"); + + return (nerrs == 0 ? 
0 : 1); +} diff --git a/tools/testing/selftests/x86/sigreturn.c b/tools/testing/selftests/x86/sigreturn.c new file mode 100644 index 000000000000..b5aa1bab7416 --- /dev/null +++ b/tools/testing/selftests/x86/sigreturn.c @@ -0,0 +1,684 @@ +/* + * sigreturn.c - tests for x86 sigreturn(2) and exit-to-userspace + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a series of tests that exercises the sigreturn(2) syscall and + * the IRET / SYSRET paths in the kernel. + * + * For now, this focuses on the effects of unusual CS and SS values, + * and it has a bunch of tests to make sure that ESP/RSP is restored + * properly. + * + * The basic idea behind these tests is to raise(SIGUSR1) to create a + * sigcontext frame, plug in the values to be tested, and then return, + * which implicitly invokes sigreturn(2) and programs the user context + * as desired. + * + * For tests for which we expect sigreturn and the subsequent return to + * user mode to succeed, we return to a short trampoline that generates + * SIGTRAP so that the meat of the tests can be ordinary C code in a + * SIGTRAP handler. + * + * The inner workings of each test is documented below. + * + * Do not run on outdated, unpatched kernels at risk of nasty crashes. + */ + +#define _GNU_SOURCE + +#include <sys/time.h> +#include <time.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <asm/ldt.h> +#include <err.h> +#include <setjmp.h> +#include <stddef.h> +#include <stdbool.h> +#include <sys/ptrace.h> +#include <sys/user.h> + +/* + * In principle, this test can run on Linux emulation layers (e.g. + * Illumos "LX branded zones"). Solaris-based kernels reserve LDT + * entries 0-5 for their own internal purposes, so start our LDT + * allocations above that reservation. (The tests don't pass on LX + * branded zones, but at least this lets them run.) + */ +#define LDT_OFFSET 6 + +/* An aligned stack accessible through some of our segments. */ +static unsigned char stack16[65536] __attribute__((aligned(4096))); + +/* + * An aligned int3 instruction used as a trampoline. Some of the tests + * want to fish out their ss values, so this trampoline copies ss to eax + * before the int3. + */ +asm (".pushsection .text\n\t" + ".type int3, @function\n\t" + ".align 4096\n\t" + "int3:\n\t" + "mov %ss,%eax\n\t" + "int3\n\t" + ".size int3, . - int3\n\t" + ".align 4096, 0xcc\n\t" + ".popsection"); +extern char int3[4096]; + +/* + * At startup, we prepapre: + * + * - ldt_nonexistent_sel: An LDT entry that doesn't exist (all-zero + * descriptor or out of bounds). + * - code16_sel: A 16-bit LDT code segment pointing to int3. + * - data16_sel: A 16-bit LDT data segment pointing to stack16. + * - npcode32_sel: A 32-bit not-present LDT code segment pointing to int3. + * - npdata32_sel: A 32-bit not-present LDT data segment pointing to stack16. + * - gdt_data16_idx: A 16-bit GDT data segment pointing to stack16. 
+ * - gdt_npdata32_idx: A 32-bit not-present GDT data segment pointing to + * stack16. + * + * For no particularly good reason, xyz_sel is a selector value with the + * RPL and LDT bits filled in, whereas xyz_idx is just an index into the + * descriptor table. These variables will be zero if their respective + * segments could not be allocated. + */ +static unsigned short ldt_nonexistent_sel; +static unsigned short code16_sel, data16_sel, npcode32_sel, npdata32_sel; + +static unsigned short gdt_data16_idx, gdt_npdata32_idx; + +static unsigned short GDT3(int idx) +{ + return (idx << 3) | 3; +} + +static unsigned short LDT3(int idx) +{ + return (idx << 3) | 7; +} + +/* Our sigaltstack scratch space. */ +static char altstack_data[SIGSTKSZ]; + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void clearhandler(int sig) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_handler = SIG_DFL; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static void add_ldt(const struct user_desc *desc, unsigned short *var, + const char *name) +{ + if (syscall(SYS_modify_ldt, 1, desc, sizeof(*desc)) == 0) { + *var = LDT3(desc->entry_number); + } else { + printf("[NOTE]\tFailed to create %s segment\n", name); + *var = 0; + } +} + +static void setup_ldt(void) +{ + if ((unsigned long)stack16 > (1ULL << 32) - sizeof(stack16)) + errx(1, "stack16 is too high\n"); + if ((unsigned long)int3 > (1ULL << 32) - sizeof(int3)) + errx(1, "int3 is too high\n"); + + ldt_nonexistent_sel = LDT3(LDT_OFFSET + 2); + + const struct user_desc code16_desc = { + .entry_number = LDT_OFFSET + 0, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 0, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&code16_desc, &code16_sel, "code16"); + + const struct user_desc data16_desc = { + .entry_number = LDT_OFFSET + 1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + add_ldt(&data16_desc, &data16_sel, "data16"); + + const struct user_desc npcode32_desc = { + .entry_number = LDT_OFFSET + 3, + .base_addr = (unsigned long)int3, + .limit = 4095, + .seg_32bit = 1, + .contents = 2, /* Code, not conforming */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npcode32_desc, &npcode32_sel, "npcode32"); + + const struct user_desc npdata32_desc = { + .entry_number = LDT_OFFSET + 4, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + add_ldt(&npdata32_desc, &npdata32_sel, "npdata32"); + + struct user_desc gdt_data16_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 0, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 0, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_data16_desc) == 0) { + /* + * This probably indicates vulnerability to CVE-2014-8133. 
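Editorial aside, not part of the patch text: the GDT3()/LDT3() helpers above build raw x86 segment selectors, which sigreturn.c then passes around as plain numbers. A selector is just index << 3 | TI << 2 | RPL (TI selects GDT vs LDT), so a tiny decoder makes the selector values printed by the test easier to read:

#include <stdio.h>

static void decode_selector(unsigned short sel)
{
	unsigned int rpl = sel & 3;		/* requested privilege level */
	unsigned int ti  = (sel >> 2) & 1;	/* table indicator: 0 = GDT, 1 = LDT */
	unsigned int idx = sel >> 3;		/* descriptor table index */

	printf("selector 0x%04hx: index %u, %s, RPL %u\n",
	       sel, idx, ti ? "LDT" : "GDT", rpl);
}

int main(void)
{
	decode_selector((6 << 3) | 7);	/* LDT3(6): LDT index 6, RPL 3 */
	decode_selector((2 << 3) | 3);	/* GDT3(2): GDT index 2, RPL 3 */
	return 0;
}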
+ * Merely getting here isn't definitive, though, and we'll + * diagnose the problem for real later on. + */ + printf("[WARN]\tset_thread_area allocated data16 at index %d\n", + gdt_data16_desc.entry_number); + gdt_data16_idx = gdt_data16_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } + + struct user_desc gdt_npdata32_desc = { + .entry_number = -1, + .base_addr = (unsigned long)stack16, + .limit = 0xffff, + .seg_32bit = 1, + .contents = 0, /* Data, grow-up */ + .read_exec_only = 0, + .limit_in_pages = 0, + .seg_not_present = 1, + .useable = 0 + }; + + if (syscall(SYS_set_thread_area, &gdt_npdata32_desc) == 0) { + /* + * As a hardening measure, newer kernels don't allow this. + */ + printf("[WARN]\tset_thread_area allocated npdata32 at index %d\n", + gdt_npdata32_desc.entry_number); + gdt_npdata32_idx = gdt_npdata32_desc.entry_number; + } else { + printf("[OK]\tset_thread_area refused 16-bit data\n"); + } +} + +/* State used by our signal handlers. */ +static gregset_t initial_regs, requested_regs, resulting_regs; + +/* Instructions for the SIGUSR1 handler. */ +static volatile unsigned short sig_cs, sig_ss; +static volatile sig_atomic_t sig_trapped, sig_err, sig_trapno; + +/* Abstractions for some 32-bit vs 64-bit differences. */ +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define REG_SP REG_RSP +# define REG_AX REG_RAX + +struct selectors { + unsigned short cs, gs, fs, ss; +}; + +static unsigned short *ssptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->ss; +} + +static unsigned short *csptr(ucontext_t *ctx) +{ + struct selectors *sels = (void *)&ctx->uc_mcontext.gregs[REG_CSGSFS]; + return &sels->cs; +} +#else +# define REG_IP REG_EIP +# define REG_SP REG_ESP +# define REG_AX REG_EAX + +static greg_t *ssptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_SS]; +} + +static greg_t *csptr(ucontext_t *ctx) +{ + return &ctx->uc_mcontext.gregs[REG_CS]; +} +#endif + +/* Number of errors in the current test case. */ +static volatile sig_atomic_t nerrs; + +/* + * SIGUSR1 handler. Sets CS and SS as requested and points IP to the + * int3 trampoline. Sets SP to a large known value so that we can see + * whether the value round-trips back to user mode correctly. + */ +static void sigusr1(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + memcpy(&initial_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + + *csptr(ctx) = sig_cs; + *ssptr(ctx) = sig_ss; + + ctx->uc_mcontext.gregs[REG_IP] = + sig_cs == code16_sel ? 0 : (unsigned long)&int3; + ctx->uc_mcontext.gregs[REG_SP] = (unsigned long)0x8badf00d5aadc0deULL; + ctx->uc_mcontext.gregs[REG_AX] = 0; + + memcpy(&requested_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + requested_regs[REG_AX] = *ssptr(ctx); /* The asm code does this. */ + + return; +} + +/* + * Called after a successful sigreturn. Restores our state so that + * the original raise(SIGUSR1) returns. + */ +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + sig_err = ctx->uc_mcontext.gregs[REG_ERR]; + sig_trapno = ctx->uc_mcontext.gregs[REG_TRAPNO]; + + unsigned short ss; + asm ("mov %%ss,%0" : "=r" (ss)); + + greg_t asm_ss = ctx->uc_mcontext.gregs[REG_AX]; + if (asm_ss != sig_ss && sig == SIGTRAP) { + /* Sanity check failure. 
*/ + printf("[FAIL]\tSIGTRAP: ss = %hx, frame ss = %hx, ax = %llx\n", + ss, *ssptr(ctx), (unsigned long long)asm_ss); + nerrs++; + } + + memcpy(&resulting_regs, &ctx->uc_mcontext.gregs, sizeof(gregset_t)); + memcpy(&ctx->uc_mcontext.gregs, &initial_regs, sizeof(gregset_t)); + + sig_trapped = sig; +} + +/* + * Checks a given selector for its code bitness or returns -1 if it's not + * a usable code segment selector. + */ +int cs_bitness(unsigned short cs) +{ + uint32_t valid = 0, ar; + asm ("lar %[cs], %[ar]\n\t" + "jnz 1f\n\t" + "mov $1, %[valid]\n\t" + "1:" + : [ar] "=r" (ar), [valid] "+rm" (valid) + : [cs] "r" (cs)); + + if (!valid) + return -1; + + bool db = (ar & (1 << 22)); + bool l = (ar & (1 << 21)); + + if (!(ar & (1<<11))) + return -1; /* Not code. */ + + if (l && !db) + return 64; + else if (!l && db) + return 32; + else if (!l && !db) + return 16; + else + return -1; /* Unknown bitness. */ +} + +/* Finds a usable code segment of the requested bitness. */ +int find_cs(int bitness) +{ + unsigned short my_cs; + + asm ("mov %%cs,%0" : "=r" (my_cs)); + + if (cs_bitness(my_cs) == bitness) + return my_cs; + if (cs_bitness(my_cs + (2 << 3)) == bitness) + return my_cs + (2 << 3); + if (my_cs > (2<<3) && cs_bitness(my_cs - (2 << 3)) == bitness) + return my_cs - (2 << 3); + if (cs_bitness(code16_sel) == bitness) + return code16_sel; + + printf("[WARN]\tCould not find %d-bit CS\n", bitness); + return -1; +} + +static int test_valid_sigreturn(int cs_bits, bool use_16bit_ss, int force_ss) +{ + int cs = find_cs(cs_bits); + if (cs == -1) { + printf("[SKIP]\tCode segment unavailable for %d-bit CS, %d-bit SS\n", + cs_bits, use_16bit_ss ? 16 : 32); + return 0; + } + + if (force_ss != -1) { + sig_ss = force_ss; + } else { + if (use_16bit_ss) { + if (!data16_sel) { + printf("[SKIP]\tData segment unavailable for %d-bit CS, 16-bit SS\n", + cs_bits); + return 0; + } + sig_ss = data16_sel; + } else { + asm volatile ("mov %%ss,%0" : "=r" (sig_ss)); + } + } + + sig_cs = cs; + + printf("[RUN]\tValid sigreturn: %d-bit CS (%hx), %d-bit SS (%hx%s)\n", + cs_bits, sig_cs, use_16bit_ss ? 16 : 32, sig_ss, + (sig_ss & 4) ? "" : ", GDT"); + + raise(SIGUSR1); + + nerrs = 0; + + /* + * Check that each register had an acceptable value when the + * int3 trampoline was invoked. + */ + for (int i = 0; i < NGREG; i++) { + greg_t req = requested_regs[i], res = resulting_regs[i]; + if (i == REG_TRAPNO || i == REG_IP) + continue; /* don't care */ + if (i == REG_SP) { + printf("\tSP: %llx -> %llx\n", (unsigned long long)req, + (unsigned long long)res); + + /* + * In many circumstances, the high 32 bits of rsp + * are zeroed. For example, we could be a real + * 32-bit program, or we could hit any of a number + * of poorly-documented IRET or segmented ESP + * oddities. If this happens, it's okay. 
+ */ + if (res == (req & 0xFFFFFFFF)) + continue; /* OK; not expected to work */ + } + + bool ignore_reg = false; +#if __i386__ + if (i == REG_UESP) + ignore_reg = true; +#else + if (i == REG_CSGSFS) { + struct selectors *req_sels = + (void *)&requested_regs[REG_CSGSFS]; + struct selectors *res_sels = + (void *)&resulting_regs[REG_CSGSFS]; + if (req_sels->cs != res_sels->cs) { + printf("[FAIL]\tCS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->cs, res_sels->cs); + nerrs++; + } + + if (req_sels->ss != res_sels->ss) { + printf("[FAIL]\tSS mismatch: requested 0x%hx; got 0x%hx\n", + req_sels->ss, res_sels->ss); + nerrs++; + } + + continue; + } +#endif + + /* Sanity check on the kernel */ + if (i == REG_AX && requested_regs[i] != resulting_regs[i]) { + printf("[FAIL]\tAX (saved SP) mismatch: requested 0x%llx; got 0x%llx\n", + (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + continue; + } + + if (requested_regs[i] != resulting_regs[i] && !ignore_reg) { + /* + * SP is particularly interesting here. The + * usual cause of failures is that we hit the + * nasty IRET case of returning to a 16-bit SS, + * in which case bits 16:31 of the *kernel* + * stack pointer persist in ESP. + */ + printf("[FAIL]\tReg %d mismatch: requested 0x%llx; got 0x%llx\n", + i, (unsigned long long)requested_regs[i], + (unsigned long long)resulting_regs[i]); + nerrs++; + } + } + + if (nerrs == 0) + printf("[OK]\tall registers okay\n"); + + return nerrs; +} + +static int test_bad_iret(int cs_bits, unsigned short ss, int force_cs) +{ + int cs = force_cs == -1 ? find_cs(cs_bits) : force_cs; + if (cs == -1) + return 0; + + sig_cs = cs; + sig_ss = ss; + + printf("[RUN]\t%d-bit CS (%hx), bogus SS (%hx)\n", + cs_bits, sig_cs, sig_ss); + + sig_trapped = 0; + raise(SIGUSR1); + if (sig_trapped) { + char errdesc[32] = ""; + if (sig_err) { + const char *src = (sig_err & 1) ? " EXT" : ""; + const char *table; + if ((sig_err & 0x6) == 0x0) + table = "GDT"; + else if ((sig_err & 0x6) == 0x4) + table = "LDT"; + else if ((sig_err & 0x6) == 0x2) + table = "IDT"; + else + table = "???"; + + sprintf(errdesc, "%s%s index %d, ", + table, src, sig_err >> 3); + } + + char trapname[32]; + if (sig_trapno == 13) + strcpy(trapname, "GP"); + else if (sig_trapno == 11) + strcpy(trapname, "NP"); + else if (sig_trapno == 12) + strcpy(trapname, "SS"); + else if (sig_trapno == 32) + strcpy(trapname, "IRET"); /* X86_TRAP_IRET */ + else + sprintf(trapname, "%d", sig_trapno); + + printf("[OK]\tGot #%s(0x%lx) (i.e. %s%s)\n", + trapname, (unsigned long)sig_err, + errdesc, strsignal(sig_trapped)); + return 0; + } else { + printf("[FAIL]\tDid not get SIGSEGV\n"); + return 1; + } +} + +int main() +{ + int total_nerrs = 0; + unsigned short my_cs, my_ss; + + asm volatile ("mov %%cs,%0" : "=r" (my_cs)); + asm volatile ("mov %%ss,%0" : "=r" (my_ss)); + setup_ldt(); + + stack_t stack = { + .ss_sp = altstack_data, + .ss_size = SIGSTKSZ, + }; + if (sigaltstack(&stack, NULL) != 0) + err(1, "sigaltstack"); + + sethandler(SIGUSR1, sigusr1, 0); + sethandler(SIGTRAP, sigtrap, SA_ONSTACK); + + /* Easy cases: return to a 32-bit SS in each possible CS bitness. */ + total_nerrs += test_valid_sigreturn(64, false, -1); + total_nerrs += test_valid_sigreturn(32, false, -1); + total_nerrs += test_valid_sigreturn(16, false, -1); + + /* + * Test easy espfix cases: return to a 16-bit LDT SS in each possible + * CS bitness. NB: with a long mode CS, the SS bitness is irrelevant. 
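Editorial aside, not part of the patch text: the trick this whole file builds on is that returning from a signal handler goes through sigreturn(2), which reloads whatever register state the handler left in the ucontext. The sketch below is a deliberately safe illustration of that plumbing: it only reads the saved CS/SS from the handler's ucontext rather than rewriting them the way the test does, and the struct-overlay layout for REG_CSGSFS on x86_64 is the same assumption sigreturn.c itself makes.

#define _GNU_SOURCE
#include <signal.h>
#include <stdio.h>
#include <string.h>
#include <ucontext.h>

static void handler(int sig, siginfo_t *info, void *ctx_void)
{
	ucontext_t *ctx = ctx_void;

	(void)sig;
	(void)info;
#ifdef __x86_64__
	/* cs, gs, fs, ss packed into one greg, as in sigreturn.c's selectors */
	unsigned short *sels =
		(unsigned short *)&ctx->uc_mcontext.gregs[REG_CSGSFS];
	printf("saved cs=0x%hx ss=0x%hx\n", sels[0], sels[3]);
#else
	printf("saved cs=0x%lx ss=0x%lx\n",
	       (unsigned long)ctx->uc_mcontext.gregs[REG_CS],
	       (unsigned long)ctx->uc_mcontext.gregs[REG_SS]);
#endif
}

int main(void)
{
	struct sigaction sa;

	memset(&sa, 0, sizeof(sa));
	sa.sa_sigaction = handler;
	sa.sa_flags = SA_SIGINFO;
	sigemptyset(&sa.sa_mask);
	if (sigaction(SIGUSR1, &sa, NULL))
		return 1;

	raise(SIGUSR1);	/* handler return goes through sigreturn(2) */
	return 0;
}

The real test goes one step further and writes new CS/SS/SP values into the ucontext before returning, so the subsequent sigreturn is what actually programs the unusual segment state being tested.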
+ * + * This catches the original missing-espfix-on-64-bit-kernels issue + * as well as CVE-2014-8134. + */ + total_nerrs += test_valid_sigreturn(64, true, -1); + total_nerrs += test_valid_sigreturn(32, true, -1); + total_nerrs += test_valid_sigreturn(16, true, -1); + + if (gdt_data16_idx) { + /* + * For performance reasons, Linux skips espfix if SS points + * to the GDT. If we were able to allocate a 16-bit SS in + * the GDT, see if it leaks parts of the kernel stack pointer. + * + * This tests for CVE-2014-8133. + */ + total_nerrs += test_valid_sigreturn(64, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(32, true, + GDT3(gdt_data16_idx)); + total_nerrs += test_valid_sigreturn(16, true, + GDT3(gdt_data16_idx)); + } + + /* + * We're done testing valid sigreturn cases. Now we test states + * for which sigreturn itself will succeed but the subsequent + * entry to user mode will fail. + * + * Depending on the failure mode and the kernel bitness, these + * entry failures can generate SIGSEGV, SIGBUS, or SIGILL. + */ + clearhandler(SIGTRAP); + sethandler(SIGSEGV, sigtrap, SA_ONSTACK); + sethandler(SIGBUS, sigtrap, SA_ONSTACK); + sethandler(SIGILL, sigtrap, SA_ONSTACK); /* 32-bit kernels do this */ + + /* Easy failures: invalid SS, resulting in #GP(0) */ + test_bad_iret(64, ldt_nonexistent_sel, -1); + test_bad_iret(32, ldt_nonexistent_sel, -1); + test_bad_iret(16, ldt_nonexistent_sel, -1); + + /* These fail because SS isn't a data segment, resulting in #GP(SS) */ + test_bad_iret(64, my_cs, -1); + test_bad_iret(32, my_cs, -1); + test_bad_iret(16, my_cs, -1); + + /* Try to return to a not-present code segment, triggering #NP(SS). */ + test_bad_iret(32, my_ss, npcode32_sel); + + /* + * Try to return to a not-present but otherwise valid data segment. + * This will cause IRET to fail with #SS on the espfix stack. This + * exercises CVE-2014-9322. + * + * Note that, if espfix is enabled, 64-bit Linux will lose track + * of the actual cause of failure and report #GP(0) instead. + * This would be very difficult for Linux to avoid, because + * espfix64 causes IRET failures to be promoted to #DF, so the + * original exception frame is never pushed onto the stack. + */ + test_bad_iret(32, npdata32_sel, -1); + + /* + * Try to return to a not-present but otherwise valid data + * segment without invoking espfix. Newer kernels don't allow + * this to happen in the first place. On older kernels, though, + * this can trigger CVE-2014-9322. + */ + if (gdt_npdata32_idx) + test_bad_iret(32, GDT3(gdt_npdata32_idx), -1); + + return total_nerrs ? 1 : 0; +} diff --git a/tools/testing/selftests/x86/single_step_syscall.c b/tools/testing/selftests/x86/single_step_syscall.c new file mode 100644 index 000000000000..50c26358e8b7 --- /dev/null +++ b/tools/testing/selftests/x86/single_step_syscall.c @@ -0,0 +1,181 @@ +/* + * single_step_syscall.c - single-steps various x86 syscalls + * Copyright (c) 2014-2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * This is a very simple series of tests that makes system calls with + * the TF flag set. 
This exercises some nasty kernel code in the + * SYSENTER case: SYSENTER does not clear TF, so SYSENTER with TF set + * immediately issues #DB from CPL 0. This requires special handling in + * the kernel. + */ + +#define _GNU_SOURCE + +#include <sys/time.h> +#include <time.h> +#include <stdlib.h> +#include <sys/syscall.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <inttypes.h> +#include <sys/mman.h> +#include <sys/signal.h> +#include <sys/ucontext.h> +#include <asm/ldt.h> +#include <err.h> +#include <setjmp.h> +#include <stddef.h> +#include <stdbool.h> +#include <sys/ptrace.h> +#include <sys/user.h> + +static void sethandler(int sig, void (*handler)(int, siginfo_t *, void *), + int flags) +{ + struct sigaction sa; + memset(&sa, 0, sizeof(sa)); + sa.sa_sigaction = handler; + sa.sa_flags = SA_SIGINFO | flags; + sigemptyset(&sa.sa_mask); + if (sigaction(sig, &sa, 0)) + err(1, "sigaction"); +} + +static volatile sig_atomic_t sig_traps; + +#ifdef __x86_64__ +# define REG_IP REG_RIP +# define WIDTH "q" +#else +# define REG_IP REG_EIP +# define WIDTH "l" +#endif + +static unsigned long get_eflags(void) +{ + unsigned long eflags; + asm volatile ("pushf" WIDTH "\n\tpop" WIDTH " %0" : "=rm" (eflags)); + return eflags; +} + +static void set_eflags(unsigned long eflags) +{ + asm volatile ("push" WIDTH " %0\n\tpopf" WIDTH + : : "rm" (eflags) : "flags"); +} + +#define X86_EFLAGS_TF (1UL << 8) + +static void sigtrap(int sig, siginfo_t *info, void *ctx_void) +{ + ucontext_t *ctx = (ucontext_t*)ctx_void; + + if (get_eflags() & X86_EFLAGS_TF) { + set_eflags(get_eflags() & ~X86_EFLAGS_TF); + printf("[WARN]\tSIGTRAP handler had TF set\n"); + _exit(1); + } + + sig_traps++; + + if (sig_traps == 10000 || sig_traps == 10001) { + printf("[WARN]\tHit %d SIGTRAPs with si_addr 0x%lx, ip 0x%lx\n", + (int)sig_traps, + (unsigned long)info->si_addr, + (unsigned long)ctx->uc_mcontext.gregs[REG_IP]); + } +} + +static void check_result(void) +{ + unsigned long new_eflags = get_eflags(); + set_eflags(new_eflags & ~X86_EFLAGS_TF); + + if (!sig_traps) { + printf("[FAIL]\tNo SIGTRAP\n"); + exit(1); + } + + if (!(new_eflags & X86_EFLAGS_TF)) { + printf("[FAIL]\tTF was cleared\n"); + exit(1); + } + + printf("[OK]\tSurvived with TF set and %d traps\n", (int)sig_traps); + sig_traps = 0; +} + +int main() +{ + int tmp; + + sethandler(SIGTRAP, sigtrap, 0); + + printf("[RUN]\tSet TF and check nop\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ("nop"); + check_result(); + +#ifdef __x86_64__ + printf("[RUN]\tSet TF and check syscall-less opportunistic sysret\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + extern unsigned char post_nop[]; + asm volatile ("pushf" WIDTH "\n\t" + "pop" WIDTH " %%r11\n\t" + "nop\n\t" + "post_nop:" + : : "c" (post_nop) : "r11"); + check_result(); +#endif + + printf("[RUN]\tSet TF and check int80\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + asm volatile ("int $0x80" : "=a" (tmp) : "a" (SYS_getpid)); + check_result(); + + /* + * This test is particularly interesting if fast syscalls use + * SYSENTER: it triggers a nasty design flaw in SYSENTER. + * Specifically, SYSENTER does not clear TF, so either SYSENTER + * or the next instruction traps at CPL0. (Of course, Intel + * mostly forgot to document exactly what happens here.) So we + * get a CPL0 fault with usergs (on 64-bit kernels) and possibly + * no stack. 
The only sane way the kernel can possibly handle + * it is to clear TF on return from the #DB handler, but this + * happens way too early to set TF in the saved pt_regs, so the + * kernel has to do something clever to avoid losing track of + * the TF bit. + * + * Needless to say, we've had bugs in this area. + */ + syscall(SYS_getpid); /* Force symbol binding without TF set. */ + printf("[RUN]\tSet TF and check a fast syscall\n"); + set_eflags(get_eflags() | X86_EFLAGS_TF); + syscall(SYS_getpid); + check_result(); + + /* Now make sure that another fast syscall doesn't set TF again. */ + printf("[RUN]\tFast syscall with TF cleared\n"); + fflush(stdout); /* Force a syscall */ + if (get_eflags() & X86_EFLAGS_TF) { + printf("[FAIL]\tTF is now set\n"); + exit(1); + } + if (sig_traps) { + printf("[FAIL]\tGot SIGTRAP\n"); + exit(1); + } + printf("[OK]\tNothing unexpected happened\n"); + + return 0; +} diff --git a/tools/testing/selftests/x86/sysret_ss_attrs.c b/tools/testing/selftests/x86/sysret_ss_attrs.c new file mode 100644 index 000000000000..ce42d5a64009 --- /dev/null +++ b/tools/testing/selftests/x86/sysret_ss_attrs.c @@ -0,0 +1,112 @@ +/* + * sysret_ss_attrs.c - test that syscalls return valid hidden SS attributes + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * On AMD CPUs, SYSRET can return with a valid SS descriptor with with + * the hidden attributes set to an unusable state. Make sure the kernel + * doesn't let this happen. + */ + +#define _GNU_SOURCE + +#include <stdlib.h> +#include <unistd.h> +#include <stdio.h> +#include <string.h> +#include <sys/mman.h> +#include <err.h> +#include <stddef.h> +#include <stdbool.h> +#include <pthread.h> + +static void *threadproc(void *ctx) +{ + /* + * Do our best to cause sleeps on this CPU to exit the kernel and + * re-enter with SS = 0. + */ + while (true) + ; + + return NULL; +} + +#ifdef __x86_64__ +extern unsigned long call32_from_64(void *stack, void (*function)(void)); + +asm (".pushsection .text\n\t" + ".code32\n\t" + "test_ss:\n\t" + "pushl $0\n\t" + "popl %eax\n\t" + "ret\n\t" + ".code64"); +extern void test_ss(void); +#endif + +int main() +{ + /* + * Start a busy-looping thread on the same CPU we're on. + * For simplicity, just stick everything to CPU 0. This will + * fail in some containers, but that's probably okay. + */ + cpu_set_t cpuset; + CPU_ZERO(&cpuset); + CPU_SET(0, &cpuset); + if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0) + printf("[WARN]\tsched_setaffinity failed\n"); + + pthread_t thread; + if (pthread_create(&thread, 0, threadproc, 0) != 0) + err(1, "pthread_create"); + +#ifdef __x86_64__ + unsigned char *stack32 = mmap(NULL, 4096, PROT_READ | PROT_WRITE, + MAP_32BIT | MAP_ANONYMOUS | MAP_PRIVATE, + -1, 0); + if (stack32 == MAP_FAILED) + err(1, "mmap"); +#endif + + printf("[RUN]\tSyscalls followed by SS validation\n"); + + for (int i = 0; i < 1000; i++) { + /* + * Go to sleep and return using sysret (if we're 64-bit + * or we're 32-bit on AMD on a 64-bit kernel). 
On AMD CPUs, + * SYSRET doesn't fix up the cached SS descriptor, so the + * kernel needs some kind of workaround to make sure that we + * end the system call with a valid stack segment. This + * can be a confusing failure because the SS *selector* + * is the same regardless. + */ + usleep(2); + +#ifdef __x86_64__ + /* + * On 32-bit, just doing a syscall through glibc is enough + * to cause a crash if our cached SS descriptor is invalid. + * On 64-bit, it's not, so try extra hard. + */ + call32_from_64(stack32 + 4088, test_ss); +#endif + } + + printf("[OK]\tWe survived\n"); + +#ifdef __x86_64__ + munmap(stack32, 4096); +#endif + + return 0; +} diff --git a/tools/testing/selftests/x86/thunks.S b/tools/testing/selftests/x86/thunks.S new file mode 100644 index 000000000000..ce8a995bbb17 --- /dev/null +++ b/tools/testing/selftests/x86/thunks.S @@ -0,0 +1,67 @@ +/* + * thunks.S - assembly helpers for mixed-bitness code + * Copyright (c) 2015 Andrew Lutomirski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * These are little helpers that make it easier to switch bitness on + * the fly. + */ + + .text + + .global call32_from_64 + .type call32_from_64, @function +call32_from_64: + // rdi: stack to use + // esi: function to call + + // Save registers + pushq %rbx + pushq %rbp + pushq %r12 + pushq %r13 + pushq %r14 + pushq %r15 + pushfq + + // Switch stacks + mov %rsp,(%rdi) + mov %rdi,%rsp + + // Switch to compatibility mode + pushq $0x23 /* USER32_CS */ + pushq $1f + lretq + +1: + .code32 + // Call the function + call *%esi + // Switch back to long mode + jmp $0x33,$1f + .code64 + +1: + // Restore the stack + mov (%rsp),%rsp + + // Restore registers + popfq + popq %r15 + popq %r14 + popq %r13 + popq %r12 + popq %rbp + popq %rbx + + ret + +.size call32_from_64, .-call32_from_64 diff --git a/tools/testing/selftests/x86/trivial_32bit_program.c b/tools/testing/selftests/x86/trivial_32bit_program.c new file mode 100644 index 000000000000..fabdf0f51621 --- /dev/null +++ b/tools/testing/selftests/x86/trivial_32bit_program.c @@ -0,0 +1,18 @@ +/* + * Trivial program to check that we have a valid 32-bit build environment. + * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#ifndef __i386__ +# error wrong architecture +#endif + +#include <stdio.h> + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/tools/testing/selftests/x86/trivial_64bit_program.c b/tools/testing/selftests/x86/trivial_64bit_program.c new file mode 100644 index 000000000000..05c6a41b3671 --- /dev/null +++ b/tools/testing/selftests/x86/trivial_64bit_program.c @@ -0,0 +1,18 @@ +/* + * Trivial program to check that we have a valid 64-bit build environment. 
+ * Copyright (c) 2015 Andy Lutomirski + * GPL v2 + */ + +#ifndef __x86_64__ +# error wrong architecture +#endif + +#include <stdio.h> + +int main() +{ + printf("\n"); + + return 0; +} diff --git a/tools/thermal/tmon/Makefile b/tools/thermal/tmon/Makefile index 0788621c8d76..2e83dd3655a2 100644 --- a/tools/thermal/tmon/Makefile +++ b/tools/thermal/tmon/Makefile @@ -12,10 +12,6 @@ TARGET=tmon INSTALL_PROGRAM=install -m 755 -p DEL_FILE=rm -f -INSTALL_CONFIGFILE=install -m 644 -p -CONFIG_FILE= -CONFIG_PATH= - # Static builds might require -ltinfo, for instance ifneq ($(findstring -static, $(LDFLAGS)),) STATIC := --static @@ -38,13 +34,9 @@ valgrind: tmon install: - mkdir -p $(INSTALL_ROOT)/$(BINDIR) - $(INSTALL_PROGRAM) "$(TARGET)" "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" - - mkdir -p $(INSTALL_ROOT)/$(CONFIG_PATH) - - $(INSTALL_CONFIGFILE) "$(CONFIG_FILE)" "$(INSTALL_ROOT)/$(CONFIG_PATH)" uninstall: $(DEL_FILE) "$(INSTALL_ROOT)/$(BINDIR)/$(TARGET)" - $(CONFIG_FILE) "$(CONFIG_PATH)" - clean: find . -name "*.o" | xargs $(DEL_FILE) diff --git a/tools/vm/Makefile b/tools/vm/Makefile index ac884b65a072..93aadaf7ff63 100644 --- a/tools/vm/Makefile +++ b/tools/vm/Makefile @@ -3,7 +3,7 @@ TARGETS=page-types slabinfo page_owner_sort LIB_DIR = ../lib/api -LIBS = $(LIB_DIR)/libapikfs.a +LIBS = $(LIB_DIR)/libapi.a CC = $(CROSS_COMPILE)gcc CFLAGS = -Wall -Wextra -I../lib/ |
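Editorial aside, not part of any patch above: sysret_ss_attrs.c pins itself to one CPU and keeps a sibling thread busy-looping there, so that each usleep() in the main thread has to leave and re-enter the kernel on that CPU. A stripped-down sketch of just that setup (the CPU number and iteration count are arbitrary choices here; build with -pthread):

#define _GNU_SOURCE
#include <pthread.h>
#include <sched.h>
#include <stdio.h>
#include <unistd.h>

static void *spinner(void *arg)
{
	(void)arg;
	for (;;)
		;		/* burn the CPU so every sleep forces a reschedule */
	return NULL;
}

int main(void)
{
	cpu_set_t cpuset;
	pthread_t thread;
	int i;

	CPU_ZERO(&cpuset);
	CPU_SET(0, &cpuset);	/* stick everything to CPU 0, as the test does */
	if (sched_setaffinity(0, sizeof(cpuset), &cpuset) != 0)
		printf("[WARN]\tsched_setaffinity failed\n");

	if (pthread_create(&thread, NULL, spinner, NULL) != 0)
		return 1;

	for (i = 0; i < 1000; i++)
		usleep(2);	/* each wakeup re-enters the kernel on the pinned CPU */

	printf("[OK]\tran %d sleep/wake cycles\n", i);
	return 0;
}

The real test adds the interesting part on top: after each wakeup it runs 32-bit code (via thunks.S) that pushes and pops SS, which crashes if SYSRET left the cached SS attributes unusable.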